U
    d 1                     @   s  d dl mZ d dlmZmZmZmZ d dlmZm	Z	 G dd dZ
G dd deZG dd	 d	eZG d
d deZG dd deZG dd deZeee edddZe
eeef edddZee
 eeef ee
ef dddZe
e
edddZee
 ee
ef edddZdS )     )Enum)
NamedTupleDictListSet)Nodemap_argc                   @   s@   e Zd ZdZeddddZdd Zdd	 Zd
d Zdd Z	dS )	PartitionzPartition class contains all the information about an individual partition.
    It also provides necessary methods for manipulation the partition.
    N)partition_idreturnc                 C   s4   t  | _|| _t  | _t  | _d| _d| _g | _d S )Nr   )setnodesr
   parentschildrenZ	bfs_levelused_mem_byteslogical_device_ids)selfr
    r   K/tmp/pip-unpacked-wheel-ua33x9lu/torch/fx/experimental/partitioner_utils.py__init__   s    zPartition.__init__c                 C   s
   t | jS N)strr
   r   r   r   r   __str__   s    zPartition.__str__c                 C   s,   d| _ | jD ]}|  j t|| j7  _ qd S )Nr   )r   r   get_extra_size_of)r   noder   r   r   recalculate_mem_size   s    
zPartition.recalculate_mem_sizec                    sd   i  t |j fdd t |j fdd  D ]}|jdkr0| j| q0| j| |   d S )Nc                    s
     | S r   
setdefaultninput_nodesr   r   <lambda>       z$Partition.add_node.<locals>.<lambda>c                    s
     | S r   r   r    r"   r   r   r$       r%   >   placeholderget_attr)r   argskwargsopr   addr   )r   r   r!   r   r"   r   add_node   s    
zPartition.add_nodec                    s   |j krj | i  t|j fdd t|j fdd  D ]2}tfdd|jD rF|jdkrFj | qF  d S )Nc                    s
     | S r   r   r    r"   r   r   r$   .   r%   z'Partition.remove_node.<locals>.<lambda>c                    s
     | S r   r   r    r"   r   r   r$   /   r%   c                    s   g | ]}| j kqS r   )r   .0r!   r   r   r   
<listcomp>5   s     z)Partition.remove_node.<locals>.<listcomp>>   r&   r'   )	r   remover   r(   r)   allusersr*   r   )r   r   Z
input_noder   )r#   r   r   remove_node(   s    
zPartition.remove_node)
__name__
__module____qualname____doc__intr   r   r   r,   r3   r   r   r   r   r	      s   	r	   c                   @   s&   e Zd ZU eed< eed< eed< dS )DevicenameZavailable_mem_bytesZ
logical_idN)r4   r5   r6   r   __annotations__r8   r   r   r   r   r9   ;   s   
r9   c                   @   s   e Zd ZU eed< eed< dS )NodeLatencymem_latency_seccomputer_latency_secNr4   r5   r6   floatr;   r   r   r   r   r<   A   s   
r<   c                   @   s&   e Zd ZU eed< eed< eed< dS )PartitionLatencyr=   r>   overall_latency_secNr?   r   r   r   r   rA   H   s   
rA   c                   @   s    e Zd ZdZdZdZdZdZdS )PartitionModer               N)r4   r5   r6   
size_basedZ	sparse_nnZ
cost_awareZkl_basedZ	aot_basedr   r   r   r   rC   Q   s
   rC   c                   @   s   e Zd ZU ee ed< ejZeed< dZ	e
ed< i Zeeef ed< i Zeeef ed< i Zeeee f ed< dZeed	< d
S )PartitionerConfigZdevicesmode        transfer_rate_bytes_per_secnode_to_latency_mappingnode_to_partition_mapping#partition_to_logical_device_mappingFsaturate_hostN)r4   r5   r6   r   r9   r;   rC   rH   rJ   rL   r@   rM   r   r   r<   rN   r8   rO   rP   boolr   r   r   r   rI   Y   s   
rI   )r   r   r   c                    s   i  t | j fdd t | j fdd d} D ]0}||kr4t|dd}|r\||j7 }q4tdq4t| dd}|r||j7 }ntd|S )zGiven a node and a set of nodes,
    this function return the extra size that needed
    if this node is included in this set.
    c                    s
     | S r   r   r    r"   r   r   r$   k   r%   z#get_extra_size_of.<locals>.<lambda>c                    s
     | S r   r   r    r"   r   r   r$   l   r%   r   
size_bytesNznode has no size_bytes attr)r   r(   r)   getattroutput_sizeRuntimeErrorZ
total_size)r   r   Ztotal_size_of_input_nodesr!   rR   r   r"   r   r   d   s    
r   )	partitionrM   r   c                    st   t tt ddd}ttd fdd |}tdddd}|D ](} |tdddd}|j|jkrF|}qF|S )	zUGiven a partiton and its nodes' latency, return a PartitionLatency for this partition)rV   r   c                    sl   g }j D ]\}|jdkrq
i  t|j fdd t|j fdd tfdd D s
|| q
|S )z>Given a partition, return a list of nodes on the top bfs level>   r&   r'   c                    s
     | S r   r   r    r"   r   r   r$      r%   zEget_latency_of_one_partition.<locals>.get_top_nodes.<locals>.<lambda>c                    s
     | S r   r   r    r"   r   r   r$      r%   c                    s    g | ]}| j ko|jd kqS )>   r&   r'   )r   r*   r-   )rV   r   r   r/      s   zGget_latency_of_one_partition.<locals>.get_top_nodes.<locals>.<listcomp>)r   r*   r   r(   r)   anyappend)rV   	top_nodesr   r   )r#   rV   r   get_top_nodes   s    


z3get_latency_of_one_partition.<locals>.get_top_nodes)r   r   c           
         s   |  }|j t|j|j }|j|j }|j|j }t| jj}|rtdddd}|D ]&} |t|||}	|	j |j kr\|	}q\|S t|||S )zyGiven a top node of a partition, this function returns
        the latency of the critical path in the partition
        rK   r=   r>   rB   )	rB   maxr>   r=   r   r2   intersectionr   rA   )
r   partition_latencyZnode_latencyrB   r=   r>   r2   Zmax_latencyr!   Znew_partition_latency
dfs_helperrM   rV   r   r   r`      sH     

      z0get_latency_of_one_partition.<locals>.dfs_helperrK   r[   )r	   r   r   rA   rB   )rV   rM   rZ   rY   Zcritical_path_latencyr   r^   r   r_   r   get_latency_of_one_partition   s.    ,    ra   )
partitionsrM   r   c                 C   s$   i }| D ]}t ||}|||< q|S )zGiven all the partitions and node_to_latency_mapping dictionary,
    return a mapping dictionary of each partition to its overall latency
    )ra   )rb   rM   partition_to_latency_mappingrV   r^   r   r   r    get_partition_to_latency_mapping   s     
rd   )parent_partitionchild_partitionrL   c                    s   | j g kr$|j g kr$| j |j kr$dS d}t }|jD ]t}i  t|j fdd t|j fdd  D ]>}|| jkrh||krht|dd}|dk	r||j7 }|| qhq4|| S )zfGiven two partitions (parent and child),
    calculate the communication latency between the two.
    rK   r   c                    s
     | S r   r   r    r"   r   r   r$     r%   z*get_comm_latency_between.<locals>.<lambda>c                    s
     | S r   r   r    r"   r   r   r$     r%   rR   N)	r   r   r   r   r(   r)   rS   rT   r+   )re   rf   rL   Z	comm_sizeZvisited_nodesr   r!   rR   r   r"   r   get_comm_latency_between   s(    



rg   )rb   rc   rL   c                    sb   t ttd fdd tt  tt  ddd}|| }d}|D ]} |d}||krB|}qB|S )zGiven all paritions in a graph, find the critical path among all partitions
    and return its latency as the latency of the whole graph
    )rV   latency_so_far_secr   c                    sX   ||  j 7 }| j}| jrTd}| jD ]*}t| |} ||| }||kr$|}q$|S |S )zJThis function helps to recursively get the latency of a path of partitionsrK   )rB   r   rg   )rV   rh   r   Zmax_latency_secchildZcomm_latency_secZnew_latency_secr`   rc   rL   r   r   r`     s(    
   z4get_latency_of_partitioned_graph.<locals>.dfs_helper)rb   r   c                 S   s*   g }| D ]}t |jdkr|| q|S )zvThis function is to return all the partitions without parents
        as the starting points of all the paths
        r   )lenr   rX   )rb   top_partitionsrV   r   r   r   get_top_partitions.  s
    z<get_latency_of_partitioned_graph.<locals>.get_top_partitionsrK   )r	   r@   r   )rb   rc   rL   rm   rl   Zcritical_path_latency_secrV   Zlatency_secr   rj   r    get_latency_of_partitioned_graph  s    	
rn   N)enumr   typingr   r   r   r   Ztorch.fx.noder   r   r	   r9   r<   rA   rC   rI   r8   r   ra   rd   r@   rg   rn   r   r   r   r   <module>   s2   4	 
\ 

%
