U
    d                     @   s   U d Z ddlmZ ddlmZmZmZmZmZ ddl	Z	ddl	m
Z
 ddlmZmZmZmZmZmZ g Zee ed< ee
 ZG d	d
 d
ZG dd de	jjZG dd de	jjZdS )zkAutograd functions for stream-aware CUDA copy. It is used to overlap copy
and computation on the same GPU.
    )deque)DequeListOptionalTupleSequenceN)Tensor   )AbstractStreamcurrent_stream
get_devicerecord_stream
use_streamwait_stream__all__c                   @   s   e Zd ZU eed< eed< dS )Contextprev_streamnext_streamN)__name__
__module____qualname__r
   __annotations__ r   r   H/tmp/pip-unpacked-wheel-ua33x9lu/torch/distributed/pipeline/sync/copy.pyr      s   
r   c                   @   sJ   e Zd ZdZeeeeedddZeee	e
ee	 df dddZd	S )
Copyz#Copies tensors on specific streams.ctxr   r   returnc              
   G   s   || _ || _g }tt|}t|j t|V |D ]J}t|rt|jt|dd}|| t	|| t	|| q4|| q4W 5 Q R X W 5 Q R X t
|S )NTZnon_blocking)r   r   r   r   r   torch	is_tensortoappendr   tuple)r   r   r   inputoutputZoutput_streamxyr   r   r   forward!   s    


 zCopy.forward.)r   grad_outputr   c           	   
   G   s   | j }| j}tt|d}tt|}t|X t|D t|D ]4}|jt|dd}|	| t
|| t
|| qBW 5 Q R X W 5 Q R X d}|t| S )N)maxlenTr   NN)r   r   r   lenr   r   r   reversedr!   
appendleftr   r#   )	r   r)   r   r   
grad_inputZinput_streamr&   r'   grad_streamsr   r   r   backward:   s    

 zCopy.backwardNr   r   r   __doc__staticmethodr   r
   Tensorsr(   r   r   r   r1   r   r   r   r   r      s
   r   c                   @   sJ   e Zd ZdZeeeeedddZeee	e
ee	 df dddZd	S )
WaitzSynchronizes a stream to another stream.

    Place it just before you want to start an operation on the next stream,
    provided that all operations on the previous stream are done.

    r   c                 G   s(   || _ || _t|| tdd |D S )Nc                 s   s$   | ]}t |r| n|V  qd S )N)r   r    detach).0r&   r   r   r   	<genexpr>a   s     zWait.forward.<locals>.<genexpr>)r   r   r   r#   )r   r   r   r$   r   r   r   r(   Y   s    
zWait.forward.)r   r/   r   c                 G   s"   | j }| j}t|| d}|| S )Nr+   )r   r   r   )r   r/   r   r   r0   r   r   r   r1   c   s
    
zWait.backwardNr2   r   r   r   r   r6   Q   s
   r6   )r3   collectionsr   typingr   r   r   r   r   r   r   streamr
   r   r   r   r   r   r   strr   r5   r   ZautogradZFunctionr   r6   r   r   r   r   <module>   s    3