U
    d                     @   sJ   d dl Z d dlZd dlmZ d dlmZ d dlmZ dd Zd	ddZ	dS )
    N_get_device_index)autocast)ExceptionWrapperc                 C   s~   t | tjr| S t | ts$t | trHtt| D ]}t |tjr.|  S q.t | trztt|  D ]}t |tjr`|  S q`d S )N)	
isinstancetorchZTensorlisttuplemap	get_a_vardictitems)objresult r   D/tmp/pip-unpacked-wheel-ua33x9lu/torch/nn/parallel/parallel_apply.pyr      s    


r   c           	         sb  t | t |kst|dk	r2t | t |ks@tni ft |  }|dk	r^t | t |ksltndgt |  }dd |D }t i t t  d	fdd	 t | dkr  fddtt| |||D }|D ]}|	  q|D ]}|
  qn  d| d |d |d |d  g }tt |D ],}| }t|trP|  || q0|S )
a~  Applies each `module` in :attr:`modules` in parallel on arguments
    contained in :attr:`inputs` (positional) and :attr:`kwargs_tup` (keyword)
    on each of :attr:`devices`.

    Args:
        modules (Module): modules to be parallelized
        inputs (tensor): inputs to the modules
        devices (list of int or torch.device): CUDA devices

    :attr:`modules`, :attr:`inputs`, :attr:`kwargs_tup` (if given), and
    :attr:`devices` (if given) should all have same length. Moreover, each
    element of :attr:`inputs` can either be a single object as the only argument
    to a module, or a collection of positional arguments.
    Nc                 S   s   g | ]}t |d qS )Tr   ).0xr   r   r   
<listcomp>/   s     z"parallel_apply.<locals>.<listcomp>c                    s   t  |d krt| }zht j|: t d$ t|tt	fsN|f}|||}W 5 Q R X W 5 Q R X  || < W 5 Q R X W n: t
k
r    td| |d| < W 5 Q R X Y nX d S )N)Zenabledzin replica {} on device {})where)r   Zset_grad_enabledr   Z
get_deviceZcudadevicer   r   r   r	   	Exceptionr   format)imoduleinputkwargsr   output)autocast_enabledgrad_enabledlockresultsr   r   _worker4   s    

zparallel_apply.<locals>._worker   c              	      s0   g | ](\}\}}}}t j |||||fd qS ))targetargs)	threadingThread)r   r   r   r   r   r   )r"   r   r   r   F   s   r   )N)lenAssertionErrorr&   Lockr   Zis_grad_enabledZis_autocast_enabled	enumeratezipstartjoinranger   r   reraiseappend)	modulesinputsZ
kwargs_tupZdevicesthreadsthreadoutputsr   r   r   )r"   r   r   r    r!   r   parallel_apply   s8    

 r7   )NN)
r&   r   Ztorch.cuda._utilsr   Ztorch.cuda.ampr   Ztorch._utilsr   r   r7   r   r   r   r   <module>   s   