U
    d+                     @   sX   d dl Z d dlmZ d dlmZ G dd deZG dd deZG dd de jjZ	dS )	    N)Functionc                   @   s$   e Zd Zedd Zedd ZdS )SyncBatchNormc
                    s0  |j tjds| }|d k	r&| }t| |d }
|
dkrZ|	dk rZtd|
|j	d }| dkrt
||\}}tjd| |d |j|jd}tj|||gdd ntjd| d |j|jd | d	kr@  }tjd||	  j jd}tj| |d
d t||	|f tj |dd\}}}nL fddt|	D }tj| |d
d tj|dd tj |dd\}}}tj s|ddk}|| }|| }|| }t||||||||d\}}| |||||tj  || _!| dkr"t"||||||S t#|S d S )NZmemory_format      zGExpected more than 1 value per channel when training, got input size {}r   )r   dtypedevicedimZncclFZasync_opc                    s   g | ]}t  qS  )torch
empty_like).0_combinedr   ?/tmp/pip-unpacked-wheel-ua33x9lu/torch/nn/modules/_functions.py
<listcomp>>   s    z)SyncBatchNorm.forward.<locals>.<listcomp>)$is_contiguousr   channels_last
contiguousintnumelsize
ValueErrorformatshapeZbatch_norm_statsfullr   r	   catzerosZ_get_backend_nameemptydistZ_all_gather_baseZreshapesplitrangeZ
all_gatherstackZcudaZis_current_stream_capturingZsqueezeZ#batch_norm_gather_stats_with_countsviewsave_for_backwardtoZint32process_groupZbatch_norm_elemtr   )selfinputweightZbiasZrunning_meanZrunning_varZepsZmomentumr+   Z
world_sizer   num_channelsmeaninvstdcountZcombined_sizeZcombined_flatZmean_allZ
invstd_allZ	count_allZcombined_listmaskr   r   r   forward   st    


	zSyncBatchNorm.forwardc                 C   sp  |j tjds| }| j\}}}}}d  } }}	| j}
| dkrt|||||| jd | jd | jd \}}}}	| jd r|j	d }tj
||gdd}tjj|tjjj|
dd t||\}}t||||||||}|d ks| jd sd }|d ks| jd sZd }	nH|j	d }| jd rZtjd| |j|jd}tjj|tjjj|
dd |||	d d d d d d f	S )	Nr   r   r   r   r
   Fr   r   )r   r   r   r   saved_tensorsr+   r   Zbatch_norm_backward_reduceZneeds_input_gradr   r!   distributedZ
all_reduceZReduceOpZSUMr%   Zbatch_norm_backward_elemtr"   r   r	   )r,   grad_outputZsaved_inputr.   r0   r1   Zcount_tensor
grad_inputZgrad_weightZ	grad_biasr+   Zsum_dyZ
sum_dy_xmur/   r   r   r   r   backwardi   sl    

   
   zSyncBatchNorm.backwardN__name__
__module____qualname__staticmethodr4   r9   r   r   r   r   r      s   
`r   c                   @   s&   e Zd Zed	ddZedd ZdS )
CrossMapLRN2d-C6?      ?r   c                 C   s  || _ || _|| _|| _d | _| dks.t| jp:| | _| }| d}| d}| d}	| d}
|| | j| |}t	j
|d|d t| j d d d }||kr|n|}| jdd}|  t|D ]}||d| qtd|D ]}| jd|d }| jd|}|| ||| d k r`|d|| d }|j|dd ||kr|d|| }|j|dd q| j| j| j  | j t	j
| j| j |d || | || |S )	N   r   r   r      outalphar   )r   rG   betakscaler   AssertionErrornew
resize_as_r   powr   selectzero_r&   add_Zcopy_mul_r)   )ctxr-   r   rG   rH   rI   output
batch_sizechannelsinput_heightinput_widthZinput_squareZpre_padZpre_pad_cropZscale_firstcZscale_previousZscale_currentZsquare_nextZsquare_previousr   r   r   r4      sJ    






zCrossMapLRN2d.forwardc                 C   s  | j \}}| }|d}|d}|d}|d}||| j d ||}	|||}
d| j | j | j }t| j| jd d  }|| tj| j	| j |d
| |	  |	d||}t|D ]}tj|| || |d || j	|  tj|	dd| jd dd|
d t|D ]R}|
|	|| j d   || | j|| | |
| d |
j|	| d	d
 q&q|d d d d fS )Nr   r   r   rC   rD   F)ZkeepdimrE   )valuer   rF   )r5   rL   r   rG   rH   r   rM   r   rN   rJ   rR   rP   Znarrowr&   mulZdiv_sumrQ   Zaddcmul_)rS   r7   r-   rT   r8   rU   rV   rW   rX   Zpaddded_ratioZaccum_ratioZcache_ratio_valueZinversePrePadZpadded_ratio_centernrY   r   r   r   r9      s@    





   "zCrossMapLRN2d.backwardN)r@   rA   r   r:   r   r   r   r   r?      s   7r?   c                   @   s$   e Zd Zedd Zedd ZdS )BackwardHookFunctionc                 G   s   | j dd |D   |S )Nc                 S   s   g | ]}|j s|qS r   )Zrequires_grad)r   argr   r   r   r     s      z0BackwardHookFunction.forward.<locals>.<listcomp>)Zmark_non_differentiablerS   argsr   r   r   r4     s    zBackwardHookFunction.forwardc                 G   s   |S )Nr   r`   r   r   r   r9     s    zBackwardHookFunction.backwardNr:   r   r   r   r   r^     s   
r^   )
r   Ztorch.distributedr6   r$   Ztorch.autograd.functionr   r   r?   Zautogradr^   r   r   r   r   <module>   s    )]