U
    d                     @   s   d dl Z d dlZd dlmZmZ e jje jjhZe jj	j
e jj	jhZeeZddddZddddZdd	 Zd
d ZdddZdddZdddZdddZd ddZd!ddZdS )"    N)DictAny)returnc                 C   s2   t | tkrtj|| _ntj|| d _d S Nr   )type_supported_typestorchnn	Parameterweight)moduler    r   C/tmp/pip-unpacked-wheel-ua33x9lu/torch/ao/quantization/_equalize.pyset_module_weight	   s    r   c                 C   s2   t | tkrtj|| _ntj|| d _d S r   )r   r   r   r	   r
   bias)r   r   r   r   r   set_module_bias   s    r   c                 C   s    t | tkr| jS | d jS d S r   )r   r   r   r   r   r   r   get_module_weight   s    r   c                 C   s    t | tkr| jS | d jS d S r   )r   r   r   r   r   r   r   get_module_bias   s    r   Fc                 C   s*   |j dd |D ]}| ||\} }q| S )z/ Applies 'torch.max' over the given axises
    Treverse)sortmaxinput	axis_listZkeepdimaxis_r   r   r   max_over_ndim!   s    r   c                 C   s*   |j dd |D ]}| ||\} }q| S )z/ Applies 'torch.min' over the given axises
    Tr   )r   minr   r   r   r   min_over_ndim)   s    r    c                 C   sT   | j }tt|}|| t| |}t| |}|d| |ksLtd|| S )zC finds the range of weights associated with a specific channel
    r   zKDimensions of resultant channel range does not match size of requested axis)ndimlistrangeremover    r   sizeAssertionError)r   r   Zsize_of_tensor_dimr   ZminsZmaxsr   r   r   channel_range1   s    


r'      c                 C   s  t | tkst |tkr.tdt | dt |t| }t|}||||krZtdt| }t||}t||}|d7 }t	|| }	t
|	}
||
 }dg|j }||||< dg|j }||||< t|	|}	t|
|}
||
 }||	 }t| | t| | t|| dS )z Given two adjacent tensors', the weights are scaled such that
    the ranges of the first tensors' output channel are equal to the
    ranges of the second tensors' input channel
    zmodule type not supported: z_Number of output channels of first arg do not match         number input channels of second argg&.>r(   N)r   _all_supported_types
ValueErrorr   r%   	TypeErrorr   r'   r   sqrtZ
reciprocalr!   Zreshaper   r   )Zmodule1Zmodule2Zoutput_axisZ
input_axisZweight1Zweight2r   Zweight1_rangeZweight2_rangeZscaling_factorsZinverse_scaling_factorsZsize1Zsize2r   r   r   cross_layer_equalization>   s0    




r.   -C6?Tc           
      C   s   |st | } i }i }dd |D }|  D ] \}}||kr,|||< d||< q,t|||s|D ]R}	t ||	d  ||	d < t ||	d  ||	d < t||	d  ||	d   q^qN| S )a)   Given a list of adjacent modules within a model, equalization will
    be applied between each pair, this will repeated until convergence is achieved

    Keeps a copy of the changing modules from the previous iteration, if the copies
    are not that different than the current modules (determined by converged_test),
    then the modules have converged enough that further equalizing is not necessary

    Implementation of this referced section 4.1 of this paper https://arxiv.org/pdf/1906.04721.pdf

    Args:
        model: a model (nn.module) that equalization is to be applied on
        paired_modules_list: a list of lists where each sublist is a pair of two
            submodules found in the model, for each pair the two submodules generally
            have to be adjacent in the model to get expected/reasonable results
        threshold: a number used by the converged function to determine what degree
            similarity between models is necessary for them to be called equivalent
        inplace: determines if function is inplace or not
    c                 S   s   h | ]}|D ]}|qqS r   r   ).0pairnamer   r   r   	<setcomp>   s       zequalize.<locals>.<setcomp>Nr   r(   )copydeepcopyZnamed_modules	convergedr.   )
modelZpaired_modules_list	thresholdZinplaceZname_to_moduleZprevious_name_to_moduleZname_setr2   r   r1   r   r   r   equalizej   s    

r9   c                 C   s|   |   |  krtdtd}d| kr2dS |   D ]4}t| | }t|| }||}|t|7 }q:t||k S )a`   Tests for the summed norm of the differences between each set of modules
    being less than the given threshold

    Takes two dictionaries mapping names to modules, the set of names for each dictionary
    should be the same, looping over the set of names, for each name take the differnce
    between the associated modules in each dictionary

    zIThe keys to the given mappings must have the same set of names of modulesg        NF)	keysr+   r   Ztensorvaluesr   subZnormbool)Zcurr_modulesZprev_modulesr8   Zsummed_normsr2   Zcurr_weightZprev_weight
differencer   r   r   r6      s    	

r6   )F)F)r   )r   r(   )r/   T)r/   )r   r4   typingr   r   r	   ZConv2dZLinearr   Z	intrinsicZ
ConvReLU2dZ
LinearReLUZ_supported_intrinsic_typesunionr*   r   r   r   r   r   r    r'   r.   r9   r6   r   r   r   r   <module>   s   




,
'