"""
This module implements modules which are used to perform fake quantization
during QAT.
"""
import torch
from torch.nn import Module
from torch.ao.quantization.observer import (
    MovingAverageMinMaxObserver,
    HistogramObserver,
    MovingAveragePerChannelMinMaxObserver,
    FixedQParamsObserver,
    default_fixed_qparams_range_0to1_observer,
    default_fixed_qparams_range_neg1to1_observer,
    _with_args,
)
import re
from abc import ABC, abstractmethod
from typing import Any, Tuple

def _is_per_channel(qscheme: 'torch.qscheme') -> bool:
    return qscheme in [torch.per_channel_symmetric, torch.per_channel_affine, torch.per_channel_affine_float_qparams]

def _is_per_tensor(qscheme: 'torch.qscheme') -> bool:
    return qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine]

def _is_symmetric_quant(qscheme: 'torch.qscheme') -> bool:
    return qscheme in [torch.per_tensor_symmetric, torch.per_channel_symmetric]

def _is_float_qparams(qscheme: 'torch.qscheme') -> bool:
    return qscheme in [torch.per_channel_affine_float_qparams, ]
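# Illustrative sketch (not part of this module's API): the predicates above
# classify a qscheme so later code can choose between the per-tensor and
# per-channel fake-quantization kernels. For example:
#
#     _is_per_tensor(torch.per_tensor_affine)                     # True
#     _is_per_channel(torch.per_channel_symmetric)                # True
#     _is_symmetric_quant(torch.per_tensor_affine)                # False
#     _is_float_qparams(torch.per_channel_affine_float_qparams)   # True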
ejjdeddddZejjdd ZejjdeddddZejjdd ZeeZ  ZS )FakeQuantizeBasea   Base fake quantize module
    Any fake quantize implementation should derive from this class.

    Concrete fake quantize module should follow the same API. In forward, they will update
    the statistics of the observed Tensor and fake quantize the input. They should also provide a
    `calculate_qparams` function that computes the quantization parameters given
    the collected statistics.

    fake_quant_enabledobserver_enabledc                    sB   t    | dtjdgtjd | dtjdgtjd d S )Nr      dtyper   )super__init__register_bufferr   tensorZuint8self	__class__r   r   r$   /   s    
zFakeQuantizeBase.__init__c                 C   s   d S r   r   )r(   xr   r   r   forward7   s    zFakeQuantizeBase.forwardc                 K   s   d S r   r   )r(   kwargsr   r   r   calculate_qparams;   s    z"FakeQuantizeBase.calculate_qparamsTN)enabledr   c                 C   s   |rdnd| j d< d S Nr    r   )r   r(   r/   r   r   r   enable_fake_quant?   s    z"FakeQuantizeBase.enable_fake_quantc                 C   s   |  d d S NF)r2   r'   r   r   r   disable_fake_quantC   s    z#FakeQuantizeBase.disable_fake_quantc                 C   s   |rdnd| j d< d S r0   )r   r1   r   r   r   enable_observerG   s    z FakeQuantizeBase.enable_observerc                 C   s   |  d d S r3   )r5   r'   r   r   r   disable_observerK   s    z!FakeQuantizeBase.disable_observer)T)T)__name__
__module____qualname____doc__r   Tensor__annotations__r$   r   r,   r.   jitexportboolr2   r4   r5   r6   classmethodr	   	with_args__classcell__r   r   r)   r   r   !   s"   







r   c                       s~   e Zd ZU dZejed< ejed< eddf fdd	Zej	j
dd Zd	d
 Zej	j
dd Z fddZ fddZ  ZS )FakeQuantizea   Simulate the quantize and dequantize operations in training time.
    The output of this module is given by::

        x_out = (
          clamp(round(x/scale + zero_point), quant_min, quant_max) - zero_point
        ) * scale

    * :attr:`scale` defines the scale factor used for quantization.

    * :attr:`zero_point` specifies the quantized value to which 0 in floating point maps to

    * :attr:`fake_quant_enabled` controls the application of fake quantization on tensors, note that
      statistics can still be updated.

    * :attr:`observer_enabled` controls statistics collection on tensors

    * :attr:`dtype` specifies the quantized dtype that is being emulated with fake-quantization,
        allowable values are torch.qint8 and torch.quint8.

    Args:

        observer (module): Module for observing statistics on input tensors and calculating scale
          and zero-point.
        observer_kwargs (optional): Arguments for the observer module

    Attributes:

        activation_post_process (Module): User provided module that collects statistics on the input tensor and
          provides a method to calculate scale and zero-point.

    scale
zero_pointNc                    st  t    |d k	r|d k	r||ks*td|dtj}t|dr^tt|di di d|}t|j	|ksvtd|t|j
kstd|||d |f || _| jj| _| jj| _t| jjrtj}ntj}| dtjd	gtjd
 | dtjdg|d
 | jj| _| jj| _t| jdr4| jjnd| _t| jsdt| jsdtdt| j t| j| _d S )Nz1quant_min must be less than or equal to quant_maxr"   pkeywordszquant_min out of boundzquant_max out of bound)	quant_min	quant_maxrD   g      ?r!   rE   r   ch_axiszYOnly per channel and per tensor quantization are supported in fake quantize got qscheme: )r#   r$   AssertionErrorgetr   quint8hasattrgetattrZiinfominmaxupdateactivation_post_processrH   rI   r   r   floatintr%   r&   r"   rJ   r   r   stris_per_channel)r(   observerrH   rI   observer_kwargsr"   Zzero_point_dtyper)   r   r   r$   u   sH    


 




zFakeQuantize.__init__c                 C   s
   | j  S r   rT   r.   r'   r   r   r   r.      s    zFakeQuantize.calculate_qparamsc                 C   s   | j d dkr| |  |  \}}|| jj|| jj }}| jj|jkrp| j	|j | j	|j | j
| | j
| | jd dkr| jrt|| j| j| j| jj| jj}nt|| j| j| jj| jj}|S )Nr   r    )r   rT   detachr.   torD   ZdevicerE   shaperesize_copy_r   rX   r   Z fake_quantize_per_channel_affinerJ   rH   rI   Zfake_quantize_per_tensor_affine)r(   XZ_scaleZ_zero_pointr   r   r   r,      s6           zFakeQuantize.forwardc                 C   s0   d | j| j| jj| jj| j| j| j| j	| j
	S )Nzfake_quant_enabled={}, observer_enabled={}, quant_min={}, quant_max={}, dtype={}, qscheme={}, ch_axis={}, scale={}, zero_point={})formatr   r   rT   rH   rI   r"   r   rJ   rD   rE   r'   r   r   r   
extra_repr   s          zFakeQuantize.extra_reprc                    s4   t t| ||| | j||d < | j||d < d S NrD   rE   )r#   rC   _save_to_state_dictrD   rE   )r(   ZdestinationprefixZ	keep_varsr)   r   r   re      s    z FakeQuantize._save_to_state_dictc              	      s   ddg}|D ]}	||	 }
|
|kr||
 }|	dkr@| j |j n|	dksLt| j|j tj r|	dkrz| j | q|	dkst| j| q|r|	|
 qt
t| ||||||| d S rd   )rD   r_   r^   rL   rE   r   r=   Zis_scriptingr`   appendr#   rC   _load_from_state_dict)r(   Z
state_dictrf   Zlocal_metadatastrictZmissing_keysZunexpected_keysZ
error_msgsZlocal_statenamekeyvalr)   r   r   rh      s*    
  z"FakeQuantize._load_from_state_dict)r7   r8   r9   r:   r   r;   r<   r   r$   r=   r>   r.   r,   rc   re   rh   rB   r   r   r)   r   rC   Q   s   
 

%

rC   c                       s@   e Zd ZdZ fddZejjdd Zejjdd Z	  Z
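# A worked example of the fake-quant formula in the docstring above
# (illustrative sketch): with scale=0.1, zero_point=0, quant_min=0,
# quant_max=255, an input of 1.234 maps to
# clamp(round(1.234/0.1 + 0), 0, 255) * 0.1 = 12 * 0.1 = 1.2, i.e. the value
# is snapped to the nearest representable quantized level.
#
#     fq = FakeQuantize(observer=MovingAverageMinMaxObserver,
#                       quant_min=0, quant_max=255, dtype=torch.quint8)
#     y = fq(torch.randn(2, 3))              # update stats, then fake-quantize
#     scale, zero_point = fq.calculate_qparams()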
S )FixedQParamsFakeQuantizez Simulate quantize and dequantize with fixed quantization
    parameters in training time. Only per tensor quantization
    is supported.
    c                    sl   t  j|d t| jtks2td| jjtjf || _| jj	| _	| jj
| _
t| jshtdt| j d S )NrY   z%s's observer must be a %szWOnly per tensor quantization is supported FixedQParamsFakeQuantize module, got qscheme:)r#   r$   typerT   r   rL   r*   r7   Z_observer_ctrrD   rE   r   r   rW   )r(   rY   r)   r   r   r$      s    

z!FixedQParamsFakeQuantize.__init__c                 C   s   | j | jfS r   )rD   rE   r'   r   r   r   r.      s    z*FixedQParamsFakeQuantize.calculate_qparamsc              
   C   s,   d | j| j| j| j| j| jj| jj| j	S )Nzufake_quant_enabled={}, observer_enabled={}, scale={}, zero_point={}, dtype={}, quant_min={}, quant_max={}, qscheme={})
rb   r   r   rD   rE   r"   rT   rH   rI   r   r'   r   r   r   rc      s         z#FixedQParamsFakeQuantize.extra_repr)r7   r8   r9   r:   r$   r   r=   r>   r.   rc   rB   r   r   r)   r   rm      s   

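# Illustrative usage sketch: fixed-qparams fake quants are meant for ops whose
# output range is known a priori (e.g. sigmoid maps into [0, 1]), so the
# observer carries a hard-coded scale/zero_point instead of learning one from
# the data:
#
#     fq = FixedQParamsFakeQuantize(observer=default_fixed_qparams_range_0to1_observer)
#     y = fq(torch.sigmoid(torch.randn(4)))
#     fq.calculate_qparams()   # returns the fixed (scale, zero_point)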
class FusedMovingAvgObsFakeQuantize(FakeQuantize):
    r"""Fused module that is used to observe the input tensor (compute min/max), compute
    scale/zero_point and fake_quantize the tensor.

    This module uses calculation similar to MovingAverageMinMaxObserver for the inputs,
    to compute the min/max values in order to compute the scale/zero_point.
    The qscheme input in the observer is used to differentiate between symmetric/affine
    quantization scheme.

    The output of this module is given by
    x_out = (clamp(round(x/scale + zero_point), quant_min, quant_max) - zero_point) * scale

    Similar to :class:`~torch.ao.quantization.FakeQuantize`, and accepts the same attributes as the
    base class.

    """

    def __init__(
        self,
        observer: Any = MovingAverageMinMaxObserver,
        quant_min: int = 0,
        quant_max: int = 255,
        **observer_kwargs: Any
    ) -> None:
        super().__init__(observer, quant_min, quant_max, **observer_kwargs)
        assert isinstance(
            self.activation_post_process,
            (MovingAverageMinMaxObserver, MovingAveragePerChannelMinMaxObserver)
        ), "Fused observer+fake_quant module only works with MovingAverageMinMaxObserver"
        self.register_buffer("fake_quant_enabled", torch.tensor([1], dtype=torch.long))
        self.register_buffer("observer_enabled", torch.tensor([1], dtype=torch.long))
        self.is_symmetric_quant = _is_symmetric_quant(self.activation_post_process.qscheme)

    @torch.jit.export
    def calculate_qparams(self) -> Tuple[torch.Tensor, torch.Tensor]:
        return self.activation_post_process.calculate_qparams()

    @torch.jit.export
    def extra_repr(self) -> str:
        return (
            "fake_quant_enabled={}, observer_enabled={}, scale={}, zero_point={}, "
            "dtype={}, quant_min={}, quant_max={}, qscheme={}, reduce_range={}".format(
                self.fake_quant_enabled,
                self.observer_enabled,
                self.scale,
                self.zero_point,
                self.dtype,
                self.activation_post_process.quant_min,
                self.activation_post_process.quant_max,
                self.qscheme,
                self.activation_post_process.reduce_range,
            )
        )

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        return torch.fused_moving_avg_obs_fake_quant(
            X,
            self.observer_enabled,
            self.fake_quant_enabled,
            self.activation_post_process.min_val,
            self.activation_post_process.max_val,
            self.scale,
            self.zero_point,
            self.activation_post_process.averaging_constant,
            self.activation_post_process.quant_min,
            self.activation_post_process.quant_max,
            self.ch_axis,
            self.is_per_channel,
            self.is_symmetric_quant,
        )

default_fake_quant = FakeQuantize.with_args(
    observer=MovingAverageMinMaxObserver, quant_min=0, quant_max=255,
    dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=True)
"""Default fake_quant for activations."""

default_weight_fake_quant = FakeQuantize.with_args(
    observer=MovingAverageMinMaxObserver, quant_min=-128, quant_max=127,
    dtype=torch.qint8, qscheme=torch.per_tensor_symmetric, reduce_range=False)
"""Default fake_quant for weights."""

default_dynamic_fake_quant = FakeQuantize.with_args(
    observer=MovingAverageMinMaxObserver, quant_min=0, quant_max=255,
    dtype=torch.quint8, averaging_constant=1)
"""Default dynamic fake_quant for activations."""

default_fixed_qparams_range_neg1to1_fake_quant = FixedQParamsFakeQuantize.with_args(
    observer=default_fixed_qparams_range_neg1to1_observer)
default_fixed_qparams_range_0to1_fake_quant = FixedQParamsFakeQuantize.with_args(
    observer=default_fixed_qparams_range_0to1_observer)
# Aliases kept for backwards compatibility.
default_symmetric_fixed_qparams_fake_quant = default_fixed_qparams_range_neg1to1_fake_quant
default_affine_fixed_qparams_fake_quant = default_fixed_qparams_range_0to1_fake_quant

default_per_channel_weight_fake_quant = FakeQuantize.with_args(
    observer=MovingAveragePerChannelMinMaxObserver, quant_min=-128, quant_max=127,
    dtype=torch.qint8, qscheme=torch.per_channel_symmetric, reduce_range=False,
    ch_axis=0)
"""Default fake_quant for per-channel weights."""

default_embedding_fake_quant = FakeQuantize.with_args(
    observer=MovingAveragePerChannelMinMaxObserver,
    qscheme=torch.per_channel_affine_float_qparams,
    dtype=torch.quint8, quant_min=0, quant_max=255, ch_axis=0,
    averaging_constant=1)
"""Default fake_quant for embeddings."""

default_embedding_fake_quant_4bit = FakeQuantize.with_args(
    observer=MovingAveragePerChannelMinMaxObserver,
    qscheme=torch.per_channel_affine_float_qparams,
    ch_axis=0, dtype=torch.quint4x2, averaging_constant=1)

default_histogram_fake_quant = FakeQuantize.with_args(
    observer=HistogramObserver, quant_min=0, quant_max=255,
    dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=True)
"""Fake_quant for activations using a histogram."""

default_fused_act_fake_quant = FusedMovingAvgObsFakeQuantize.with_args(
    observer=MovingAverageMinMaxObserver, quant_min=0, quant_max=255,
    dtype=torch.quint8)
"""Fused version of `default_fake_quant`, with improved performance."""

default_fused_wt_fake_quant = FusedMovingAvgObsFakeQuantize.with_args(
    observer=MovingAverageMinMaxObserver, quant_min=-128, quant_max=127,
    dtype=torch.qint8, qscheme=torch.per_tensor_symmetric)
"""Fused version of `default_weight_fake_quant`, with improved performance."""

default_fused_per_channel_wt_fake_quant = FusedMovingAvgObsFakeQuantize.with_args(
    observer=MovingAveragePerChannelMinMaxObserver, quant_min=-128, quant_max=127,
    dtype=torch.qint8, qscheme=torch.per_channel_symmetric)
"""Fused version of `default_per_channel_weight_fake_quant`, with improved performance."""

fused_wt_fake_quant_range_neg_127_to_127 = FusedMovingAvgObsFakeQuantize.with_args(
    observer=MovingAverageMinMaxObserver, quant_min=-127, quant_max=127,
    dtype=torch.qint8, qscheme=torch.per_tensor_symmetric, eps=2 ** -12)
"""Fused version of `default_weight_fake_quant`, with the 8-bit values restricted to [-127, +127], excluding -128."""

fused_per_channel_wt_fake_quant_range_neg_127_to_127 = FusedMovingAvgObsFakeQuantize.with_args(
    observer=MovingAveragePerChannelMinMaxObserver, quant_min=-127, quant_max=127,
    dtype=torch.qint8, qscheme=torch.per_channel_symmetric, eps=2 ** -12)
"""Fused version of `default_per_channel_weight_fake_quant`, with the 8-bit values restricted to [-127, +127], excluding -128."""
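# Illustrative sketch: the `default_*` names above are partially-applied
# constructors (built via `with_args`), not module instances. Calling one
# builds a fresh fake-quant module:
#
#     act_fq = default_fake_quant()         # FakeQuantize, quint8, per-tensor affine
#     wt_fq = default_weight_fake_quant()   # FakeQuantize, qint8, symmetric
#     act_fq(torch.randn(8))                # observe + fake-quantize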
def _is_fake_quant_script_module(mod):
    ''' Returns true if given mod is an instance of FakeQuantize script module.
    '''
    if isinstance(mod, torch.jit.RecursiveScriptModule):
        # qualified name looks like
        # '__torch__.torch.ao.quantization.fake_quantize.___torch_mangle_2.FakeQuantize'
        suffix = mod._c.qualified_name.split('.', 1)[1]
        name = re.sub(r'\.___torch_mangle_\d+', '', suffix)
        return name == 'torch.ao.quantization.fake_quantize.FakeQuantize' or \
            name == 'torch.ao.quantization.fake_quantize.FusedMovingAvgObsFakeQuantize'
    return False

def disable_fake_quant(mod):
    """
    Disable fake quantization for this module, if applicable. Example usage::

      # model is any PyTorch model
      model.apply(torch.ao.quantization.disable_fake_quant)

    """
    if isinstance(mod, FakeQuantizeBase) or _is_fake_quant_script_module(mod):
        mod.disable_fake_quant()

def enable_fake_quant(mod):
    """
    Enable fake quantization for this module, if applicable. Example usage::

      # model is any PyTorch model
      model.apply(torch.ao.quantization.enable_fake_quant)

    """
    if isinstance(mod, FakeQuantizeBase) or _is_fake_quant_script_module(mod):
        mod.enable_fake_quant()

def disable_observer(mod):
    """
    Disable observation for this module, if applicable. Example usage::

      # model is any PyTorch model
      model.apply(torch.ao.quantization.disable_observer)

    """
    if isinstance(mod, FakeQuantizeBase) or _is_fake_quant_script_module(mod):
        mod.disable_observer()

def enable_observer(mod):
    """
    Enable observation for this module, if applicable. Example usage::

      # model is any PyTorch model
      model.apply(torch.ao.quantization.enable_observer)

    """
    if isinstance(mod, FakeQuantizeBase) or _is_fake_quant_script_module(mod):
        mod.enable_observer()