U
    d3                     @   s"  d dl mZmZmZmZmZmZmZ d dlZd dl	Z	d dl
Z
d dlmZ d dlmZmZmZ ddlmZmZmZmZmZ ddlmZmZ ddlmZmZ d	d
lmZmZm Z m!Z! d dl"m#Z#m$Z$m%Z%m&Z& d dl'm(Z( d	dl)m*Z*m+Z+m,Z, d	dl-m.Z.m/Z/ d	dlm0Z0m1Z1m2Z2m3Z3m4Z4m5Z5 d dl6m7Z7m8Z8 d	dl9m9Z9 ej:j;eee<ee<e=f f ee<ef ee< f dddZ>eee<ef e?dddZ@eddddZAe*e*dddZBe*e*dddZCe*e*ddd ZDe*e*dd!d"ZEeeed#d$d%ZFeee<ee<e=f f ee<ef d&d'd(ZGeed)d*d+ZHeee<ej:j;f eej:j; d,d-d.ZIeee<ej:j;f ejJje?eee<ef  d/d0d1ZKeee<ej:j;f ee< ee<ef ee<ef d2d3d4ZLeeee<ej:j;f eeef ee d5d6d7ZMd=ee?ee<ef e?e?ee<ef eee<ef  ej:j;d:d;d<ZNdS )>    )AnyDictListOptionalSetCallableTupleN)GraphModule)GraphNodeArgument   )"activation_is_statically_quantizedweight_is_quantizedget_qparam_dict_parent_nameget_swapped_custom_module_class)
QConfigAnyqconfig_equals)convert_dict_to_ordered_dictupdate_qconfig_for_qat   )generate_qconfig_map$compare_prepare_convert_qconfig_dictupdate_qconfig_for_fusion%is_qconfig_supported_by_dtype_configs)-get_root_module_to_quantized_reference_moduleget_pattern_to_dtype_configsget_fused_module_classesget_qat_module_classes)get_native_backend_config_dict)QuantizedGraphModuleis_observed_moduleis_observed_standalone_module)update_obs_for_equalizationconvert_eq_obs)get_custom_module_class_keysget_quantize_node_infocreate_getattr_from_valuecollect_producer_nodes graph_module_from_producer_nodesWEIGHT_INDEX_DICT)_remove_qconfigis_activation_post_process)lower_to_fbgemm)observedreturnc                 C   s,   t | std| j}| j}| j}|||fS )Nz-incoming model must be produced by prepare_fx)r"   AssertionErrorZ_prepare_custom_config_dictZ_node_name_to_scopeZ_observed_node_names)r/   prepare_custom_config_dictnode_name_to_scopeobserved_node_names r5   D/tmp/pip-unpacked-wheel-ua33x9lu/torch/ao/quantization/fx/convert.pyrestore_state>   s    
r7   )nodeqconfig_mapr0   c                 C   s"   t | to | j|ko || j dkS )z] Check if a node has a qconfig of None, i.e. user requested to not quantize
    the node
    N)
isinstancer   name)r8   r9   r5   r5   r6   has_none_qconfigK   s    r<   c                 C   sn   | j jD ]`}|jdks|jtkr"qt|jD ]:\}}|t|j krDq,t|}|dkrVq,t| |}|  q,qdS )z Extract the subgraph that produces the weight for dynamic quant
    or weight only quant node and run the subgraph to observe the weight.
    Note that the observers of dynamic quant or weight only quant ops are
    run during the convert step.
    call_functionN)	graphnodesoptargetr+   	enumerateargsr)   r*   )r/   r8   iZnode_argZweight_observer_nodesZweight_observer_moduler5   r5   r6   run_weight_observersQ   s     rE   )	quantizedr0   c                 C   s   | }| j jD ]}|jdkr|jtjkrt|j}t|dkr|D ]@}| j 	| | j 
dtj|j|j}W 5 Q R X ||| q@| j | qt|| j |j} | S )Nr=   r   )r>   r?   r@   rA   torchZquantize_per_tensor_dynamiclistusersleninserting_beforecreate_noderC   kwargsreplace_input_with
erase_noder!   preserved_attr_namesrF   quantized_rootr8   rI   userZnew_noder5   r5   r6   duplicate_quantize_dynamic_nodeh   s"    
rT   c                 C   s   | }| j jD ]}|jdkr$|jdks:|jdkr|jtjkrt|j}t|dkr|D ]<}| j 	| | j 
dd|ji }W 5 Q R X ||| qT| j | qt|| j |j} | S )z
    If a dequantize node has multiple uses, duplicate it and create one dequantize node for each use.
    This is to enable the pattern matching to map from individual quant - dequant - ref_module to
    final quantized module.
    call_method
dequantizer=   r   )r>   r?   r@   rA   rG   rV   rH   rI   rJ   rK   rL   rC   rN   rO   r!   rP   rQ   r5   r5   r6   duplicate_dequantize_node{   s     

rW   c              
   C   s   | }| j jD ]z}t|j}dd |jD }t|dkr| j | | j dd|d ji }W 5 Q R X |D ]}|| | j 	| qjqt
|| j |j} | S )z
    Removes duplicate dequant nodes in the graph, for an operator that has multiple dequant nodes as a user,
    replace them with a single dequant node that can be shared across all the uses.
    c                 S   s:   g | ]2}|j d kr|jdks2|j dkr|jtjkr|qS )rU   rV   r=   )r@   rA   rG   rV   ).0rS   r5   r5   r6   
<listcomp>   s     
 

 z+remove_extra_dequantize.<locals>.<listcomp>r   rU   rV   r   )r>   r?   rH   rI   rJ   inserting_afterrL   rC   replace_all_uses_withrO   r!   rP   )rF   rR   r8   rI   Zdequant_usersZ	unique_dqZdequantr5   r5   r6   remove_extra_dequantize   s    
"
r\   c                 C   s   | }| j jD ]}|jdkr|jtjtjfkrt|j}|rB|d nd }t	|dkr|jdkr|jdkr|
|jd  | j | t|j}| j | |D ],}t|trt	t|jdkr| j | qqt|| j |j} | S )Nr=   r   r   rU   rV   )r>   r?   r@   rA   rG   Zquantize_per_tensorZquantize_per_channelrH   rI   rJ   r[   rC   rO   r:   r   r!   rP   )rF   rR   r8   rI   rS   	orig_argsargr5   r5   r6   remove_quant_dequant_pairs   s    
 
r_   )r^   r8   r>   c                 C   s   t | tr6| jdkr6| jdkr6| jd }|| | n`t | ttfr\| D ]}t||| qHn:t | t	r| 
 D ]}t||| qnntdt|   dS )z If the arg is a dequantize Node, or a list/tuple/dict of dequantize Node,
    we'll recursively remove the dequantize Node
    rU   rV   r   z6Unsupported node type in recursive remove dequantize: N)r:   r   r@   rA   rC   rN   rH   tuple!maybe_recursive_remove_dequantizedictvalueswarningswarntype)r^   r8   r>   quantize_nodeZarg_elementr5   r5   r6   ra      s    


ra   )obs_noder3   r9   c                 C   s   | j d }t|ts"td| |j|kr:||j dknd}|rt| j}|rX|d nd}d}|D ]&}|jdkrd|jt	j
jjkrd|} qqd|r|}d}	n|}d}	|r|j|kr||j \}
}nd}
|
|	fS )aj   Given and observer node, get the `Scope` or the fully qualified name for
    the submodule containing the observed node, also return a prefix of "_input"
    when the observed node is an input of a F.linear op, and not the output of another
    quantized op.
    TODO: this logic is hacky, we should think about how to remove it or make it more
    general
    r   z.Expecting observed node to be a Node, but got Nr=   _input )rC   r:   r   r1   r;   rH   rI   r@   rA   rG   nnZ
functionalZlinear)rh   r3   r9   observed_nodeZis_input_observer_onlyrI   Zfirst_linear_use_or_first_useZlinear_nodenprefixmodule_path_r5   r5   r6   get_module_path_and_prefix   s,    

rq   r8   r>   c              	   C   sL   | | 8 |d| f}t| jD ]}||k	r$|| | q$W 5 Q R X dS )z3 Inserts dequantize node for `node` in `graph`
    rV   N)rZ   rU   rb   rI   rN   )r8   r>   Zdequantize_nodeZ	user_noder5   r5   r6   insert_dequantize_node   s
    rs   )r8   modulesr0   c                 C   s@   | j  D ]0\}}|jdkr
|t|j }t|r
|  S q
dS )z\
    If the node is observed, return the observer
    instance. Otherwise, return None.
    call_moduleN)rI   itemsr@   strrA   r-   )r8   rt   Zmaybe_obs_noderp   	maybe_obsr5   r5   r6   maybe_get_observer_for_node  s    

ry   )r8   rt   modelis_referencebackend_config_dictc                 C   s  t jjjj}|t| j }|j }t	| j
}tt|D ]X}	|	|kr:||	 }
|
jdkr:|
jdkr:|
j
d }| |
| t|
jdkr:|j|
 q:|j }t|dkr|d dkstdt| |j ||||d}t| j\}}t|| || ||t| j< dS )a   Converts a observed standalone module to a quantized standalone module by calling
    the fx convert api, currently using the same `is_reference` flag as parent, but we may
    changing this behavior in the future (e.g. separating quantization and lowering for
    standalone module as well)

    Args:
      - node: The call_module node of the observed standalone module
      - modules: named_module of original model
      - model: original model
      - is_reference: a flag from parent provided by user to decide if we want to
        produce a reference model or a fbgemm/qnnpack model
      - backend_config_dict: backend configuration of the target backend of quantization
    rU   rV   r   zCurrently only quantized)r{   r|   N)rG   ZaoZquantizationZquantize_fxZ
convert_fxrw   rA   Z'_standalone_module_input_quantized_idxstolistrH   rC   rangerJ   r@   rN   rI   r>   rO   Z(_standalone_module_output_quantized_idxsr1   rs   r   setattr)r8   rt   rz   r{   r|   convertZobserved_standalone_moduleZsm_input_quantized_idxsrC   idxr^   rg   Zsm_output_quantized_idxsZquantized_standalone_moduleparent_namer;   r5   r5   r6   convert_standalone_module  s4    

r   )r8   rt   r4   r9   r|   c                 C   s.  |t | j }|j}d}t|}t||rV|j}| }t| j\}	}
t||	 |
| | j	|k}|dksvt
| |sv|szdS t|}|t|g }t||sdS t|}|sdS d}|}t|tjjjr|}|d }i }t|tjjr(| }| }||j ||j t|}t|}||d}nt|tjjr|jD ]R}t||r>|dr>t||}| }|jtjkr|| t|||< q>n$|dkr| }||j t|}t |}|t|d}|dk	st!dt| |"||}|dk	r||d< nt| j\}	}
t||	 |
| dS )a   Convert a weighted module to reference quantized module in the model
    If the QConfig of a QAT module is not set, the module will still be converted to
    a float module.

    Args:
      - node: The call_module node of the observed standalone module
      - modules: named_module of original model
      - observed_node_names: names for the set of observed fx node, we can skip
        this conversion if the node is not observed
    Nr   )	weight_ih	weight_hhweightz3No reference quantized module class configured for )#rw   rA   qconfigr   r:   Zweight_fake_quantZto_floatr   r   r;   r<   r   getrf   r   r   rG   rk   Z	intrinsicZ_FusedModuleZRNNCellBaser   r   r   r   ZLSTMZ_flat_weights_nameshasattr
startswithgetattrZdtypeZqint8r   r1   
from_float)r8   rt   r4   r9   r|   Zoriginal_moduler   Zweight_post_processqat_module_classesr   r;   Zis_observedZpattern_to_dtype_configsZdtype_configsZis_weight_quantizedZfused_moduleZfloat_moduleZwq_or_wq_dictZweight_post_process_ihZweight_post_process_hhZweight_qparams_ihZweight_qparams_hhZwnr   )root_module_to_quantized_reference_moduleZref_qmodule_clsZref_qmoduler5   r5   r6   convert_weighted_moduleR  st    









r   )r8   r>   rt   custom_module_class_mapping(statically_quantized_custom_module_nodesc                 C   s   |t | j }t| |}|j}t|r||  | jd }t|tsRt	d| |j
dkr|jdkr| ||jd  t|jdkr|| t| |}	|	dk	st	|	|_t|||}
|
|}t| j\}}t|| || dS )a   Converts an observed custom module to a quantized custom module based on
    `custom_module_class_mapping`
    For static quantization, we'll also remove the previous `dequantize` node and
    attach the observer node for output to the module, the observer for the node
    will be converted to a dequantize node instead of quantize-dequantize pairs
    later in the graph. In the end we would have a quantized custom module that
    has the same interface as a default quantized module in nn.quantized namespace,
    i.e. quantized input and quantized output.

    Args:
      - node: The call_module node of the observed standalone module
      - graph: The graph containing the node
      - modules: named_module of original model
      - custom_module_class_mapping: mapping from observed custom module class to
        quantized custom module class, used to swap custom modules
      - statically_quantized_custom_module_nodes: we'll add the custom module node
        if we find it is statically quantized, this will be used later when converting
        observers to quant/dequant node pairs, if the observed node is a statically
        quantized custom module nodes, we'll convert the observer to a dequantize node,
        this is to keep the interface the same as the default quantized module.
        TODO: maybe we want to redesign this part to align with reference model design
        as well, but there has been some discussions around the interface, so we can do
        it later.
    r   zDExpecting the argument for custom module node to be a Node, but got rU   rV   N)rw   rA   ry   r   r   addrC   r:   r   r1   r@   rN   rJ   rI   rO   activation_post_processr   Zfrom_observedr   r   )r8   r>   rt   r   r   Zobserved_custom_modulerx   r   Z	prev_noder   Zquantized_custom_module_classZquantized_custom_moduler   r;   r5   r5   r6   convert_custom_module  s2    




  r   FT)rz   r{   convert_custom_config_dictis_standalone_module_remove_qconfig_flagconvert_qconfig_dictr|   r0   c           &   	   C   s`  |dkri }t | \}}}	| j}
|s,|   t| jdd}|r| j}t|}t| | j	rht
|i }t| |}t|| t| || j||}|
 D ]N\}}||kstd||| dk	rt||| std|||| q|}
t|d}|di }| jdk	r"t| |}t| || t|  g }| jjD ]}|jdkr6||j q6tjjt t!t"t#tjjf t"t#t$t#t%f f t"t#t&f ddd	d
}t!t ddd}d}|dg }|dg }|dkrt' }t(|}t)|* }t+|}t,|}t- }t.| jjD ]}|jdkrB|} |d7 }| |krt/|| j q
|jdkrt0|dkr`q
|}!|j1d }"t2|"t.t)fr|D ]}#t3|"|# |!| j qn>t2|"t!tfrd|krt3|"|!| j nt45dt%|"  q
|jdkr
t6||j7 r4|j1d }$|$|kr||| j n|| | j||||
 nt8||j7 rVt9||| || nt%||j7 t-|:|:|krt%||j7 |krt%||j7 d |krq
t;|||	|
| n&t%||j7 |kr
t<|| j||| q
t-|dg }%t=| t| j|%} | j>  | ?  |sNt@| } tA| } tB| |
|} tC| } tD| } |r\tE|  | S )a  
    We will convert an observed model (a module with observer calls) to a reference
    quantized model, the rule is simple:
    1. for each observer module call in the graph, we'll convert it to calls to
       quantize and dequantize functions based on the observer instance
    2. for weighted operations like linear/conv, we need to convert them to reference
       quantized module, this requires us to know whether the dtype configured for the
       weight is supported in the backend, this is done in prepare step and the result
       is stored in observed_node_names, we can decide whether we need to swap the
       module based on this set

    standalone_module means it a submodule that is not inlined in
    parent module, and will be quantized separately as one unit.

    Returns a quantized standalone module, whether input/output is quantized is
    specified by prepare_custom_config_dict, with
    input_quantized_idxs, output_quantized_idxs, please
    see docs for prepare_fx for details
    NF)Zremove_duplicatez&Expected key {} in convert qconfig_mapz~Expected k {} to have the same value in prepare qconfig_dict                 and convert qconfig_dict, found {} updated to {}.Z)observed_to_quantized_custom_module_classplaceholder)rz   r>   r8   rt   r3   r9   r0   c              	      sZ  |dk	st t|jtst t|| \}}||j }t|}	t fddt|jt|j	
  D }
|
st|	dkr||  ||jd  || W 5 Q R X n|	\}}}|| |jd }|g}| D ]>\}}|dkrt| ||| | |}|| q|| q|||t|i }|jd|fd}|| || W 5 Q R X dS )z Replace activation_post_process module call node with quantize and
        dequantize node

        Before:
        ... -> observer_0(x) -> ...
        After:
        ... -> torch.quantize_per_tensor(x, ...) -> x.dequantize() -> ...
        Nc                    s   g | ]}t | qS r5   )r<   )rX   rm   r9   r5   r6   rY   r  s    zSconvert.<locals>.replace_observer_with_quantize_dequantize_node.<locals>.<listcomp>r   )Z_scale_Z_zero_point_rV   )rC   )r1   r:   rA   rw   rq   r'   allrH   rC   rI   keysrK   r[   rO   rv   r(   appendrL   r`   rU   )rz   r>   r8   rt   r3   r9   ro   rn   Zobserver_moduleZmaybe_quantize_node_infoZskip_replacementZ	node_typeZquantize_opZqparamsZ
input_nodeinputskeyvalueZqparam_nodeZquantized_nodeZdequantized_noder5   r   r6   .replace_observer_with_quantize_dequantize_node\  s2    




z?convert.<locals>.replace_observer_with_quantize_dequantize_noderr   c                 S   sD   | j d }t|ts"td| | | ||  t|| d S )Nr   z@Expecting the for call custom module node to be a Node, but got )rC   r:   r   r1   r[   rO   rs   )r8   r>   Zcall_custom_module_noder5   r5   r6   %replace_observer_with_dequantize_node  s    


z6convert.<locals>.replace_observer_with_dequantize_noder   input_quantized_idxsoutput_quantized_idxsr   outputz1Unsupported node type for output_quantized_idxs: ru   preserved_attributes)Fr7   Z_qconfig_mapcpurb   Znamed_modulesZ_qconfig_dictcopydeepcopyr   Z_is_qatr   r   r   r   r>   rv   r1   formatr   r&   r   Z_equalization_qconfig_mapr$   r%   rE   r?   r@   r   r;   rG   rk   Moduler
   r   r   rw   r   rf   r   r    r   r`   r   r   r   setrH   rs   rJ   rC   r:   ra   rd   re   r-   rA   r#   r   unionr   r   r!   Zeliminate_dead_codeZ	recompilerW   rT   r.   r_   r\   r,   )&rz   r{   r   r   r   r   r|   r3   r2   r4   r9   rt   Zprepare_qconfig_dictZmodules_copyZconvert_qconfig_mapkvZcustom_module_classesr   Zweight_eq_obs_dictZgraph_inputsr8   r   r   Zplaceholder_node_seen_cntr   r   r   Zroot_module_classesr   Zfused_module_classesr   Zcur_placeholder_node_idxZreturn_noder   r   rl   r   r5   r5   r6   r     s   



  

 9
  





               
r   )FNFTNN)Otypingr   r   r   r   r   r   r   rG   r   rd   Ztorch.fxr	   Ztorch.fx.graphr
   r   r   utilsr   r   r   r   r   r   r   r   Zqconfig_dict_utilsr   r   Zqconfig_utilsr   r   r   r   Z*torch.ao.quantization.backend_config.utilsr   r   r   r   Z$torch.ao.quantization.backend_configr    Zgraph_moduler!   r"   r#   Z	_equalizer$   r%   r&   r'   r(   r)   r*   r+   Ztorch.ao.quantization.quantizer,   r-   r.   rk   r   rw   rf   r7   boolr<   rE   rT   rW   r\   r_   ra   rq   rs   ry   Zfxr   r   r   r   r5   r5   r5   r6   <module>   s   $ 	

0
?

m
B       

 