import inspect
import warnings
from collections import abc, defaultdict
from enum import Enum
from typing import Any, cast, Dict, List, Optional, Tuple

import torch

from .common import amp_definitely_not_available

__all__ = ["OptState", "GradScaler"]


class _MultiDeviceReplicator:
    """
    Lazily serves copies of a tensor to requested devices.  Copies are cached per-device.
    N)master_tensorreturnc                 C   s&   |j s|jjdkst|| _i | _d S )Nxla)is_cudadevicetypeAssertionErrormaster_per_device_tensors)selfr    r   Y/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/torch/cuda/amp/grad_scaler.py__init__   s    z_MultiDeviceReplicator.__init__r   c                 C   s6   | j |d }|d kr2| jj|ddd}|| j |< |S )NT)r   non_blockingcopy)r   getr   to)r   r   retvalr   r   r   r       s
    
z_MultiDeviceReplicator.get)__name__
__module____qualname____doc__torchTensorr   r    r   r   r   r   r      s   r   c                   @   s   e Zd ZdZdZdZdS )r   r   r      N)r#   r$   r%   READYUNSCALEDSTEPPEDr   r   r   r   r   %   s   c                   C   s   t ji dS )N)stagefound_inf_per_device)r   r*   r   r   r   r   _refresh_per_optimizer_state+   s    r/   c                   @   s   e Zd ZU eej ed< eej ed< eeee	e
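

# Illustrative sketch (not part of the module): how the lazy per-device cache above
# behaves.  The first get() for a device copies the master tensor; later calls for the
# same device return the cached copy.  The device strings here are assumptions.
#
#     scale = torch.full((1,), 65536.0, device="cuda:0")
#     rep = _MultiDeviceReplicator(scale)
#     a = rep.get(torch.device("cuda:0"))   # first request: copy is made and cached
#     b = rep.get(torch.device("cuda:0"))   # cache hit: `a is b` is True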


class OptState(Enum):
    READY = 0
    UNSCALED = 1
    STEPPED = 2


def _refresh_per_optimizer_state():
    return {"stage": OptState.READY, "found_inf_per_device": {}}


class GradScaler:
    _scale: Optional[torch.Tensor]
    _growth_tracker: Optional[torch.Tensor]
    _per_optimizer_states: Dict[int, Dict[str, Any]]

    def __init__(
        self,
        init_scale=2.0 ** 16,
        growth_factor=2.0,
        backoff_factor=0.5,
        growth_interval=2000,
        enabled=True,
    ):
        if enabled and amp_definitely_not_available():
            warnings.warn(
                "torch.cuda.amp.GradScaler is enabled, but CUDA is not available.  Disabling."
            )
            self._enabled = False
        else:
            self._enabled = enabled

        if self._enabled:
            assert growth_factor > 1.0, "The growth factor must be > 1.0."
            assert backoff_factor < 1.0, "The backoff factor must be < 1.0."

            self._init_scale = init_scale
            # self._scale is lazily initialized during the first call to scale()
            self._scale = None
            self._growth_factor = growth_factor
            self._backoff_factor = backoff_factor
            self._growth_interval = growth_interval
            self._init_growth_tracker = 0
            # self._growth_tracker is lazily initialized during the first call to scale()
            self._growth_tracker = None
            self._per_optimizer_states = defaultdict(_refresh_per_optimizer_state)

    def _check_scale_growth_tracker(self, funcname) -> Tuple[torch.Tensor, torch.Tensor]:
        fix = "This may indicate your script did not use scaler.scale(loss or outputs) earlier in the iteration."
        assert self._scale is not None, f"Attempted {funcname} but _scale is None.  " + fix
        assert self._growth_tracker is not None, f"Attempted {funcname} but _growth_tracker is None.  " + fix
        return (self._scale, self._growth_tracker)

    def _lazy_init_scale_growth_tracker(self, dev):
        assert self._growth_tracker is None, "_growth_tracker initialized before _scale"
        self._scale = torch.full((1,), self._init_scale, dtype=torch.float32, device=dev)
        self._growth_tracker = torch.full((1,), self._init_growth_tracker, dtype=torch.int32, device=dev)

    def scale(self, outputs):
        """
        Multiplies ('scales') a tensor or list of tensors by the scale factor.

        Returns scaled outputs.  If this instance of :class:`GradScaler` is not enabled, outputs are returned
        unmodified.

        Args:
            outputs (Tensor or iterable of Tensors):  Outputs to scale.
        """
        if not self._enabled:
            return outputs

        # Short-circuit for the common case of a single tensor.
        if isinstance(outputs, torch.Tensor):
            assert outputs.is_cuda or outputs.device.type == "xla"
            if self._scale is None:
                self._lazy_init_scale_growth_tracker(outputs.device)
            assert self._scale is not None
            return outputs * self._scale.to(device=outputs.device, non_blocking=True)

        # Invoke the more complex machinery only if we're treating multiple outputs.
        stash: List[_MultiDeviceReplicator] = []  # holds the scale replicator, created on first use

        def apply_scale(val):
            if isinstance(val, torch.Tensor):
                assert val.is_cuda or val.device.type == "xla"
                if len(stash) == 0:
                    if self._scale is None:
                        self._lazy_init_scale_growth_tracker(val.device)
                    assert self._scale is not None
                    stash.append(_MultiDeviceReplicator(self._scale))
                return val * stash[0].get(val.device)
            elif isinstance(val, abc.Iterable):
                iterable = map(apply_scale, val)
                if isinstance(val, (list, tuple)):
                    return type(val)(iterable)
                return iterable
            else:
                raise ValueError("outputs must be a Tensor or an iterable of Tensors")

        return apply_scale(outputs)

    def _unscale_grads_(self, optimizer, inv_scale, found_inf, allow_fp16):
        per_device_inv_scale = _MultiDeviceReplicator(inv_scale)
        per_device_found_inf = _MultiDeviceReplicator(found_inf)

        # To set up _amp_foreach_non_finite_check_and_unscale_, split grads by device and dtype.
        per_device_and_dtype_grads = defaultdict(lambda: defaultdict(list))
        with torch.no_grad():
            for group in optimizer.param_groups:
                for param in group["params"]:
                    if param.grad is None:
                        continue
                    if (not allow_fp16) and param.grad.dtype == torch.float16:
                        raise ValueError("Attempting to unscale FP16 gradients.")
                    if param.grad.is_sparse:
                        # Coalescing sums values with duplicate indices; for scaled fp16 values
                        # this can overflow, so check the coalesced _values().
                        if param.grad.dtype is torch.float16:
                            param.grad = param.grad.coalesce()
                        to_unscale = param.grad._values()
                    else:
                        to_unscale = param.grad

                    per_device_and_dtype_grads[to_unscale.device][to_unscale.dtype].append(to_unscale)

            for device, per_dtype_grads in per_device_and_dtype_grads.items():
                for grads in per_dtype_grads.values():
                    torch._amp_foreach_non_finite_check_and_unscale_(
                        grads,
                        per_device_found_inf.get(device),
                        per_device_inv_scale.get(device),
                    )

        return per_device_found_inf._per_device_tensors

    def unscale_(self, optimizer):
        """
        Divides ("unscales") the optimizer's gradient tensors by the scale factor.

        :meth:`unscale_` is optional, serving cases where you need to
        :ref:`modify or inspect gradients<working-with-unscaled-gradients>`
        between the backward pass(es) and :meth:`step`.
        If :meth:`unscale_` is not called explicitly,  gradients will be unscaled  automatically during :meth:`step`.

        Simple example, using :meth:`unscale_` to enable clipping of unscaled gradients::

            ...
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
            scaler.step(optimizer)
            scaler.update()

        Args:
            optimizer (torch.optim.Optimizer):  Optimizer that owns the gradients to be unscaled.

        .. note::
            :meth:`unscale_` does not incur a CPU-GPU sync.

        .. warning::
            :meth:`unscale_` should only be called once per optimizer per :meth:`step` call,
            and only after all gradients for that optimizer's assigned parameters have been accumulated.
            Calling :meth:`unscale_` twice for a given optimizer between each :meth:`step` triggers a RuntimeError.

        .. warning::
            :meth:`unscale_` may unscale sparse gradients out of place, replacing the ``.grad`` attribute.
        Nunscale_r-   zMunscale_() has already been called on this optimizer since the last update().z(unscale_() is being called after step().r           rF   Fr.   )r9   rE   r1   idr   r+   RuntimeErrorr,   r0   r   doubleZ
reciprocalfloatr'   rH   rI   r   rg   )r   rb   optimizer_staterc   rd   r   r   r   rh      s(     
   zGradScaler.unscale_c                 O   s.   d }t dd |d  D s*|j||}|S )Nc                 s   s   | ]}|  V  qd S r\   )item).0vr   r   r   	<genexpr>:  s     z-GradScaler._maybe_opt_step.<locals>.<genexpr>r.   )sumra   step)r   rb   rn   argskwargsr"   r   r   r   _maybe_opt_step8  s    zGradScaler._maybe_opt_stepc           	         s  | j s|j||S d|kr"td| d | jt| }|d tjkrPtdd}t|dr*|j	r*|}dt
|jjk}|rtd	t |d| i nd|d tjkr| | |   ttjt fd
d|d  D }|d tjkrdn |_||_|j||}tj|d< |s&|`|`|S |d tjkrD| | t|d dks^td| j||f||}tj|d< |S )a  
        :meth:`step` carries out the following two operations:

        1.  Internally invokes ``unscale_(optimizer)`` (unless :meth:`unscale_` was explicitly called for ``optimizer``
            earlier in the iteration).  As part of the :meth:`unscale_`, gradients are checked for infs/NaNs.
        2.  If no inf/NaN gradients are found, invokes ``optimizer.step()`` using the unscaled
            gradients.  Otherwise, ``optimizer.step()`` is skipped to avoid corrupting the params.

        ``*args`` and ``**kwargs`` are forwarded to ``optimizer.step()``.

        Returns the return value of ``optimizer.step(*args, **kwargs)``.

        Args:
            optimizer (torch.optim.Optimizer):  Optimizer that applies the gradients.
            args:  Any arguments.
            kwargs:  Any keyword arguments.

        .. warning::
            Closure use is not currently supported.
        """
        if not self._enabled:
            return optimizer.step(*args, **kwargs)

        if "closure" in kwargs:
            raise RuntimeError("Closure use is not currently supported if GradScaler is enabled.")

        self._check_scale_growth_tracker("step")

        optimizer_state = self._per_optimizer_states[id(optimizer)]

        if optimizer_state["stage"] is OptState.STEPPED:
            raise RuntimeError("step() has already been called since the last update().")

        retval = None

        if hasattr(optimizer, "_step_supports_amp_scaling") and optimizer._step_supports_amp_scaling:
            # This optimizer has customized scale-handling logic, so we can call optimizer.step() directly.
            kwargs_ = kwargs
            has_grad_scaler_kwarg = "grad_scaler" in inspect.signature(optimizer.step).parameters
            if has_grad_scaler_kwarg:
                warnings.warn(
                    "GradScaler is going to stop passing itself as a keyword argument to the passed "
                    "optimizer. In the near future GradScaler registers `grad_scale: Tensor` and "
                    "`found_inf: Tensor` to the passed optimizer and let the optimizer use them directly.",
                    FutureWarning,
                )
                kwargs_.update({"grad_scaler": self})
            else:
                if optimizer_state["stage"] is OptState.READY:
                    self._check_inf_per_device(optimizer)
                scaler = self._get_scale_async()
                found_inf = cast(
                    torch.Tensor,
                    sum([
                        t.to(scaler.device, non_blocking=True)
                        for t in optimizer_state["found_inf_per_device"].values()
                    ]),
                )
                optimizer.grad_scale = None if optimizer_state["stage"] == OptState.UNSCALED else scaler
                optimizer.found_inf = found_inf
            retval = optimizer.step(*args, **kwargs_)
            optimizer_state["stage"] = OptState.STEPPED
            if not has_grad_scaler_kwarg:
                del optimizer.grad_scale
                del optimizer.found_inf
            return retval

        if optimizer_state["stage"] is OptState.READY:
            self.unscale_(optimizer)

        assert len(optimizer_state["found_inf_per_device"]) > 0, "No inf checks were recorded for this optimizer."

        retval = self._maybe_opt_step(optimizer, optimizer_state, *args, **kwargs)

        optimizer_state["stage"] = OptState.STEPPED

        return retval
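
    # Illustrative sketch (not part of the original source): with several optimizers, call
    # step() once per optimizer and update() only once per iteration, as the update()
    # docstring below requires.  `opt_g`, `opt_d`, `loss_g`, and `loss_d` are hypothetical;
    # retain_graph is only needed if the two losses share part of a graph.
    #
    #     scaler.scale(loss_g).backward(retain_graph=True)
    #     scaler.scale(loss_d).backward()
    #     scaler.step(opt_g)
    #     scaler.step(opt_d)
    #     scaler.update()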
zGradScaler.stepNc                    s  | j s
dS | d\ }|dk	rt|tr8| j| nLd}t|tjjsRt	||
 dksft	||jdksxt	|| j| nz fdd| j D }t|dkst	d	|d }t|dkrtdt|D ]}||| 7 }qt ||| j| j| j tt| _dS )
aS  
        Updates the scale factor.

        If any optimizer steps were skipped the scale is multiplied by ``backoff_factor``
        to reduce it. If ``growth_interval`` unskipped iterations occurred consecutively,
        the scale is multiplied by ``growth_factor`` to increase it.

        Passing ``new_scale`` sets the new scale value manually. (``new_scale`` is not
        used directly, it's used to fill GradScaler's internal scale tensor. So if
        ``new_scale`` was a tensor, later in-place changes to that tensor will not further
        affect the scale GradScaler uses internally.)

        Args:
            new_scale (float or :class:`torch.cuda.FloatTensor`, optional, default=None):  New scale factor.

        .. warning::
            :meth:`update` should only be called at the end of the iteration, after ``scaler.step(optimizer)`` has
            been invoked for all optimizers used this iteration.

        .. warning::
            For performance reasons, we do not check the scale factor value to avoid synchronizations,
            so the scale factor is not guaranteed to be above 1. If the scale falls below 1 and/or
            you are seeing NaNs in your gradients or loss, something is likely wrong. For example,
            bf16-pretrained models are often incompatible with AMP/fp16 due to differing dynamic ranges.
        """
        if not self._enabled:
            return

        _scale, _growth_tracker = self._check_scale_growth_tracker("update")

        if new_scale is not None:
            # Accept a new user-defined scale.
            if isinstance(new_scale, float):
                self._scale.fill_(new_scale)
            else:
                reason = "new_scale should be a float or a 1-element torch.cuda.FloatTensor with requires_grad=False."
                assert isinstance(new_scale, torch.cuda.FloatTensor), reason
                assert new_scale.numel() == 1, reason
                assert new_scale.requires_grad is False, reason
                self._scale.copy_(new_scale)
        else:
            # Consume shared inf/nan data collected from optimizers to update the scale.
            # If all found_inf tensors are on the same device as self._scale, this operation is asynchronous.
            found_infs = [
                found_inf.to(device=_scale.device, non_blocking=True)
                for state in self._per_optimizer_states.values()
                for found_inf in state["found_inf_per_device"].values()
            ]

            assert len(found_infs) > 0, "No inf checks were recorded prior to update."

            found_inf_combined = found_infs[0]
            if len(found_infs) > 1:
                for i in range(1, len(found_infs)):
                    found_inf_combined += found_infs[i]

            torch._amp_update_scale_(
                _scale,
                _growth_tracker,
                found_inf_combined,
                self._growth_factor,
                self._backoff_factor,
                self._growth_interval,
            )

        # To prepare for the next iteration, clear the data collected from optimizers this iteration.
        self._per_optimizer_states = defaultdict(_refresh_per_optimizer_state)
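
    # Illustrative sketch (not part of the original source): because update() backs off the
    # scale after an overflowing iteration, comparing get_scale() before and after update()
    # is one way to detect that the optimizer step was skipped.  Note that get_scale()
    # forces a CPU-GPU sync.
    #
    #     prev_scale = scaler.get_scale()
    #     scaler.step(optimizer)
    #     scaler.update()
    #     step_was_skipped = scaler.get_scale() < prev_scale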

    def _get_scale_async(self):
        return self._scale

    def get_scale(self):
        """
        Returns a Python float containing the current scale, or 1.0 if scaling is disabled.

        .. warning::
            :meth:`get_scale` incurs a CPU-GPU sync.
        """
        if self._enabled:
            return self._init_scale if self._scale is None else self._get_scale_async().item()
        return 1.0

    def get_growth_factor(self):
        """
        Returns a Python float containing the scale growth factor.
        """
        return self._growth_factor

    def set_growth_factor(self, new_factor):
        """
        Args:
            new_factor (float):  Value to use as the new scale growth factor.
        """
        self._growth_factor = new_factor

    def get_backoff_factor(self):
        """
        Returns a Python float containing the scale backoff factor.
        """
        return self._backoff_factor

    def set_backoff_factor(self, new_factor):
        """
        Args:
            new_factor (float):  Value to use as the new scale backoff factor.
        """
        self._backoff_factor = new_factor

    def get_growth_interval(self):
        """
        Returns a Python int containing the growth interval.
        """
        return self._growth_interval

    def set_growth_interval(self, new_interval):
        """
        Args:
            new_interval (int):  Value to use as the new growth interval.
        """
        self._growth_interval = new_interval

    def _get_growth_tracker(self):
        if self._enabled:
            return self._init_growth_tracker if self._growth_tracker is None else self._growth_tracker.item()
        return 0

    def is_enabled(self):
        """
        Returns a bool indicating whether this instance is enabled.
        """
        return self._enabled

    def state_dict(self):
        """
        Returns the state of the scaler as a :class:`dict`.  It contains five entries:

        * ``"scale"`` - a Python float containing the current scale
        * ``"growth_factor"`` - a Python float containing the current growth factor
        * ``"backoff_factor"`` - a Python float containing the current backoff factor
        * ``"growth_interval"`` - a Python int containing the current growth interval
        * ``"_growth_tracker"`` - a Python int containing the number of recent consecutive unskipped steps.

        If this instance is not enabled, returns an empty dict.

        .. note::
           If you wish to checkpoint the scaler's state after a particular iteration, :meth:`state_dict`
           should be called after :meth:`update`.
        """
        return (
            {
                "scale": self.get_scale(),
                "growth_factor": self._growth_factor,
                "backoff_factor": self._backoff_factor,
                "growth_interval": self._growth_interval,
                "_growth_tracker": self._get_growth_tracker(),
            }
            if self._enabled
            else {}
        )

    def load_state_dict(self, state_dict):
        """
        Loads the scaler state.  If this instance is disabled, :meth:`load_state_dict` is a no-op.

        Args:
           state_dict(dict): scaler state.  Should be an object returned from a call to :meth:`state_dict`.
        """
        if not self._enabled:
            return

        if len(state_dict) == 0:
            raise RuntimeError(
                "The source state dict is empty, possibly because it was saved "
                "from a disabled instance of GradScaler."
            )

        self._init_scale = state_dict["scale"]
        if self._scale is not None:
            self._scale.fill_(state_dict["scale"])
        self._growth_factor = state_dict["growth_factor"]
        self._backoff_factor = state_dict["backoff_factor"]
        self._growth_interval = state_dict["growth_interval"]
        self._init_growth_tracker = state_dict["_growth_tracker"]
        if self._growth_tracker is not None:
            self._growth_tracker.fill_(state_dict["_growth_tracker"])
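
    # Illustrative sketch (not part of the original source): saving and restoring the
    # scaler together with model and optimizer state.  `model`, `optimizer`, and `PATH`
    # are assumed to be defined by the caller.
    #
    #     torch.save({"model": model.state_dict(),
    #                 "optimizer": optimizer.state_dict(),
    #                 "scaler": scaler.state_dict()}, PATH)
    #
    #     checkpoint = torch.load(PATH)
    #     model.load_state_dict(checkpoint["model"])
    #     optimizer.load_state_dict(checkpoint["optimizer"])
    #     scaler.load_state_dict(checkpoint["scaler"])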

    def __getstate__(self):
        state = self.__dict__.copy()
        if self._enabled:
            assert len(self._per_optimizer_states) == 0, (
                "A GradScaler instance may only be pickled at the beginning "
                "of an iteration, or at the end after scaler.update()."
            )
            # Avoid pickling the _scale and _growth_tracker tensors directly; the unpickled
            # instance reinitializes them lazily from the recorded init values.
            state["_init_scale"] = self.get_scale()
            state["_init_growth_tracker"] = self._get_growth_tracker()
            state["_scale"] = None
            state["_growth_tracker"] = None
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)

    def _check_inf_per_device(self, optimizer):
        _scale, _ = self._check_scale_growth_tracker("_check_inf_per_device")

        dummy_inv_scale = torch.full((1,), 1.0, dtype=torch.float32, device=_scale.device)
        found_inf = torch.full((1,), 0.0, dtype=torch.float32, device=_scale.device)

        self._per_optimizer_states[id(optimizer)]["found_inf_per_device"] = self._unscale_grads_(
            optimizer, dummy_inv_scale, found_inf, True
        )

        return self._per_optimizer_states[id(optimizer)]["found_inf_per_device"]

    def _found_inf_per_device(self, optimizer):
        return self._per_optimizer_states[id(optimizer)]["found_inf_per_device"]
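

# Illustrative usage sketch (not part of the original source): a typical mixed-precision
# training iteration with GradScaler; `model`, `optimizer`, and `data_loader` are assumed
# to be defined by the caller.
#
#     scaler = GradScaler()
#     for inputs, targets in data_loader:
#         optimizer.zero_grad(set_to_none=True)
#         with torch.autocast(device_type="cuda", dtype=torch.float16):
#             loss = torch.nn.functional.cross_entropy(model(inputs), targets)
#         scaler.scale(loss).backward()   # backprop on the scaled loss
#         scaler.step(optimizer)          # unscales grads; skips the step on inf/NaN
#         scaler.update()                 # grows or backs off the scale for the next iteration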