from typing import Dict, List, Optional, Tuple

import torch
import torch.optim._functional as F
from torch import Tensor


# A TorchScript-compatible, functional Adam optimizer: gradients are passed
# explicitly to ``step`` / ``step_param`` instead of being read from
# ``param.grad``, so a distributed optimizer can keep gradients and
# parameters separate and update parameters from multiple trainer threads.
@torch.jit.script
class _FunctionalAdam(object):
    def __init__(
        self,
        params: List[Tensor],
        lr: float = 1e-3,
        betas: Tuple[float, float] = (0.9, 0.999),
        eps: float = 1e-8,
        weight_decay: float = 0.0,
        amsgrad: bool = False,
        maximize: bool = False,
        foreach: bool = False,
        _allow_empty_param_list: bool = False,
    ):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        if not 0.0 <= weight_decay:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))

        self.defaults = {
            "lr": lr,
            "eps": eps,
            "beta1": betas[0],
            "beta2": betas[1],
            "weight_decay": weight_decay,
        }
        self.amsgrad = amsgrad
        self.maximize = maximize
        self.foreach = foreach
        # Per-parameter optimizer state (step count and moment estimates).
        self.state = torch.jit.annotate(Dict[torch.Tensor, Dict[str, torch.Tensor]], {})

        if len(params) == 0 and not _allow_empty_param_list:
            raise ValueError("optimizer got an empty parameter list")

        # There is only a single param_group; additional groups are not supported.
        self.param_group = {"params": params}

    def step_param(self, param: Tensor, grad: Optional[Tensor]):
        """
        Similar to step, but operates on a single parameter and optionally a
        gradient tensor.
        """
        params_with_grad = []
        grads = []
        exp_avgs = []
        exp_avg_sqs = []
        max_exp_avg_sqs = []
        state_steps: List[Tensor] = []
        if grad is not None:
            params_with_grad.append(param)
            grads.append(grad)
        # Lazy state initialization.
        if param not in self.state:
            self.state[param] = {}
            state = self.state[param]
            state["step"] = torch.tensor(0.0)
            # Exponential moving averages of gradient and squared gradient values.
            state["exp_avg"] = torch.zeros_like(param, memory_format=torch.preserve_format)
            state["exp_avg_sq"] = torch.zeros_like(param, memory_format=torch.preserve_format)
            if self.amsgrad:
                state["max_exp_avg_sq"] = torch.zeros_like(param, memory_format=torch.preserve_format)

        state = self.state[param]
        exp_avgs.append(state["exp_avg"])
        exp_avg_sqs.append(state["exp_avg_sq"])
        if self.amsgrad:
            max_exp_avg_sqs.append(state["max_exp_avg_sq"])
        state_steps.append(state["step"])

        with torch.no_grad():
            F.adam(
                params_with_grad,
                grads,
                exp_avgs,
                exp_avg_sqs,
                max_exp_avg_sqs,
                state_steps,
                amsgrad=self.amsgrad,
                maximize=self.maximize,
                beta1=self.defaults["beta1"],
                beta2=self.defaults["beta2"],
                lr=self.defaults["lr"],
                weight_decay=self.defaults["weight_decay"],
                eps=self.defaults["eps"],
                foreach=self.foreach,
            )

    def step(self, gradients: List[Optional[Tensor]]):
        params = self.param_group["params"]
        params_with_grad = []
        grads = []
        exp_avgs = []
        exp_avg_sqs = []
        max_exp_avg_sqs = []
        state_steps: List[Tensor] = []

        if len(params) != len(gradients):
            raise ValueError(
                "the gradients passed in does not equal to the size of the parameters!"
                + f"Params length: {len(params)}. "
                + f"Gradients length: {len(gradients)}"
            )

        for param, gradient in zip(self.param_group["params"], gradients):
            if gradient is not None:
                params_with_grad.append(param)
                grads.append(gradient)
                # Lazy state initialization.
                if param not in self.state:
                    self.state[param] = {}
                    state = self.state[param]
                    state["step"] = torch.tensor(0.0)
                    # Exponential moving averages of gradient and squared gradient values.
                    state["exp_avg"] = torch.zeros_like(param, memory_format=torch.preserve_format)
                    state["exp_avg_sq"] = torch.zeros_like(param, memory_format=torch.preserve_format)
                    if self.amsgrad:
                        state["max_exp_avg_sq"] = torch.zeros_like(param, memory_format=torch.preserve_format)

                state = self.state[param]
                exp_avgs.append(state["exp_avg"])
                exp_avg_sqs.append(state["exp_avg_sq"])
                if self.amsgrad:
                    max_exp_avg_sqs.append(state["max_exp_avg_sq"])
                state_steps.append(state["step"])

        with torch.no_grad():
            F.adam(
                params_with_grad,
                grads,
                exp_avgs,
                exp_avg_sqs,
                max_exp_avg_sqs,
                state_steps,
                amsgrad=self.amsgrad,
                maximize=self.maximize,
                beta1=self.defaults["beta1"],
                beta2=self.defaults["beta2"],
                lr=self.defaults["lr"],
                weight_decay=self.defaults["weight_decay"],
                eps=self.defaults["eps"],
                foreach=self.foreach,
            )
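
# --- Usage sketch (added for illustration; not part of the original module) ---
# A minimal example of how a caller, such as a distributed optimizer wrapper,
# might drive _FunctionalAdam: either one parameter at a time via step_param()
# as gradients become ready, or all at once via step(). The tensor shapes and
# gradient values below are illustrative assumptions, not taken from the source.
if __name__ == "__main__":
    _params = [torch.randn(4, requires_grad=True), torch.randn(3, requires_grad=True)]
    _opt = _FunctionalAdam(_params, lr=1e-3)

    # Update a single parameter as soon as its gradient is available.
    _opt.step_param(_params[0], torch.ones_like(_params[0]))

    # Update all parameters at once; entries may be None for parameters
    # that received no gradient.
    _grads: List[Optional[Tensor]] = [torch.ones_like(p) for p in _params]
    _opt.step(_grads)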