import torch
from . import _functional as F
from .optimizer import Optimizer


class SparseAdam(Optimizer):
    r"""Implements lazy version of Adam algorithm suitable for sparse tensors.

    In this variant, only moments that show up in the gradient get updated, and
    only those portions of the gradient get applied to the parameters.

    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)

    .. _Adam\: A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
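
    Example (a minimal usage sketch, assuming an ``nn.Embedding`` created with
    ``sparse=True`` so that backpropagation produces sparse gradients)::

        >>> emb = torch.nn.Embedding(10, 4, sparse=True)
        >>> optimizer = SparseAdam(emb.parameters(), lr=1e-3)
        >>> loss = emb(torch.tensor([1, 2, 3])).pow(2).sum()
        >>> loss.backward()        # emb.weight.grad is a sparse tensor
        >>> optimizer.step()       # only the touched rows of the moments update
        >>> optimizer.zero_grad()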
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8):
        if not 0.0 < lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 < eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))

        params = list(params)

        # SparseAdam requires dense parameter tensors (only the *gradients* are
        # sparse), so reject any parameter that is itself a sparse tensor.
        sparse_params = []
        for index, param in enumerate(params):
            if isinstance(param, dict):
                for d_index, d_param in enumerate(param.get("params", [])):
                    if d_param.is_sparse:
                        sparse_params.append([index, d_index])
            elif param.is_sparse:
                sparse_params.append(index)
        if sparse_params:
            raise ValueError(
                f"Sparse params at indices {sparse_params}: "
                f"SparseAdam requires dense parameter tensors"
            )

        defaults = dict(lr=lr, betas=betas, eps=eps)
        super().__init__(params, defaults)

    @torch.no_grad()
    def step(self, closure=None):
        """Performs a single optimization step.

        Args:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad = []
            grads = []
            exp_avgs = []
            exp_avg_sqs = []
            state_steps = []
            eps = group['eps']
            lr = group['lr']
            beta1, beta2 = group['betas']

            for p in group['params']:
                if p.grad is not None:
                    params_with_grad.append(p)
                    if not p.grad.is_sparse:
                        raise RuntimeError('SparseAdam does not support dense gradients, '
                                           'please consider Adam instead')
                    grads.append(p.grad)

                    state = self.state[p]

                    # Lazy state initialization
                    if len(state) == 0:
                        state['step'] = 0
                        # Exponential moving average of gradient values
                        state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format)
                        # Exponential moving average of squared gradient values
                        state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)

                    exp_avgs.append(state['exp_avg'])
                    exp_avg_sqs.append(state['exp_avg_sq'])

                    # Update the per-parameter step count and record it for the kernel call below.
                    state['step'] += 1
                    state_steps.append(state['step'])

            F.sparse_adam(params_with_grad, grads, exp_avgs, exp_avg_sqs, state_steps,
                          beta1=beta1, beta2=beta2, lr=lr, eps=eps)

        return loss