U
    dK                     @   s   d dl mZ ddlmZmZ ddlmZ G dd deZG dd	 d	eZ	G d
d deeZ
G dd deZG dd deeZG dd deZG dd deeZdS )    )Tensor   )_LazyNormBase	_NormBase   )
functionalc                       sj   e Zd Zdeeeeedd fddZdd	 Zd
d Zdd Z	dd Z
 fddZeedddZ  ZS )_InstanceNormh㈵>皙?FN)num_featuresepsmomentumaffinetrack_running_statsreturnc           	         s*   ||d}t t| j|||||f| d S )N)devicedtype)superr   __init__)	selfr   r   r   r   r   r   r   Zfactory_kwargs	__class__ A/tmp/pip-unpacked-wheel-ua33x9lu/torch/nn/modules/instancenorm.pyr      s    


    z_InstanceNorm.__init__c                 C   s   t d S NNotImplementedErrorr   inputr   r   r   _check_input_dim   s    z_InstanceNorm._check_input_dimc                 C   s   t d S r   r   r   r   r   r   _get_no_batch_dim   s    z_InstanceNorm._get_no_batch_dimc                 C   s   |  |ddS )Nr   )_apply_instance_normZ	unsqueezeZsqueezer   r   r   r   _handle_no_batch_input   s    z$_InstanceNorm._handle_no_batch_inputc              
   C   s.   t || j| j| j| j| jp"| j | j| j	S r   )
FZinstance_normrunning_meanrunning_varZweightZbiasZtrainingr   r   r   r   r   r   r   r"      s          z"_InstanceNorm._apply_instance_normc              	      s   | dd }|d kr| jsg }	dD ]}
||
 }||kr"|	| q"t|	dkr|djddd |	D | jjd |	D ]}|| qzt	t
| ||||||| d S )	Nversion)r%   r&   r   a  Unexpected running stats buffer(s) {names} for {klass} with track_running_stats=False. If state_dict is a checkpoint saved before 0.4.0, this may be expected because {klass} does not track running stats by default since 0.4.0. Please remove these keys from state_dict. If the running stats are actually needed, instead set track_running_stats=True in {klass} to enable them. See the documentation of {klass} for details.z and c                 s   s   | ]}d  |V  qdS )z"{}"N)format).0kr   r   r   	<genexpr>9   s     z6_InstanceNorm._load_from_state_dict.<locals>.<genexpr>)namesklass)getr   appendlenr(   joinr   __name__popr   r   _load_from_state_dict)r   Z
state_dictprefixZlocal_metadatastrictZmissing_keysZunexpected_keysZ
error_msgsr'   Zrunning_stats_keysnamekeyr   r   r   r4   $   s2    
     z#_InstanceNorm._load_from_state_dict)r   r   c                 C   s.   |  | | |  kr$| |S | |S r   )r   dimr!   r#   r"   r   r   r   r   forwardB   s    

z_InstanceNorm.forward)r	   r
   FFNN)r2   
__module____qualname__intfloatboolr   r   r!   r#   r"   r4   r   r:   __classcell__r   r   r   r   r      s(         r   c                   @   s    e Zd ZdZdd Zdd ZdS )InstanceNorm1da  Applies Instance Normalization over a 2D (unbatched) or 3D (batched) input
    as described in the paper
    `Instance Normalization: The Missing Ingredient for Fast Stylization
    <https://arxiv.org/abs/1607.08022>`__.

    .. math::

        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated per-dimension separately
    for each object in a mini-batch. :math:`\gamma` and :math:`\beta` are learnable parameter vectors
    of size `C` (where `C` is the number of features or channels of the input) if :attr:`affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    By default, this layer uses instance statistics computed from input data in
    both training and evaluation modes.

    If :attr:`track_running_stats` is set to ``True``, during training this
    layer keeps running estimates of its computed mean and variance, which are
    then used for normalization during evaluation. The running estimates are
    kept with a default :attr:`momentum` of 0.1.

    .. note::
        This :attr:`momentum` argument is different from one used in optimizer
        classes and the conventional notion of momentum. Mathematically, the
        update rule for running statistics here is
        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
        new observed value.

    .. note::
        :class:`InstanceNorm1d` and :class:`LayerNorm` are very similar, but
        have some subtle differences. :class:`InstanceNorm1d` is applied
        on each channel of channeled data like multidimensional time series, but
        :class:`LayerNorm` is usually applied on entire sample and often in NLP
        tasks. Additionally, :class:`LayerNorm` applies elementwise affine
        transform, while :class:`InstanceNorm1d` usually don't apply affine
        transform.

    Args:
        num_features: number of features or channels :math:`C` of the input
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, L)` or :math:`(C, L)`
        - Output: :math:`(N, C, L)` or :math:`(C, L)` (same shape as input)

    Examples::

        >>> # Without Learnable Parameters
        >>> m = nn.InstanceNorm1d(100)
        >>> # With Learnable Parameters
        >>> m = nn.InstanceNorm1d(100, affine=True)
        >>> input = torch.randn(20, 100, 40)
        >>> output = m(input)
    c                 C   s   dS Nr   r   r    r   r   r   r!      s    z InstanceNorm1d._get_no_batch_dimc                 C   s"   |  dkrtd|  d S N)r      z'expected 2D or 3D input (got {}D input)r9   
ValueErrorr(   r   r   r   r   r      s    zInstanceNorm1d._check_input_dimNr2   r;   r<   __doc__r!   r   r   r   r   r   rA   K   s   BrA   c                   @   s$   e Zd ZdZeZdd Zdd ZdS )LazyInstanceNorm1daq  A :class:`torch.nn.InstanceNorm1d` module with lazy initialization of
    the ``num_features`` argument of the :class:`InstanceNorm1d` that is inferred
    from the ``input.size(1)``.
    The attributes that will be lazily initialized are `weight`, `bias`,
    `running_mean` and `running_var`.

    Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
    on lazy modules and their limitations.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, L)` or :math:`(C, L)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, L)` or :math:`(C, L)`
        - Output: :math:`(N, C, L)` or :math:`(C, L)` (same shape as input)
    c                 C   s   dS rB   r   r    r   r   r   r!      s    z$LazyInstanceNorm1d._get_no_batch_dimc                 C   s"   |  dkrtd|  d S rC   rE   r   r   r   r   r      s    z#LazyInstanceNorm1d._check_input_dimN)r2   r;   r<   rH   rA   cls_to_becomer!   r   r   r   r   r   rI      s   rI   c                   @   s    e Zd ZdZdd Zdd ZdS )InstanceNorm2da  Applies Instance Normalization over a 4D input (a mini-batch of 2D inputs
    with additional channel dimension) as described in the paper
    `Instance Normalization: The Missing Ingredient for Fast Stylization
    <https://arxiv.org/abs/1607.08022>`__.

    .. math::

        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated per-dimension separately
    for each object in a mini-batch. :math:`\gamma` and :math:`\beta` are learnable parameter vectors
    of size `C` (where `C` is the input size) if :attr:`affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    By default, this layer uses instance statistics computed from input data in
    both training and evaluation modes.

    If :attr:`track_running_stats` is set to ``True``, during training this
    layer keeps running estimates of its computed mean and variance, which are
    then used for normalization during evaluation. The running estimates are
    kept with a default :attr:`momentum` of 0.1.

    .. note::
        This :attr:`momentum` argument is different from one used in optimizer
        classes and the conventional notion of momentum. Mathematically, the
        update rule for running statistics here is
        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
        new observed value.

    .. note::
        :class:`InstanceNorm2d` and :class:`LayerNorm` are very similar, but
        have some subtle differences. :class:`InstanceNorm2d` is applied
        on each channel of channeled data like RGB images, but
        :class:`LayerNorm` is usually applied on entire sample and often in NLP
        tasks. Additionally, :class:`LayerNorm` applies elementwise affine
        transform, while :class:`InstanceNorm2d` usually don't apply affine
        transform.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, H, W)` or :math:`(C, H, W)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, H, W)` or :math:`(C, H, W)`
        - Output: :math:`(N, C, H, W)` or :math:`(C, H, W)` (same shape as input)

    Examples::

        >>> # Without Learnable Parameters
        >>> m = nn.InstanceNorm2d(100)
        >>> # With Learnable Parameters
        >>> m = nn.InstanceNorm2d(100, affine=True)
        >>> input = torch.randn(20, 100, 35, 45)
        >>> output = m(input)
    c                 C   s   dS NrD   r   r    r   r   r   r!     s    z InstanceNorm2d._get_no_batch_dimc                 C   s"   |  dkrtd|  d S N)rD      z'expected 3D or 4D input (got {}D input)rE   r   r   r   r   r     s    zInstanceNorm2d._check_input_dimNrG   r   r   r   r   rK      s   CrK   c                   @   s$   e Zd ZdZeZdd Zdd ZdS )LazyInstanceNorm2da  A :class:`torch.nn.InstanceNorm2d` module with lazy initialization of
    the ``num_features`` argument of the :class:`InstanceNorm2d` that is inferred
    from the ``input.size(1)``.
    The attributes that will be lazily initialized are `weight`, `bias`,
    `running_mean` and `running_var`.

    Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
    on lazy modules and their limitations.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, H, W)` or :math:`(C, H, W)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, H, W)` or :math:`(C, H, W)`
        - Output: :math:`(N, C, H, W)` or :math:`(C, H, W)` (same shape as input)
    c                 C   s   dS rL   r   r    r   r   r   r!   )  s    z$LazyInstanceNorm2d._get_no_batch_dimc                 C   s"   |  dkrtd|  d S rM   rE   r   r   r   r   r   ,  s    z#LazyInstanceNorm2d._check_input_dimN)r2   r;   r<   rH   rK   rJ   r!   r   r   r   r   r   rO     s   rO   c                   @   s    e Zd ZdZdd Zdd ZdS )InstanceNorm3da  Applies Instance Normalization over a 5D input (a mini-batch of 3D inputs
    with additional channel dimension) as described in the paper
    `Instance Normalization: The Missing Ingredient for Fast Stylization
    <https://arxiv.org/abs/1607.08022>`__.

    .. math::

        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated per-dimension separately
    for each object in a mini-batch. :math:`\gamma` and :math:`\beta` are learnable parameter vectors
    of size C (where C is the input size) if :attr:`affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    By default, this layer uses instance statistics computed from input data in
    both training and evaluation modes.

    If :attr:`track_running_stats` is set to ``True``, during training this
    layer keeps running estimates of its computed mean and variance, which are
    then used for normalization during evaluation. The running estimates are
    kept with a default :attr:`momentum` of 0.1.

    .. note::
        This :attr:`momentum` argument is different from one used in optimizer
        classes and the conventional notion of momentum. Mathematically, the
        update rule for running statistics here is
        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
        new observed value.

    .. note::
        :class:`InstanceNorm3d` and :class:`LayerNorm` are very similar, but
        have some subtle differences. :class:`InstanceNorm3d` is applied
        on each channel of channeled data like 3D models with RGB color, but
        :class:`LayerNorm` is usually applied on entire sample and often in NLP
        tasks. Additionally, :class:`LayerNorm` applies elementwise affine
        transform, while :class:`InstanceNorm3d` usually don't apply affine
        transform.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`
        - Output: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input)

    Examples::

        >>> # Without Learnable Parameters
        >>> m = nn.InstanceNorm3d(100)
        >>> # With Learnable Parameters
        >>> m = nn.InstanceNorm3d(100, affine=True)
        >>> input = torch.randn(20, 100, 35, 45, 10)
        >>> output = m(input)
    c                 C   s   dS NrN   r   r    r   r   r   r!   v  s    z InstanceNorm3d._get_no_batch_dimc                 C   s"   |  dkrtd|  d S N)rN      z'expected 4D or 5D input (got {}D input)rE   r   r   r   r   r   y  s    zInstanceNorm3d._check_input_dimNrG   r   r   r   r   rP   2  s   CrP   c                   @   s$   e Zd ZdZeZdd Zdd ZdS )LazyInstanceNorm3da  A :class:`torch.nn.InstanceNorm3d` module with lazy initialization of
    the ``num_features`` argument of the :class:`InstanceNorm3d` that is inferred
    from the ``input.size(1)``.
    The attributes that will be lazily initialized are `weight`, `bias`,
    `running_mean` and `running_var`.

    Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
    on lazy modules and their limitations.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`
        - Output: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input)
    c                 C   s   dS rQ   r   r    r   r   r   r!     s    z$LazyInstanceNorm3d._get_no_batch_dimc                 C   s"   |  dkrtd|  d S rR   rE   r   r   r   r   r     s    z#LazyInstanceNorm3d._check_input_dimN)r2   r;   r<   rH   rP   rJ   r!   r   r   r   r   r   rT     s   rT   N)Ztorchr   Z	batchnormr   r    r   r$   r   rA   rI   rK   rO   rP   rT   r   r   r   r   <module>   s   DL'M'M