U
    ‰dŽy  ã                   @   sT  U d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	Z	d dl
mZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z-m.Z.m/Z/ ddl0m1Z1m2Z2 ddl3m4Z4 ddl5m6Z6 ddl7m8Z8 ddl9m:Z: ddl;m<Z< ddl=m>Z> ddl?m@Z@mAZB i ZCi ZDeeeef ef eEd< d d!„ ZFeG d"d#„ d#eGƒƒZHd$d%„ ZId&d'„ ZJd(d)„ ZKd*d+„ ZLd,d-„ ZMeFeeƒd.d/„ ƒZNeFeeƒd0d1„ ƒZOeFeeƒd2d3„ ƒZPeFeeƒd4d5„ ƒZQeFeeƒd6d7„ ƒZReFeeƒd8d9„ ƒZSeFeeƒd:d;„ ƒZTeFeeƒd<d=„ ƒZUeFe!e!ƒd>d?„ ƒZVeFe%e%ƒd@dA„ ƒZWeFe#e#ƒdBdC„ ƒZXeFe'e'ƒdDdE„ ƒZYeFe+e+ƒdFdG„ ƒZZeFe-e-ƒdHdI„ ƒZ[eFe1e-ƒdJdK„ ƒZ\eFe-e1ƒdLdM„ ƒZ]eFe1e1ƒdNdO„ ƒZ^eFe4e4ƒdPdQ„ ƒZ_eFe6e6ƒdRdS„ ƒZ`eFe8e8ƒdTdU„ ƒZaeFe:e:ƒdVdW„ ƒZbeFe<e<ƒdXdY„ ƒZceFe>e>ƒdZd[„ ƒZdeFee:ƒd\d]„ ƒZeeFeeƒd^d_„ ƒZfeFee8ƒd`da„ ƒZgeFeeƒdbdc„ ƒZheFee!ƒddde„ ƒZieFee4ƒdfdg„ ƒZjeFee>ƒdhdi„ ƒZkeFee8ƒdjdk„ ƒZleFeeƒdldm„ ƒZmeFee4ƒdndo„ ƒZneFee>ƒdpdq„ ƒZoeFeeƒeFeeƒeFee8ƒeFee>ƒdrds„ ƒƒƒƒZpeFee!ƒdtdu„ ƒZqeFee%ƒdvdw„ ƒZreFee4ƒdxdy„ ƒZseFe!eƒeFe!eƒeFe!e8ƒeFe!e>ƒdzd{„ ƒƒƒƒZteFe!eƒd|d}„ ƒZueFe!e%ƒd~d„ ƒZveFe!e4ƒd€d„ ƒZweFe%eƒeFe%eƒeFe%eƒeFe%e!ƒeFe%e8ƒeFe%e>ƒd‚dƒ„ ƒƒƒƒƒƒZxeFe%e4ƒd„d…„ ƒZyeFe+eƒeFe+eƒeFe+eƒeFe+e!ƒeFe+e8ƒeFe+e>ƒd†d‡„ ƒƒƒƒƒƒZzeFe+e4ƒdˆd‰„ ƒZ{eFe4eƒeFe4eƒeFe4eƒeFe4e!ƒeFe4e8ƒeFe4e>ƒdŠd‹„ ƒƒƒƒƒƒZ|eFe4e%ƒdŒd„ ƒZ}eFe4e+ƒdŽd„ ƒZ~eFe8eƒeFe8eƒeFe8e>ƒdd‘„ ƒƒƒZeFe8eƒd’d“„ ƒZ€eFe8e!ƒd”d•„ ƒZeFe8e4ƒd–d—„ ƒZ‚eFe:eƒeFe:eƒd˜d™„ ƒƒZƒeFe>eƒdšd›„ ƒZ„eFe>eƒdœd„ ƒZ…eFe>eƒdždŸ„ ƒZ†eFe>e!ƒd d¡„ ƒZ‡eFe>e%ƒd¢d£„ ƒZˆeFe>e4ƒd¤d¥„ ƒZ‰eFe>e8ƒd¦d§„ ƒZŠeFe)e)ƒd¨d©„ ƒZ‹eFeeƒdªd«„ ƒZŒd¬d­„ ZdS )®é    N)Útotal_ordering)ÚTypeÚDictÚCallableÚTuple)Úinfé   )Ú	Bernoulli)ÚBeta)ÚBinomial)ÚCategorical)ÚCauchy)ÚContinuousBernoulli)Ú	Dirichlet)ÚDistribution)ÚExponential)ÚExponentialFamily)ÚGamma)Ú	Geometric)ÚGumbel)Ú
HalfNormal)ÚIndependent)ÚLaplace)ÚLowRankMultivariateNormalÚ_batch_lowrank_logdetÚ_batch_lowrank_mahalanobis)ÚMultivariateNormalÚ_batch_mahalanobis)ÚNormal)ÚOneHotCategorical)ÚPareto)ÚPoisson)ÚTransformedDistribution)ÚUniform)Ú_sum_rightmostÚeuler_constantÚ_KL_MEMOIZEc                    sV   t ˆ tƒs"tˆ tƒr"td ˆ ¡ƒ‚t ˆtƒsDtˆtƒrDtd ˆ¡ƒ‚‡ ‡fdd„}|S )a[  
    Decorator to register a pairwise function with :meth:`kl_divergence`.
    Usage::

        @register_kl(Normal, Normal)
        def kl_normal_normal(p, q):
            # insert implementation here

    Lookup returns the most specific (type,type) match ordered by subclass. If
    the match is ambiguous, a `RuntimeWarning` is raised. For example to
    resolve the ambiguous situation::

        @register_kl(BaseP, DerivedQ)
        def kl_version1(p, q): ...
        @register_kl(DerivedP, BaseQ)
        def kl_version2(p, q): ...

    you should register a third most-specific implementation, e.g.::

        register_kl(DerivedP, DerivedQ)(kl_version1)  # Break the tie.

    Args:
        type_p (type): A subclass of :class:`~torch.distributions.Distribution`.
        type_q (type): A subclass of :class:`~torch.distributions.Distribution`.
    z8Expected type_p to be a Distribution subclass but got {}z8Expected type_q to be a Distribution subclass but got {}c                    s   | t ˆ ˆf< t ¡  | S ©N)Ú_KL_REGISTRYr&   Úclear)Úfun©Útype_pÚtype_q© ú:/tmp/pip-unpacked-wheel-ua33x9lu/torch/distributions/kl.pyÚ	decoratorG   s    zregister_kl.<locals>.decorator)Ú
isinstanceÚtypeÚ
issubclassr   Ú	TypeErrorÚformat)r,   r-   r0   r.   r+   r/   Úregister_kl(   s    r6   c                   @   s*   e Zd ZdgZdd„ Zdd„ Zdd„ ZdS )	Ú_MatchÚtypesc                 G   s
   || _ d S r'   ©r8   )Úselfr8   r.   r.   r/   Ú__init__S   s    z_Match.__init__c                 C   s   | j |j kS r'   r9   )r:   Úotherr.   r.   r/   Ú__eq__V   s    z_Match.__eq__c                 C   s8   t | j|jƒD ]$\}}t||ƒs& dS ||k	r q4qdS )NFT)Úzipr8   r3   )r:   r<   ÚxÚyr.   r.   r/   Ú__le__Y   s    
z_Match.__le__N)Ú__name__Ú
__module__Ú__qualname__Ú	__slots__r;   r=   rA   r.   r.   r.   r/   r7   O   s   r7   c           	         s   ‡ ‡fdd„t D ƒ}|stS tdd„ |D ƒƒj\}}tdd„ |D ƒƒj\}}t ||f }t ||f }||k	rŒt d ˆ jˆj|j|j¡t¡ |S )zP
    Find the most specific approximate match, assuming single inheritance.
    c                    s,   g | ]$\}}t ˆ |ƒrt ˆ|ƒr||f‘qS r.   )r3   )Ú.0Zsuper_pZsuper_qr+   r.   r/   Ú
<listcomp>f   s    
 
ÿz _dispatch_kl.<locals>.<listcomp>c                 s   s   | ]}t |Ž V  qd S r'   )r7   ©rF   Úmr.   r.   r/   Ú	<genexpr>m   s     z_dispatch_kl.<locals>.<genexpr>c                 s   s   | ]}t t|ƒŽ V  qd S r'   )r7   ÚreversedrH   r.   r.   r/   rJ   n   s     z;Ambiguous kl_divergence({}, {}). Please register_kl({}, {}))	r(   ÚNotImplementedÚminr8   ÚwarningsÚwarnr5   rB   ÚRuntimeWarning)	r,   r-   ÚmatchesZleft_pZleft_qZright_qZright_pZleft_funZ	right_funr.   r+   r/   Ú_dispatch_klb   s"       ÿþrR   c                 C   s   t  | t¡S )zI
    Helper function for obtaining infinite KL Divergence throughout
    )ÚtorchZ	full_liker   ©Ztensorr.   r.   r/   Ú_infinite_likex   s    rU   c                 C   s   | |   ¡  S )z2
    Utility function for calculating x log x
    )ÚlogrT   r.   r.   r/   Ú_x_log_x   s    rW   c                 C   sD   |   d¡}|   d¡}|  d|| ¡ d¡ d¡}| | jdd… ¡S )zp
    Utility function for calculating the trace of XX^{T} with X having arbitrary trailing batch dimensions
    éÿÿÿÿéþÿÿÿé   N)ÚsizeZreshapeÚpowÚsumÚshape)ZbmatÚnrI   Z
flat_tracer.   r.   r/   Ú_batch_trace_XXT†   s    

r`   c                 C   s~   zt t| ƒt|ƒf }W n: tk
rR   tt| ƒt|ƒƒ}|t t| ƒt|ƒf< Y nX |tkrttd | jj|jj¡ƒ‚|| |ƒS )a"  
    Compute Kullback-Leibler divergence :math:`KL(p \| q)` between two distributions.

    .. math::

        KL(p \| q) = \int p(x) \log\frac {p(x)} {q(x)} \,dx

    Args:
        p (Distribution): A :class:`~torch.distributions.Distribution` object.
        q (Distribution): A :class:`~torch.distributions.Distribution` object.

    Returns:
        Tensor: A batch of KL divergences of shape `batch_shape`.

    Raises:
        NotImplementedError: If the distribution types have not been registered via
            :meth:`register_kl`.
    z8No KL(p || q) is implemented for p type {} and q type {})	r&   r2   ÚKeyErrorrR   rL   ÚNotImplementedErrorr5   Ú	__class__rB   )ÚpÚqr*   r.   r.   r/   Úkl_divergence   s     ÿrf   c                 C   sx   | j | j |j   ¡  }t||j dk< d|| j dk< d| j  d| j  d|j    ¡  }t||j dk< d|| j dk< || S ©Nr   r   )ÚprobsrV   r   ©rd   re   Út1Út2r.   r.   r/   Ú_kl_bernoulli_bernoulliµ   s    "rl   c           	      C   s¦   | j | j }|j |j }|j  ¡ |j ¡  | ¡  }| j  ¡ | j ¡  | ¡  }| j |j  t | j ¡ }| j|j t | j¡ }|| t |¡ }|| | | | S r'   )Úconcentration1Úconcentration0ÚlgammarS   Údigamma)	rd   re   Zsum_params_pZsum_params_qrj   rk   Út3Út4Út5r.   r.   r/   Ú_kl_beta_betaÀ   s    rt   c                 C   sh   | j |j k  ¡ rtdƒ‚| j | j| j|j  | j  ¡  |j  ¡   }| j |j k}t|| ƒ||< |S )NzKKL between Binomials where q.total_count > p.total_count is not implemented)Ztotal_countÚanyrb   rh   ÚlogitsÚlog1prU   )rd   re   ÚklZinf_idxsr.   r.   r/   Ú_kl_binomial_binomialÌ   s    0ry   c                 C   sD   | j | j|j  }t||j dk |¡< d|| j dk |¡< | d¡S )Nr   rX   )rh   rv   r   Z	expand_asr]   )rd   re   Útr.   r.   r/   Ú_kl_categorical_categoricalØ   s    r{   c                 C   sL   | j | j|j  }|  ¡ t | j ¡ }| ¡  t |j ¡ }|| | S r'   )Úmeanrv   Ú_cont_bern_log_normrS   rw   rh   ©rd   re   rj   rk   rq   r.   r.   r/   Ú-_kl_continuous_bernoulli_continuous_bernoullià   s    r   c                 C   s|   | j  d¡}|j  d¡}| ¡ | ¡  }| j  ¡ |j  ¡   d¡}| j |j  }| j  ¡ | ¡  d¡ }|| ||  d¡ S )NrX   )Úconcentrationr]   ro   rp   Ú	unsqueeze)rd   re   Zsum_p_concentrationZsum_q_concentrationrj   rk   rq   rr   r.   r.   r/   Ú_kl_dirichlet_dirichletè   s    r‚   c                 C   s"   |j | j  }| ¡  }|| d S ©Nr   ©ÚraterV   )rd   re   Z
rate_ratiorj   r.   r.   r/   Ú_kl_exponential_exponentialô   s    
r†   c                 C   s˜   t | ƒt |ƒkstdƒ‚dd„ | jD ƒ}|j}| j|Ž }tjj| ¡ |dd}|j|Ž | }t|||ƒD ]*\}}}	|| |	 }
|t	|
t
|jƒƒ8 }qh|S )Nz‡The cross KL-divergence between different exponential families cannot                             be computed using Bregman divergencesc                 S   s   g | ]}|  ¡  ¡ ‘qS r.   )ÚdetachZrequires_grad_)rF   Únpr.   r.   r/   rG      s     z+_kl_expfamily_expfamily.<locals>.<listcomp>T)Zcreate_graph)r2   rb   Z_natural_paramsZ_log_normalizerrS   ZautogradZgradr]   r>   r$   ÚlenÚevent_shape)rd   re   Z	p_nparamsZ	q_nparamsZ	lg_normalZ	gradientsÚresultZpnpZqnpÚgZtermr.   r.   r/   Ú_kl_expfamily_expfamilyû   s    
r   c                 C   sn   |j | j|j  ¡  }t |j ¡t | j ¡ }| j |j  t | j ¡ }|j| j | j | j  }|| | | S r'   )r€   r…   rV   rS   ro   rp   ©rd   re   rj   rk   rq   rr   r.   r.   r/   Ú_kl_gamma_gamma  s
    r   c                 C   sl   | j |j  }|j|j  }| j|j  }| ¡  | | }|t }t |d|  ¡  | ¡}|| | dt  S rƒ   )ÚscaleÚlocrV   Ú_euler_gammarS   Úexpro   )rd   re   Zct1Zct2Zct3rj   rk   rq   r.   r.   r/   Ú_kl_gumbel_gumbel  s    r”   c                 C   s$   |   ¡  t |j ¡| j  |j S r'   )ÚentropyrS   rw   rh   rv   ©rd   re   r.   r.   r/   Ú_kl_geometric_geometric  s    r—   c                 C   s   t | j|jƒS r'   )Ú_kl_normal_normalÚ	base_distr–   r.   r.   r/   Ú_kl_halfnormal_halfnormal$  s    rš   c                 C   sV   | j |j  }| j|j  ¡ }| ¡  }||j  }|t | | j  ¡ }|| | d S rƒ   )r   r‘   ÚabsrV   rS   r“   )rd   re   Úscale_ratioZloc_abs_diffrj   rk   rq   r.   r.   r/   Ú_kl_laplace_laplace)  s    

r   c                 C   sú   | j |j krtdƒ‚t|j|j|jƒt| j| j| jƒ }t|j|j|j| j |jƒ}|jj|j 	d¡ }t
jj|j|dd}| j|j  d¡}t| j|j ¡  	d¡ ƒ}t|| j ¡  	d¡ ƒ}t| | j¡ƒ}	|| | |	 }
d||
 | | j d   S )NzKL-divergence between two Low Rank Multivariate Normals with                          different event shapes cannot be computedrY   F©ÚupperrX   ç      à?r   )rŠ   Ú
ValueErrorr   Ú_unbroadcasted_cov_factorÚ_unbroadcasted_cov_diagÚ_capacitance_trilr   r‘   ÚmTr   rS   ÚlinalgÚsolve_triangularr]   r`   ÚrsqrtÚsqrtÚmatmul)rd   re   Úterm1Úterm3Ú	qWt_qDinvÚAÚterm21Úterm22Zterm23Zterm24Úterm2r.   r.   r/   Ú7_kl_lowrankmultivariatenormal_lowrankmultivariatenormal4  s2    
ÿ
ÿþ

þ
ÿÿr²   c           	      C   sÔ   | j |j krtdƒ‚t|j|j|jƒd| jjddd ¡  	d¡  }t
|j|j|j| j |jƒ}|jj|j d¡ }tjj|j|dd}t| j|j ¡  d¡ ƒ}t| | j¡ƒ}|| }d|| | | j d	   S )
NúKL-divergence between two (Low Rank) Multivariate Normals with                          different event shapes cannot be computedrZ   rY   rX   ©Zdim1Zdim2Frž   r    r   )rŠ   r¡   r   r¢   r£   r¤   Ú_unbroadcasted_scale_trilÚdiagonalrV   r]   r   r‘   r¥   r   rS   r¦   r§   r`   r¨   rª   )	rd   re   r«   r¬   r­   r®   r¯   r°   r±   r.   r.   r/   Ú0_kl_multivariatenormal_lowrankmultivariatenormalP  s*    
ÿþ

þ
ÿÿr·   c                 C   s$  | j |j krtdƒ‚d|jjddd ¡  d¡ t| j| j| j	ƒ }t
|j|j| j ƒ}tj |jjd d… | jjd d… ¡}| j d }|j |||f ¡}| j ||| j d¡f ¡}t | j ¡ ¡ |||f ¡}ttjj||ddƒ}	ttjj||ddƒ}
|	|
 }d	|| | | j d   S )
Nr³   rZ   rY   rX   r´   r   Frž   r    )rŠ   r¡   rµ   r¶   rV   r]   r   r¢   r£   r¤   r   r‘   rS   Ú_CÚ_infer_sizer^   ÚexpandZ
cov_factorr[   Z
diag_embedr©   r`   r¦   r§   )rd   re   r«   r¬   Úcombined_batch_shaper_   Úq_scale_trilZp_cov_factorZ
p_cov_diagr¯   r°   r±   r.   r.   r/   Ú0_kl_lowrankmultivariatenormal_multivariatenormali  s.    
ÿÿÿ
ÿ
ÿr½   c           	      C   sÞ   | j |j krtdƒ‚|jjddd ¡  d¡| jjddd ¡  d¡ }tj |jj	d d… | jj	d d… ¡}| j d }|j 
|||f ¡}| j 
|||f ¡}ttjj||ddƒ}t|j|j| j ƒ}|d|| |   S )	NzvKL-divergence between two Multivariate Normals with                          different event shapes cannot be computedrY   rX   r´   r   Frž   r    )rŠ   r¡   rµ   r¶   rV   r]   rS   r¸   r¹   r^   rº   r`   r¦   r§   r   r‘   )	rd   re   Z
half_term1r»   r_   r¼   Zp_scale_trilr±   r¬   r.   r.   r/   Ú)_kl_multivariatenormal_multivariatenormalƒ  s    ÿÿ
r¾   c                 C   sB   | j |j   d¡}| j|j |j   d¡}d|| d | ¡   S ©NrZ   r    r   ©r   r\   r‘   rV   )rd   re   Z	var_ratiorj   r.   r.   r/   r˜   –  s    r˜   c                 C   s   t | j|jƒS r'   )r{   Z_categoricalr–   r.   r.   r/   Ú'_kl_onehotcategorical_onehotcategorical  s    rÁ   c                 C   sX   | j |j  }|j| j }|j| ¡  }| ¡  }|| | d }t|| jj|jjk < |S rƒ   )r   ÚalpharV   r   ÚsupportÚlower_bound)rd   re   rœ   Zalpha_ratiorj   rk   r‹   r.   r.   r/   Ú_kl_pareto_pareto¢  s    
rÅ   c                 C   s&   | j | j  ¡ |j  ¡   | j |j   S r'   r„   r–   r.   r.   r/   Ú_kl_poisson_poisson®  s    rÆ   c                 C   s.   | j |j krt‚| j|jkr t‚t| j|jƒS r'   )Z
transformsrb   rŠ   rf   r™   r–   r.   r.   r/   Ú_kl_transformed_transformed³  s
    rÇ   c                 C   s<   |j |j | j | j   ¡ }t||j| jk|j | j k B < |S r'   )ÚhighÚlowrV   r   ©rd   re   r‹   r.   r.   r/   Ú_kl_uniform_uniform¼  s    rË   c                 C   s    |   ¡  | j|j ¡  |j  S r'   )r•   rh   r…   rV   r–   r.   r.   r/   Ú_kl_bernoulli_poissonÄ  s    rÌ   c                 C   s,   |   ¡  | j|j  t |j ¡ | ¡  S r'   )r•   r|   rv   rS   rw   rh   r}   r–   r.   r.   r/   Ú_kl_beta_continuous_bernoulliÉ  s    rÍ   c                 C   s
   t | jƒS r'   )rU   rm   r–   r.   r.   r/   Ú_kl_beta_infinityÎ  s    rÎ   c                 C   s,   |   ¡  |j ¡  |j| j| j| j    S r'   )r•   r…   rV   rm   rn   r–   r.   r.   r/   Ú_kl_beta_exponentialÓ  s    rÏ   c                 C   sp   |   ¡  }|j ¡ |j|j ¡   }|jd | j ¡ | j| j  ¡   }|j| j | j| j  }|| | | S rƒ   )r•   r€   ro   r…   rV   rm   rp   rn   rŽ   r.   r.   r/   Ú_kl_beta_gammaØ  s
    
$rÐ   c           	      C   sš   | j | j | j  }|j d¡}|  ¡  }d|d tj  ¡  }|d|  | j | j d  | d¡ d }|j| }|j d¡d }|| || | |  S r¿   )	rm   rn   r   r\   r•   ÚmathÚpirV   r‘   )	rd   re   ZE_betaÚ
var_normalrj   rk   rq   rr   rs   r.   r.   r/   Ú_kl_beta_normalã  s    
*
rÔ   c                 C   s>   |   ¡  |j|j  ¡  }t||j| jjk|j| jjk B < |S r'   )r•   rÈ   rÉ   rV   r   rÃ   rÄ   Úupper_boundrÊ   r.   r.   r/   Ú_kl_beta_uniformï  s     rÖ   c                 C   s
   t | jƒS r'   )rU   rh   r–   r.   r.   r/   Ú!_kl_continuous_bernoulli_infinityø  s    r×   c                 C   s"   |   ¡  t |j¡ |j| j  S r'   )r•   rS   rV   r…   r|   r–   r.   r.   r/   Ú$_kl_continuous_bernoulli_exponentialý  s    rØ   c                 C   sz   |   ¡  }dt dtj ¡t |j|j ¡  t |j¡ }| jt | j	¡ d|j | j	  dt |j¡  }|| | S )Nr    g       @)
r•   rÑ   rV   rÒ   rS   Zsquarer‘   r   Zvariancer|   r~   r.   r.   r/   Ú_kl_continuous_bernoulli_normal  s    
22rÙ   c              	   C   sV   |   ¡  |j|j  ¡  }t t t |j| jj	¡t 
|j| jj¡¡t |¡t |¡S r'   )r•   rÈ   rÉ   rV   rS   ÚwhereÚmaxÚgerÃ   rÄ   ÚlerÕ   Ú	ones_liker   rÊ   r.   r.   r/   Ú _kl_continuous_bernoulli_uniform  s    ÿ þrß   c                 C   s
   t | jƒS r'   ©rU   r…   r–   r.   r.   r/   Ú_kl_exponential_infinity  s    rá   c                 C   sB   |j | j  }|j t |¡ }|| |j ¡  |jt  dt  S rƒ   )r…   r€   rS   rV   ro   r’   )rd   re   Zratiorj   r.   r.   r/   Ú_kl_exponential_gamma  s    râ   c                 C   sR   | j |j }|j|j }| ¡ d }t |¡| |d  }| ¡ }|| | | S rƒ   )r…   r   r‘   rV   rS   r“   Ú
reciprocal)rd   re   Úscale_rate_prodÚloc_scale_ratiorj   rk   rq   r.   r.   r/   Ú_kl_exponential_gumbel$  s    ræ   c                 C   sp   |j  d¡}| j d¡}dt || d tj ¡ }| ¡ }|j| j }|j d¡d }|d || | |  S r¿   )	r   r\   r…   rS   rV   rÑ   rÒ   rã   r‘   )rd   re   rÓ   Zrate_sqrrj   rk   rq   rr   r.   r.   r/   Ú_kl_exponential_normal0  s    rç   c                 C   s
   t | jƒS r'   )rU   r€   r–   r.   r.   r/   Ú_kl_gamma_infinity;  s    rè   c                 C   s&   |   ¡  |j ¡  |j| j | j  S r'   )r•   r…   rV   r€   r–   r.   r.   r/   Ú_kl_gamma_exponentialC  s    ré   c                 C   s~   | j |j }|j|j }| jd | j ¡  | j ¡  | j }| ¡ | j|  }t |¡d| 	¡   
| j ¡ | }|| | S rƒ   )r…   r   r‘   r€   rp   ro   rV   rS   r“   rã   r\   )rd   re   Zbeta_scale_prodrå   rj   rk   rq   r.   r.   r/   Ú_kl_gamma_gumbelH  s    $$rê   c                 C   s¨   |j  d¡}| j d¡}dt || d tj ¡ | j | j ¡  }d| j d¡| j  | }|j	| j | j }d|j	 d¡ }|| jd | j 
¡   || | |  S r¿   )r   r\   r…   rS   rV   rÑ   rÒ   r€   ro   r‘   rp   )rd   re   rÓ   Zbeta_sqrrj   rk   rq   rr   r.   r.   r/   Ú_kl_gamma_normalT  s    ,rë   c                 C   s
   t | jƒS r'   ©rU   r‘   r–   r.   r.   r/   Ú_kl_gumbel_infinity_  s    rí   c                 C   sx   | j |j  }|t dtj ¡  ¡ }tj| d  d¡d }| j| j t  |j |j   d¡d }| | | td  S )NrZ   r    é   r   )r   rÑ   r©   rÒ   rV   r\   r‘   r’   )rd   re   Zparam_ratiorj   rk   rq   r.   r.   r/   Ú_kl_gumbel_normalk  s
    &rï   c                 C   s
   t | jƒS r'   rì   r–   r.   r.   r/   Ú_kl_laplace_infinityt  s    rð   c                 C   s~   |j  d¡}| j  d¡| }dt d| tj ¡ }d| j d¡ }| j|j }d|j d¡ }| | || | |  d S r¿   )r   r\   rS   rV   rÑ   rÒ   r‘   )rd   re   rÓ   Zscale_sqr_var_ratiorj   rk   rq   rr   r.   r.   r/   Ú_kl_laplace_normal~  s    rñ   c                 C   s
   t | jƒS r'   rì   r–   r.   r.   r/   Ú_kl_normal_infinity‰  s    rò   c                 C   s|   | j |j }| j|j  d¡}|j |j }| ¡ d }|| }t | d|  | ¡}| | | ddt dtj ¡   S r¿   )r‘   r   r\   rV   rS   r“   rÑ   rÒ   )rd   re   Zmean_scale_ratioZvar_scale_sqr_ratiorå   rj   rk   rq   r.   r.   r/   Ú_kl_normal_gumbel“  s    ró   c                 C   sš   | j |j  }| j|j }|| j }t |¡}t dtj ¡| j t d| d¡ ¡ }|t 	t d¡| ¡ }| || |j  ddt dtj ¡   S )NrZ   g      à¿r    r   )
r‘   r   rS   rV   rÑ   r©   rÒ   r“   r\   Úerf)rd   re   Zloc_diffrœ   Zloc_diff_scale_ratiorj   rk   rq   r.   r.   r/   Ú_kl_normal_laplacež  s    

*rõ   c                 C   s
   t | jƒS r'   )rU   r   r–   r.   r.   r/   Ú_kl_pareto_infinity©  s    rö   c                 C   sZ   | j |j }| j|  ¡ }| j ¡ }| j| | jd  }|| | d }t|| jdk< |S rƒ   )r   r…   rÂ   rV   rã   r   )rd   re   rä   rj   rk   rq   r‹   r.   r.   r/   Ú_kl_pareto_exponential°  s    
r÷   c                 C   sŒ   | j  ¡ | j ¡  }| j ¡ | }|j ¡ |j|j ¡   }d|j | }|j| j | j  | jd  }|| | | d }t|| jdk< |S rƒ   )r   rV   rÂ   rã   r€   ro   r…   r   ©rd   re   Úcommon_termrj   rk   rq   rr   r‹   r.   r.   r/   Ú_kl_pareto_gamma»  s    rú   c           	      C   sª   d|j  d¡ }| j | jd  }t dtj ¡|j  | j | j   ¡ }| j ¡ }| j| d¡ | jd  }| j| |j  d¡}|| || |  d }t	|| jdk< |S )NrZ   r   )
r   r\   rÂ   rÑ   r©   rÒ   rV   rã   r‘   r   )	rd   re   rÓ   rù   rj   rk   rq   rr   r‹   r.   r.   r/   Ú_kl_pareto_normalÉ  s    &
rû   c                 C   s
   t | jƒS r'   rà   r–   r.   r.   r/   Ú_kl_poisson_infinityÖ  s    rü   c                 C   sÂ   | j | j }t |¡}|jd t| j ƒt| jƒ |  | }|jd td| j  ƒtd| j ƒ |  | }|j ¡ |j ¡  |j|j  ¡  }|| | | }t|| j |j	j
k| j|j	jk B < |S rƒ   )rÈ   rÉ   rS   rV   rm   rW   rn   ro   r   rÃ   rÕ   rÄ   rø   r.   r.   r/   Ú_kl_uniform_betaÜ  s    
&.$ rý   c              	   C   sh   |   ¡  | j|j  t |j ¡ | ¡  }t t t 	| j
|jj¡t | j|jj¡¡t |¡t |¡S r'   )r•   r|   rv   rS   rw   rh   r}   rÚ   rÛ   rÜ   rÈ   rÃ   rÕ   rÝ   rÉ   rÄ   rÞ   r   rÊ   r.   r.   r/   Ú _kl_uniform_continuous_bernoulliè  s    ,ÿ þrþ   c                 C   sB   |j | j| j  d | j| j |j   ¡  }t|| j|jjk < |S )NrZ   )r…   rÈ   rÉ   rV   r   rÃ   rÄ   rÊ   r.   r.   r/   Ú_kl_uniform_exponetialð  s    ,rÿ   c                 C   s’   | j | j }| ¡ }|j ¡ |j|j ¡   }d|j t| j ƒt| jƒ |  | }|j| j | j  d }| | | | }t|| j|jj	k < |S )Nr   rZ   )
rÈ   rÉ   rV   r€   ro   r…   rW   r   rÃ   rÄ   rø   r.   r.   r/   Ú_kl_uniform_gamma÷  s    &r   c                 C   sn   |j | j| j  }| j|j |j  }| j|j |j  }| ¡ d||   }|t | ¡t | ¡  }|| S )Nr    )r   rÈ   rÉ   r‘   rV   rS   r“   )rd   re   rù   Zhigh_loc_diffZlow_loc_diffrj   rk   r.   r.   r/   Ú_kl_uniform_gumbel  s    r  c                 C   st   | j | j }t tjd ¡|j |  ¡ }| d¡d }| j | j d|j  d  d¡}|d||  |j d¡  S )NrZ   é   r    )	rÈ   rÉ   rÑ   r©   rÒ   r   rV   r\   r‘   )rd   re   rù   rj   rk   rq   r.   r.   r/   Ú_kl_uniform_normal  s
     r  c                 C   sl   | j | j }|j|j |j¡ |  ¡ }t| j ƒt| jƒ | | }||jd  | }t|| j|jj	k < |S rƒ   )
rÈ   rÉ   rÂ   r   r\   rV   rW   r   rÃ   rÄ   )rd   re   Zsupport_uniformrj   rk   r‹   r.   r.   r/   Ú_kl_uniform_pareto  s    r  c                 C   s*   | j |j krt‚t| j|jƒ}t|| j ƒS r'   )Zreinterpreted_batch_ndimsrb   rf   r™   r$   rÊ   r.   r.   r/   Ú_kl_independent_independent"  s    r  c                 C   sD   | j |j   d¡| j|j  d¡  ¡ }d| j  |j   ¡ }|| S )NrZ   é   rÀ   ri   r.   r.   r/   Ú_kl_cauchy_cauchy*  s    (r  c                  C   sX   dg} t tdd„ dD ]\}}|  d |j|j¡¡ qd | ¡}tjrTt j|7  _dS )zHAppends a list of implemented KL functions to the doc for kl_divergence.zLKL divergence is currently implemented for the following distribution pairs:c                 S   s   | d j | d j fS rg   )rB   )Zp_qr.   r.   r/   Ú<lambda>5  ó    z_add_kl_info.<locals>.<lambda>)ÚkeyzG* :class:`~torch.distributions.{}` and :class:`~torch.distributions.{}`z
	N)Úsortedr(   Úappendr5   rB   Újoinrf   Ú__doc__)Zrowsrd   re   Zkl_infor.   r.   r/   Ú_add_kl_info1  s    ÿ ÿ
r  )ŽrÑ   rN   Ú	functoolsr   Útypingr   r   r   r   rS   Z
torch._sixr   Z	bernoullir	   Úbetar
   Zbinomialr   Zcategoricalr   Zcauchyr   Zcontinuous_bernoullir   Z	dirichletr   Údistributionr   Zexponentialr   Z
exp_familyr   Úgammar   Z	geometricr   Zgumbelr   Zhalf_normalr   Zindependentr   Zlaplacer   Zlowrank_multivariate_normalr   r   r   Zmultivariate_normalr   r   Únormalr   Zone_hot_categoricalr   Zparetor    Zpoissonr!   Ztransformed_distributionr"   Úuniformr#   Úutilsr$   r%   r’   r(   r&   Ú__annotations__r6   Úobjectr7   rR   rU   rW   r`   rf   rl   rt   ry   r{   r   r‚   r†   r   r   r”   r—   rš   r   r²   r·   r½   r¾   r˜   rÁ   rÅ   rÆ   rÇ   rË   rÌ   rÍ   rÎ   rÏ   rÐ   rÔ   rÖ   r×   rØ   rÙ   rß   rá   râ   ræ   rç   rè   ré   rê   rë   rí   rï   rð   rñ   rò   ró   rõ   rö   r÷   rú   rû   rü   rý   rþ   rÿ   r   r  r  r  r  r  r  r.   r.   r.   r/   Ú<module>   s€   '
%































































	

