U
    3dG                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dlm
Z
 d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ eeeeeeegZdd Zdd Z dd Z!dd Z"dd Z#dd  Z$d!d" Z%d#d$ Z&d^d'd(Z'd)d* Z(d+d, Z)d-d. Z*d/d0 Z+d1d2 Z,d3d4 Z-d5d6 Z.d7d8 Z/d_d:d;Z0d<d= Z1d>d? Z2ej34d@dAgdB dCdCd d dCdCgfdCgdB dCdCd d dCdCgfdCdCd d dCdCgdAgdB fdCdCd d dCdCgdCgdB fdAgdB dAgdB fgdDdE Z5dFdG Z6dHdI Z7dJdK Z8dLdM Z9ej34dNe:e;dOe:e;dOfe<dPe<dPfgdQdR Z=dSdT Z>dUdV Z?ej34dWdXdYdZd[gd\d] Z@dS )`    N)adjusted_mutual_info_score)adjusted_rand_score)
rand_score)completeness_score)contingency_matrix)pair_confusion_matrix)entropy)expected_mutual_information)fowlkes_mallows_score)"homogeneity_completeness_v_measure)homogeneity_score)mutual_info_score)normalized_mutual_info_score)v_measure_score_generalized_average)check_clusterings)assert_all_finite)assert_almost_equal)assert_array_equalassert_array_almost_equalassert_allclosec               
   C   s   t D ]} d}tjt|d | ddgdddg W 5 Q R X d}tjt|d" | ddgddggdddg W 5 Q R X d}tjt|d" | dddgddgddgg W 5 Q R X qd S )NzDFound input variables with inconsistent numbers of samples: \[2, 3\]matchr      z$labels_true must be 1D: shape is \(2z$labels_pred must be 1D: shape is \(2)score_funcspytestraises
ValueError)
score_funcexpected r!   Q/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/metrics/cluster/tests/test_supervised.py"test_error_messages_on_wrong_input'   s    &r#   c                     s   d\ ddddg}  fdd| D }|d |d	   krV|d
   krV|d ks\n t d\fdd| D }|d |d	   kr|d
   kr|d ksn t d S )N)r      min	geometric
arithmeticmaxc                    s   g | ]}t  |qS r!   r   .0method)abr!   r"   
<listcomp>;   s     z,test_generalized_average.<locals>.<listcomp>r   r   r$      )   r0   c                    s   g | ]}t  |qS r!   r   r)   )cdr!   r"   r.   >   s     )AssertionError)methodsmeansr!   )r,   r-   r1   r2   r"   test_generalized_average8   s    4r6   c                  C   s8  t D ]} | g g tdks t| dgdgtdks<t| dddgdddgtdks`t| dddgdddgtdkst| dddgdddgtdkst| ddd	gddd	gtdkst| ddd
gddd
gtdkstqttg}ddddh}|D ]&} |D ]}| g g |dtdks8t| dgdg|dtdksZt| dddgdddg|dtdkst| dddgdddg|dtdkst| dddgdddg|dtdkst| ddd	gddd	g|dtdkst| ddd
gddd
g|dtdkstqq
d S )N      ?r   r   *                    E@g      @       @r$   r%   r&   r'   r(   average_method)r   r   approxr3   r   r   )r   Zscore_funcs_with_changing_meansr5   Zmeanr!   r!   r"   test_perfect_matchesB   sf    $$$$&

"  
  
  
  
  r@   c                  C   sP   t ddddddgddddddg\} }}t| dd t|dd t|dd d S )Nr   r   r$   r7   gGz?gQ?r   r   hr1   vr!   r!   r"   *test_homogeneous_but_not_complete_labelinge   s    (rE   c                  C   sP   t ddddddgddddddg\} }}t| dd t|dd t|dd d S )Nr   r   r$   g(\?r7   g\(\?rA   rB   r!   r!   r"   *test_complete_but_not_homogeneous_labelingm   s    (rF   c                  C   sP   t ddddddgddddddg\} }}t| dd t|dd t|dd d S )Nr   r   r$   q=
ףp?zG?p=
ף?rA   rB   r!   r!   r"   .test_not_complete_and_not_homogeneous_labelingu   s    (rJ   c                  C   s   d} d}d}d|  | | | | |  }t ddddddgddddddg| d\}}}t||d t||d t||d tddddddgddddddg| d}t||d d S )Ng?rG   rH   r   r   r$   )beta)r   r   r   )Z	beta_testZh_testZc_testZv_testrC   r1   rD   r!   r!   r"   test_beta_parameter}   s      &rL   c                  C   sT  t ddddddgddddddg\} }}t| dd t|dd t|dd t ddddddgddddddg\} }}t| dd t|dd t|dd tddddddgddddddg}tddddddgddddddg}t|dd t|dd tddddddgddddddg}tddddddgddddddg}t|d	d t|d	d d S )
Nr   r$   r   rG   rH   rI      gQ?gQ?)r   r   r   r   )rC   r1   rD   Zari_1Zari_2Zri_1Zri_2r!   r!   r"   test_non_consecutive_labels   s     ((""""rN   
   r8   c                 C   sr   t j|j}t t||f}t|D ]D\}}t|D ]2}	|d||d}
|d||d}| |
||||	f< q8q(|S )Nr   )lowhighsize)nprandomRandomStaterandintzeroslen	enumeraterange)r   	n_samplesZk_rangen_runsseedZrandom_labelsscoresikjlabels_alabels_br!   r!   r"   uniform_labelings_scores   s    rd   c                  C   sL   ddddg} d}d}t t|| |}t|jdd}t|dd	d	dgd d S )
Nr$   rO   2   Z   d   r   )Zaxisg{Gz?gQ?)rd   r   rS   absr(   r   )Zn_clusters_ranger[   r\   r^   Zmax_abs_scoresr!   r!   r"   test_adjustment_for_chance   s       ri   c            	      C   sf  t dddddddddddddddddg} t dddddddddddddddddg}t| |}t|dd t| |dd}t| ||d}t|dd t| |}t| ||d}t|dd | }t||}t|d	d t| |}t|d
d tddddgddddg}|t	dkst
t t| d g }t t|d g }t||}t|dd d S )Nr   r$   r/   gS
cA?   Tsparse)ZcontingencygpUj@?gP1?r7   n   gRQ?)rS   arrayr   r   r   sumr	   r   r   r?   r3   listflatten)	rb   rc   miCr[   ZemiZamiZa110Zb110r!   r!   r"   test_adjusted_mutual_info_score   s*    ,,




rt   c                   C   s    t tdggddkstd S )Nip r   )r	   rS   rn   r3   r!   r!   r!   r"   "test_expected_mutual_info_overflow   s    ru   c                  C   s   t dgd dgd  dgd  dgd  d	gd
  } t dgd dgd  dgd  dgd  dgd  dgd  dgd  dgd  dgd  dgd  }tt| | tt| | d S )Nr   iy  r$   i]<  r/   i  rM   iU  rj   iP  r   i  i	  iD9  i  i     i.  '   i<     )rS   rn   r   r   r
   )xyr!   r!   r"   3test_int_overflow_mutual_info_fowlkes_mallows_score   sD    	r{   c                  C   sD   t dddg} t| dd tt g d t ddddgdks@td S )Nr   r;   g,^R^?rj   r   )r   r   r3   )entr!   r!   r"   test_entropy  s    r}   c                  C   s   t dddddddddddddddddg} t dddddddddddddddddg}t| |}t j| |t ddt ddfdd }t|| t| |dd}t||d  d S )	Nr   r$   r/   rj   )Zbinsr   g?)eps)rS   rn   r   Zhistogram2daranger   )rb   rc   rs   ZC2r!   r!   r"   test_contingency_matrix  s    ,,
(
r   c                  C   s   t dddddddddddddddddg} t dddddddddddddddddg}t| |}t| |dd }t|| tjtdd t| |ddd	 W 5 Q R X d S )
Nr   r$   r/   Trk   z!Cannot set 'eps' when sparse=Truer   g|=)r~   rl   )rS   rn   r   Ztoarrayr   r   r   r   )rb   rc   rs   ZC_sparser!   r!   r"   test_contingency_matrix_sparse  s    ,,

r   c                  C   s   t dddtD ]} t j| tdt j| td }}t||tdksNt	t
||tdksft	t||tdks~t	t||tdkst	dD ]<}t|||dtdkst	t|||dtdkst	qqd S )Nr   rM   dtyper:   )r%   r&   r'   r(   r=   )rS   logspaceastypeintZonesr   r   r   r?   r3   r   r   )r_   rb   rc   r+   r!   r!   r"   test_exactly_zero_info_score  s*        r   $   c                 C   s   t dddtD ]x}t j| }|dd||dd| }}tt||dt	|| t
|t
|  d d}tt||t|||d qd S )Nr   rM   r   rO   r<   r'   r=   )rS   r   r   r   rT   rU   rV   r   r   r   r   r   )r]   r_   Zrandom_staterb   rc   avgr!   r!   r"   %test_v_measure_and_mutual_information-  s&    r   c                  C   s   t ddddddgddddddg} t| dtd  t ddddddgddddddg}t|d t ddddddgdddddd	g}t|d
 d S )Nr   r   r$   g      @g      R@r7   r/   rM   rj   r:   )r
   r   rS   sqrt)ZscoreZperfect_scoreZworst_scorer!   r!   r"   test_fowlkes_mallows_scoreC  s    ""
"r   c                  C   s   t ddddddg} t ddddddg}dt d }t| |}t|| t|| }t|| t| d d |}t|| t|| d d }t|| d S )Nr   r   r$   r7   g      (@r/   )rS   rn   r   r
   r   )rb   rc   r    Zscore_originalZscore_symmetricZscore_permutedZ
score_bothr!   r!   r"   %test_fowlkes_mallows_score_propertiesQ  s    




r   zlabels_true, labels_predr,      r   c                 C   s   t | |dkstd S )Nr   )r   r3   )Zlabels_trueZlabels_predr!   r!   r"   .test_mutual_info_score_positive_constant_labelh  s    r   c               	   C   sT   t jd} | d}t dddd }d}tjt|d t|| W 5 Q R X d S )Nr8   i  g{Gz?r   gư>zuClustering metrics expects discrete values but received continuous values for label, and continuous values for targetr   )	rS   rT   rU   ZrandZlinspacer   ZwarnsUserWarningr   )rngZnoiseZ
wavelengthmsgr!   r!   r"   test_check_clustering_errorx  s    
r   c                  C   sF   d} t t| }|}t| | d  dgddgg}tt||| d S )Nrg   r   r   )rp   rZ   rS   rn   r   r   Nclustering1clustering2r    r!   r!   r"   *test_pair_confusion_matrix_fully_dispersed  s
    r   c                  C   sF   d} t | f}|}t ddgd| | d  gg}tt||| d S )Nrg   r   r   )rS   rW   rn   r   r   r   r!   r!   r"   )test_pair_confusion_matrix_single_cluster  s
    r   c                     s   d  d } t  fddt D }t  fddt D d |  }t jdt jd}tt|D ]Z}tt|D ]H}||krxt|| || k}t|| || k}|||f  d7  < qxqhtt||| d S )	NrO   r$   c                    s   g | ]}|d  g  qS r   r!   r*   r_   nr!   r"   r.     s     z.test_pair_confusion_matrix.<locals>.<listcomp>c                    s   g | ]}|d  g d   qS r   r!   r   r   r!   r"   r.     s     )r$   r$   )shaper   r   )	rS   ZhstackrZ   rW   Zint64rX   r   r   r   )r   r   r   r    r_   ra   Zsame_cluster_1Zsame_cluster_2r!   r   r"   test_pair_confusion_matrix  s    $r   zclustering1, clustering2rg   )rg   c                 C   s   t t| |d d S )Nr7   r   r   )r   r   r!   r!   r"   test_rand_score_edge_cases  s    r   c            	      C   sp   ddddddg} ddddddg}d}d}d}d| | | }|| }|| | | }|| }t t| || d S )Nr   r   r$   rM         r   )	r   r   ZD11ZD10ZD01ZD00Zexpected_numeratorZexpected_denominatorr    r!   r!   r"   test_rand_score  s    r   c               	   C   sb   t jd} | jdddt jd}| jdddt jd}t  tdt t	|| W 5 Q R X dS )zCheck that large amount of data will not lead to overflow in
    `adjusted_rand_score`.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20305
    r   r$   i r   errorN)
rS   rT   rU   rV   Zint8warningscatch_warningssimplefilterRuntimeWarningr   )r   Zy_trueZy_predr!   r!   r"   !test_adjusted_rand_score_overflow  s    
r   r>   r%   r'   r&   r(   c                 C   sv   dgd }dg|dd  }ddg|dd  }t ||| d}|dksJtt ||| d}d|  krldk srn tdS )zCheck that nmi returns a score between 0 (included) and 1 (excluded
    for non-perfect match)

    Non-regression test for issue #13836
    r   i  r   Nr$   r=   )r   r3   )r>   Zlabels1Zlabels2Zlabels3Znmir!   r!   r"   )test_normalized_mutual_info_score_bounded  s    
r   )rO   r8   )r   )Ar   ZnumpyrS   r   Zsklearn.metrics.clusterr   r   r   r   r   r   r   r	   r
   r   r   r   r   r   Z#sklearn.metrics.cluster._supervisedr   r   Zsklearn.utilsr   Zsklearn.utils._testingr   Znumpy.testingr   r   r   r   r#   r6   r@   rE   rF   rJ   rL   rN   rd   ri   rt   ru   r{   r}   r   r   r   r   r   r   markZparametrizer   r   r   r   r   rp   rZ   rW   r   r   r   r   r!   r!   r!   r"   <module>   s   
#





		*
