U
    3dM                  	   @   s  d Z ddlZddlmZ ddlZddlZddlmZ ddlZ	ddl
mZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZmZm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z(m)Z)m*Z* ddlm+Z+ ddl,m-Z- ddl.m/Z/m0Z0m1Z1 ddl2m3Z3 ddlm4Z4 ddl5m6Z6m7Z7 dd Z8dd Z9dd Z:dd  Z;d!d" Z<ej=>d#d$d%gej=>d&d'd(gej=>d)d*d+d,d-gd.d/ Z?d0d1 Z@d2d3 ZAd4d5 ZBd6d7 ZCd8d9 ZDd:d; ZEd<d= ZFej=Gd>ej=>d?ed@dA ZHdBdC ZIdDdE ZJdFdG ZKdHdI ZLdJdK ZMdLdM ZNdNdO ZOdPdQ ZPdRdS ZQdTdU ZRdVdW ZSej=>d)d*d+d,gdXdY ZTdZd[ ZUd\d] ZVej=>d)d*d+d,gej=>d^d_d`dgfdad`dgfdbddgfgdcdd ZWdedf ZXdgdh ZYdidj ZZdkdl Z[dS )mz=
Several basic tests for hierarchical clustering procedures

    N)mkdtemp)partial)sparse)	hierarchy)connected_components)adjusted_rand_score)METRICS_DEFAULT_PARAMS)assert_almost_equalcreate_memmap_backed_data)assert_array_almost_equal)ignore_warnings)	ward_tree)AgglomerativeClusteringFeatureAgglomeration)_hc_cut_TREE_BUILDERSlinkage_tree_fix_connectivity)grid_to_graph)DistanceMetric)PAIRED_DISTANCEScosine_distancesmanhattan_distancespairwise_distances)normalized_mutual_info_score)kneighbors_graph)average_merge	max_mergemst_linkage_core)IntFloatDict)assert_array_equal)
make_moonsmake_circlesc               	   C   s   t jd} | jdd}tt t|dd W 5 Q R X tt t|t dd W 5 Q R X t	 
| t|}t|dd	}t|d
 t|dd	d
  t|td	}t|d
 t|dd	d
  d S )N*   )   r$   sizeZfoo)linkage   r)   connectivityprecomputedaffinityr   cosine	manhattan)nprandomRandomStatenormalpytestraises
ValueErrorr   onesr   fitr   r    r   )rngXdisres r>   K/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/cluster/tests/test_hierarchical.pytest_linkage_misc5   s    r@   c            
   
   C   s   t jd} t jddgtd}d|ddddf< | dd}t|j }t	 D ]}||j
|d\}}}}d	|jd
  d
 }	t|| |	ksttt ||j
t dd W 5 Q R X tt ||j
d d |d W 5 Q R X qPd S )Nr   
   Zdtyper)      2   d   r*         r(   )r1   r2   r3   r8   boolrandnr   shaper   valuesTlenAssertionErrorr5   r6   r7   )
r:   maskr;   r+   tree_builderchildrenn_componentsn_leavesparentn_nodesr>   r>   r?   test_structured_linkage_treeN   s     
 rV   c                  C   s  t jd} | dd}||d fD ]h}t 2 tt t|j	dd\}}}}W 5 Q R X W 5 Q R X d|j
d  d }t|| |ks$tq$t D ]z}||d fD ]h}t 2 tt ||j	dd\}}}}W 5 Q R X W 5 Q R X d|j
d  d }t|| |kstqqd S )Nr   rD   rE   rA   )
n_clustersrF   rG   )r1   r2   r3   rI   r   r5   warnsUserWarningr   rL   rJ   rM   rN   r   rK   )r:   r;   Zthis_XrQ   rU   rS   rT   rP   r>   r>   r?   test_unstructured_linkage_treee   s$    * "rZ   c            	      C   s   t jd} t jddgtd}| dd}t|j }t	 D ]@}||j
|d\}}}}d|jd  d }t|| |ks<tq<d S )	Nr   rA   rB   rD   rE   r*   rF   rG   )r1   r2   r3   r8   rH   rI   r   rJ   r   rK   rL   rM   rN   )	r:   rO   r;   r+   linkage_funcrQ   rU   rS   rT   r>   r>   r?   test_height_linkage_tree}   s    
 r\   c               	   C   sD   t ddgddgg} d}tjt|d t| dd W 5 Q R X d S )Nr   rG   z;Cosine affinity cannot be used when X contains zero vectorsmatchr/   r-   )r1   arrayr5   r6   r7   r   )r;   msgr>   r>   r?   test_zero_cosine_linkage_tree   s    ra   zn_clusters, distance_threshold)N      ?)rA   Ncompute_distancesTFr'   wardcompleteaveragesinglec                 C   s   t jd}t jddgtd}d}||d}t|j }t| ||||d}	|		| |s`|d k	rt
|	dsnt|	jjd }
|
d }|	jj|d fkstnt
|	drtd S )	Nr   rA   rB   rE   rD   )rW   r+   r'   distance_thresholdrc   
distances_rG   )r1   r2   r3   r8   rH   rI   r   rJ   r   r9   hasattrrN   	children_ri   )rW   rc   rh   r'   r:   rO   	n_samplesr;   r+   
clusteringZ
n_childrenrU   r>   r>   r?   'test_agglomerative_clustering_distances   s&    

rn   c              
   C   s2  t j| }t jddgtd}d}||d}t|j }dD ]}td||d}|	| zBt }td|||d}|	| |j}	t t |	dkstW 5 t
| X td||d}d|_|	| tt|j|	d	 d |_|	| t t |jdksttdt| d dd df |d}tt |	| W 5 Q R X q<td| d
dd}tt |	| W 5 Q R X t D ]X}
tdt ||f|
dd}|	| tdd |
dd}|	| tt|j|jd	 qtd|dd}|	| t|}td|ddd}|	| t|j|j d S )NrA   rB   rE   rD   )rd   re   rf   rg   rW   r+   r'   )rW   r+   Zmemoryr'   FrG   r0   rd   )rW   r+   metricr'   re   r,   )r1   r2   r3   r8   rH   rI   r   rJ   r   r9   shutilrmtreer   labels_r&   uniquerN   compute_full_treer	   r   r+   r   Z
lil_matrixZtoarrayr5   r6   r7   r   keysr   r    )global_random_seedr:   rO   rl   r;   r+   r'   rm   tempdirlabelsrp   Zclustering2X_distr>   r>   r?   test_agglomerative_clustering   s    

  

  


   
   

r{   c                  C   s2   t jd} t| dd}tddd| dS )zhAgglomerativeClustering must work on mem-mapped dataset.

    Non-regression test for issue #19875.
    r   rD   rE   	euclideanrg   rp   r'   N)r1   r2   r3   r
   rI   r   r9   )r:   Xmmr>   r>   r?   +test_agglomerative_clustering_memory_mapped  s    r   c              	   C   s   t j| }t jddgtd}|dd}t|j }td|d}|	| t 
t |jdksdt||}|jd dkst||}t |d j
dkstt||| tt |	|d d  W 5 Q R X d S )	NrA   rB   rD   rE   r$   rW   r+   rG   r   )r1   r2   r3   r8   rH   rI   r   rJ   r   r9   r&   rt   rs   rN   Z	transformZinverse_transformr   r5   r6   r7   )rw   r:   rO   r;   r+   ZaggloZX_redZX_fullr>   r>   r?   test_ward_agglomeration  s    



r   c                  C   sv   t ddd\} }tddd}||  tt|j|d tdd	dd
\}}tddd}|| tt|j|d d S )Ng?r#   )noiserandom_staterF   rg   )rW   r'   rG   rb   g?)Zfactorr   r   )r!   r   r9   r	   r   rs   r"   )ZmoonsZmoon_labelsrm   ZcirclesZcircle_labelsr>   r>   r?   test_single_linkage_clustering0  s    

 

 r   c                 C   sv   g }| |fD ]L}t |}| d }t||f}d|t||f< |t||j q|d |d k srt	dS )zUtil for comparison with scipyrG   r   N)
rM   maxr1   zerosarangeappenddotrL   allrN   )Zcut1Zcut2Zco_clustcutnkZecutr>   r>   r?   assess_same_labellingA  s    r   c              	   C   s>  d\}}}t j| }t ||f}t D ]}tdD ]}d|j||fd }|dt |d d t j	f  8 }||j
ddd d t j	f 8 }tj||d}	|	d d d d	f jtd
d}
t| ||d\}}}}|jdd t||
d|  t|||}t||
|}t|| q8q,tt t|d || W 5 Q R X d S )NrA   r$      r$   皙?r%         @rG   ZaxismethodrF   Fcopyr*   z2linkage tree differs from scipy impl for linkage: )r1   r2   r3   r8   r   rv   ranger4   r   newaxismeanr   r'   astypeintsortr    r   r   r5   r6   r7   )rw   r   pr   r:   r+   r'   ir;   outrk   rQ   _rS   r   Zcut_r>   r>   r?   test_sparse_scikit_vs_scipyM  s2    
  r   c                 C   s   d\}}}t j| }d|j||fd }|dt |d d t jf  8 }||jddd d t jf 8 }tj|dd}|d d d d	f 	t
}td |\}}	}
}	|jdd t||d
 t|||
}t|||
}t|| d S )Nr   r   r%   r   rG   r   rg   r   rF   z8linkage tree differs from scipy impl for single linkage.)r1   r2   r3   r4   r   r   r   r   r'   r   r   r   r   r    r   r   )rw   rl   
n_featuresrW   r:   r;   r   Zchildren_scipyrQ   r   rS   r   Z	cut_scipyr>   r>   r?   )test_vector_scikit_single_vs_scipy_singleu  s"    
 r   z/ignore:WMinkowskiDistance:FutureWarning:sklearnmetric_param_gridc                 C   s   t jjdd}|jdd}t|}| \}}| }tj|  D ]B}t	t
||}tj|f|}	t||	}
t||	}t j|
| q@dS )zoThe MST-LINKAGE-CORE algorithm must work on mem-mapped dataset.

    Non-regression test for issue #19875.
    rG   )seed)   r)   r%   N)r1   r2   r3   r4   r
   rv   	itertoolsproductrK   dictzipr   Z
get_metricr   ZtestingZassert_equal)r   r:   r;   r~   rp   Z
param_gridrv   valskwargsZdistance_metricZmstZmst_mmr>   r>   r?   #test_mst_linkage_core_memory_mapped  s    

r   c               
   C   s   t dddgdddgdddgdddgdddgdddgg} t ddddddg}t| ddd}d||j  }t| |d\}}d	D ].}td||d
}||  tt|j	|d qd S )Nr   rG   rF   r   FZn_neighborsinclude_selfrb   r|   )rg   rf   rf   rd   )rW   r'   r+   )
r1   r_   r   rL   r   r   r9   r	   r   rs   )r;   Ztrue_labelsr+   rR   r'   rm   r>   r>   r?   test_identical_points  s     :  

 r   c                  C   sR   t dddddddddddddddg} t| d	d
d}td|dd}||  d S )N)y&1?gQ?)r   gMbX?)r   gEԸ?)g rh?/$?);On?r   )r   g~jt?)r   gOn?)r   g;On?rA   Fr   r)   rd   ro   )r1   r_   r   r   r9   )r;   r+   rd   r>   r>   r?   test_connectivity_propagation  s2      r   c           	      C   s   d\}}t j| }t ||f}tdD ]|}d|j||fd }|dt |d d t jf  8 }||jddd d t jf 8 }t	|}t	||d}t
|d	 |d	  q*d S )
NrA   r$   r$   r   r%   r   rG   r   r*   r   )r1   r2   r3   r8   r   r4   r   r   r   r   r    )	rw   r   r   r:   r+   r   r;   out_unstructuredout_structuredr>   r>   r?   test_ward_tree_children_order  s     r   c              
   C   sx  d\}}t j| }t ||f}tdD ]}d|j||fd }|dt |d d t jf  8 }||jddd d t jf 8 }t	|dd	}t	||dd
}|d }	|d }
t
|	|
 |d }|d }t|| dD ]^}t|||ddd }t||ddd }|d }|d }|d }|d }t|| t|| qq*t ddgddgddgddgddgddgg}t ddddgdd d!dgd"dd#dgd$d%d&dgd'd(d)d$gg}t ddddgdd d!dgd"dd#dgd$d%d*dgd'd(d+d$gg}t ddddgdd d!dgd"dd#dgd$d%d,dgd'd(d-d$gg}t |\}}t ||f}t	|dd	}t	||dd
}t
|d d d d.f |d  t
|d d d d.f |d  t|d d d.f |d/  t|d d d.f |d/  d0d1d2g}||g}t||D ]\}}t|d|d3}t|||dd}t
|d d d d.f |d  t
|d d d d.f |d  t|d d d.f |d/  t|d d d.f |d/  qd S )4Nr   r$   r   r%   r   rG   r   T)return_distance)r+   r   r   )rf   re   rg   )r+   r'   r   )r'   r   gя?geGgw7@g})J@gZ!E@gn]#g!܄@g,8g!Yz @gRա&<agڎF@gT!@g      @g0rq5?g       @      ?g      @gAVJS?g        gL/u@g      @g       @g6SHD4"@g      @g      "@gwʴG8@gwfۣ@g63C2@go;@g_ .@rF   r)   re   rf   rg   )r   r'   )r1   r2   r3   r8   r   r4   r   r   r   r   r    r   r   r_   rJ   r   )rw   r   r   r:   r+   r   r;   r   r   Zchildren_unstructuredZchildren_structuredZdist_unstructuredZdist_structuredr'   Zstructured_itemsZunstructured_itemsZstructured_distZunstructured_distZstructured_childrenZunstructured_childrenZlinkage_X_wardZlinkage_X_completeZlinkage_X_averagerl   r   Zconnectivity_XZout_X_unstructuredZout_X_structuredZlinkage_optionsZX_linkage_truthZX_truthr>   r>   r?   &test_ward_linkage_tree_return_distance  s     

   



















   r   c               	   C   sj   t ddgddgg} t ddgddgg}tdd|d}t|dd}tt ||  W 5 Q R X d S )	Nr   rG   TFrF   )n_xn_yrO   rd   r+   r'   )r1   r_   r   r   r5   rX   rY   r9   )xmcwr>   r>   r?    test_connectivity_fixing_non_lil`  s    r   c            	      C   s   t jd} t | jdddjt jdd}| t|}t	||}t
||D ]\}}|| |ksNtqNt jdt jdd d d	 }t dd
d d d	 }t	||}t||t jdt jdddd t||t jdt jdddd d S )Nr   rE   rA   r%   Fr   rD   rB   rF   rb   rG   )rO   Zn_aZn_b)r1   r2   r3   rt   randintr   ZintprandrM   r   r   rN   r   fullr   r8   r   )	r:   rv   rK   dkeyvalueZ
other_keysZother_valuesotherr>   r>   r?   test_int_float_dictm  s     

r   c                  C   sj   t jd} | dd}t|ddd}t|d}tttdddd}|| || t|j	|j	 d S )	Nr   r   r$   r   Fr   r*   r   )
r1   r2   r3   r   r   r   r   r9   r    rs   )r:   r;   r+   aglc1aglc2r>   r>   r?   test_connectivity_callable~  s    


r   c                  C   sn   t jd} | dd}t|ddd}t|ddd}t|d}t|d}|| || t|j|j d S )	Nr   r   r$   r   Fr   Tr*   )	r1   r2   r3   r   r   r   r9   r    rs   )r:   r;   r+   Zconnectivity_include_selfr   r   r>   r>   r?   "test_connectivity_ignores_diagonal  s    



r   c                  C   s   t jd} | dd}t|ddd}td|d}|| |jd }|jjd }||d ksbt	d	}| d
d}t|ddd}t||d}|| |jd }|jjd }||| kst	d S )Nr   rA   rF   r$   Fr   r   rG   e      )
r1   r2   r3   rI   r   r   r9   rJ   rk   rN   )r:   r;   r+   Zagcrl   rU   rW   r>   r>   r?   test_compute_full_tree  s     



r   c                  C   sP   t jd} | dd}t d}t D ] }t|||dd dks*tq*d S )Nr   r$   r*   rG   )	r1   r2   r3   r   Zeyer   rK   r   rN   )r:   r;   r+   r[   r>   r>   r?   test_n_components  s
    
r   c                  C   sv   d} t jd}|| | }t ddddg}t| | |t jd}G dd d}| }t|||jd |j	d	ksrt
d S )
NrF   r   TF)r   r   rO   Z	return_asc                   @   s   e Zd Zdd Zdd ZdS )z>test_affinity_passed_to_fix_connectivity.<locals>.FakeAffinityc                 S   s
   d| _ d S )Nr   counter)selfr>   r>   r?   __init__  s    zGtest_affinity_passed_to_fix_connectivity.<locals>.FakeAffinity.__init__c                 _   s   |  j d7  _ | j S )NrG   r   )r   argsr   r>   r>   r?   	increment  s    zHtest_affinity_passed_to_fix_connectivity.<locals>.FakeAffinity.incrementN)__name__
__module____qualname__r   r   r>   r>   r>   r?   FakeAffinity  s   r   )r+   r.   r   )r1   r2   r3   rI   r_   r   Zndarrayr   r   r   rN   )r&   r:   r;   rO   r+   r   far>   r>   r?   (test_affinity_passed_to_fix_connectivity  s    r   c                 C   s   t j|}t jddgtd}d}||d}t|j }d}d |fD ]}td ||| d}	|		| |	j
}
tt |	j
}t|  }|||d dd\}}}}}t ||kd }||kstt|||d	}t |
|sDtqDd S )
NrA   rB   rE   rD   )rW   rh   r+   r'   T)r+   rW   r   rG   )rW   rQ   rS   )r1   r2   r3   r8   rH   rI   r   rJ   r   r9   rs   rM   rt   r   Zcount_nonzerorN   r   Zarray_equiv)r'   rw   r:   rO   rl   r;   r+   rh   connrm   Zclusters_producedZnum_clusters_producedrP   rQ   rR   rS   rT   Z	distancesZnum_clusters_at_thresholdZclusters_at_thresholdr>   r>   r?   5test_agglomerative_clustering_with_distance_threshold  s@    

     r   c                 C   sx   t j| }d}|jdd|dfd}td ddd|}t|d	d
d}t |t j t 	|dksft
|j|kstt
d S )NrA   ii,  r   r%   r   rg   rW   rh   r'   	minkowskirF   rp   r   r   )r1   r2   r3   r   r   r9   r   fill_diagonalinfr   rN   Zn_clusters_)rw   r:   rl   r;   rm   Zall_distancesr>   r>   r?   test_small_distance_threshold  s      r   c                 C   s   t j| }d}|jdd|dfd}d}td |dd|}|j}t|d	d
d}t |t j	 t 
|D ]r}||k}	||	 d d |	f jdd }
||	 d d |	 f jdd }|	 dkr|
|k st||kshtqhd S )NrE   irA   r   r%   r)   rg   r   r   rF   r   r   r   rG   )r1   r2   r3   r   r   r9   rs   r   r   r   rt   minr   sumrN   )rw   r:   rl   r;   rh   rm   ry   DlabelZin_cluster_maskZmax_in_cluster_distanceZmin_out_cluster_distancer>   r>   r?   .test_cluster_distances_with_distance_threshold  s.       r   )	thresholdy_truerb   rG   r   g      ?c                 C   s:   dgdgg}t d || d}||}t||dks6td S )Nr   rG   r   )r   fit_predictr   rN   )r'   r   r   r;   	clustererZy_predr>   r>   r?   ?test_agglomerative_clustering_with_distance_threshold_edge_case*  s      
r   c               	   C   s   dgdgg} t jtdd td d d|  W 5 Q R X t jtdd tddd|  W 5 Q R X dgdgg} t jtdd td ddd	|  W 5 Q R X d S )
Nr   rG   zExactly one of r]   )rW   rh   rF   z!compute_full_tree must be True ifF)rW   rh   ru   )r5   r6   r7   r   r9   )r;   r>   r>   r?   &test_dist_threshold_invalid_parameters:  s      r   c               	   C   sH   t jd} | dd}tjtdd tddd| W 5 Q R X d S )	Nr   r$   r   z>Distance matrix should be square, got matrix of shape \(5, 3\)r]   r,   re   r}   )	r1   r2   r3   r   r5   r6   r7   r   r9   )r:   r;   r>   r>   r?   *test_invalid_shape_precomputed_dist_matrixI  s    r   c                  C   s   t dddddgdddddgdddddgdddddgdddddgg} t| d dksZtt jd}|dd}t|}td| dd}d	}t	j
t|d
 || W 5 Q R X t| dd}t	j
t|d
 || W 5 Q R X t|j|j t|j|j dS )zCheck that connecting components works when connectivity and
    affinity are both precomputed and the number of connected components is
    greater than 1. Non-regression test for #16151.
    r   rG   rF   r$   rA   r,   re   )r.   r+   r'   z.Completing it to avoid stopping the tree earlyr]   r   N)r1   r_   r   rN   r2   r3   rI   r   r   r5   rX   rY   r9   r    rs   rk   )Zconnectivity_matrixr:   r;   rz   Zclusterer_precomputedr`   r   r>   r>   r?   Btest_precomputed_connectivity_affinity_with_2_connected_componentsU  s8    
   r   c               	   C   s   t jd} | dd}tdd}d}tjt|d || W 5 Q R X tjt|d |	| W 5 Q R X tddd}d	}tj
t|d || W 5 Q R X tj
t|d |	| W 5 Q R X d S )
Nr#   rD   rA   r|   r-   zcAttribute `affinity` was deprecated in version 1.2 and will be removed in 1.4. Use `metric` insteadr]   )rp   r.   z;Both `affinity` and `metric` attributes were set. Attribute)r1   r2   r3   rI   r   r5   rX   FutureWarningr9   r   r6   r7   )r:   r;   afr`   r>   r>   r?   test_deprecate_affinity}  s    
r   )\__doc__r   tempfiler   rq   r5   	functoolsr   Znumpyr1   Zscipyr   Zscipy.clusterr   Zscipy.sparse.csgraphr   Zsklearn.metrics.clusterr   Z'sklearn.metrics.tests.test_dist_metricsr   Zsklearn.utils._testingr	   r
   r   r   Zsklearn.clusterr   r   r   Zsklearn.cluster._agglomerativer   r   r   r   Z sklearn.feature_extraction.imager   Zsklearn.metricsr   Zsklearn.metrics.pairwiser   r   r   r   r   Zsklearn.neighborsr   Z"sklearn.cluster._hierarchical_fastr   r   r   Zsklearn.utils._fast_dictr   r    Zsklearn.datasetsr!   r"   r@   rV   rZ   r\   ra   markZparametrizern   r{   r   r   r   r   r   r   filterwarningsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r>   r>   r>   r?   <module>   s   	^
(
v
&  (