U
    3‰d‰^  ã                
   @   sÆ  d dl Zd dlZd dlmZ d dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ e ¡ Zd	d
ddgZej de¡ej dedejjd ƒ¡dd„ ƒƒZdd„ Zej dddg¡ej de¡dd„ ƒƒZej dd
dg¡dd„ ƒZejjdej d ¡  dd¡ej!dddd d d  gd!d"gd#ej de¡d$d%„ ƒƒZ"ej dd
dg¡d&d'„ ƒZ#ej de¡d(d)„ ƒZ$ej de¡d*d+„ ƒZ%ej de¡d,d-„ ƒZ&ej dd	d
dg¡ej d.ddg¡d/d0„ ƒƒZ'ej d1e (d dd gdd d gg¡e (d dd gdd d gg¡j)g¡ej d2d3d4d5d6g¡d7d8„ ƒƒZ*ej d9d	e+ejjƒfd
e+ejjƒd fde+ejjƒfg¡ej d1ejejj)g¡d:d;„ ƒƒZ,ej ddd	g¡d<d=„ ƒZ-ej dd
dg¡d>d?„ ƒZ.d@dA„ Z/dBdC„ Z0dDdE„ Z1dFdG„ Z2ej dHejdIdJfejdKdfej d ¡ 3dLdM¡dNdJfg¡dOdP„ ƒZ4ej de¡dQdR„ ƒZ5dSdT„ Z6ej de¡dUdV„ ƒZ7ej dd
dg¡dWdX„ ƒZ8ej dd	dg¡dYdZ„ ƒZ9ej d[ej d ¡j:d\d]dNd	fej d ¡j:d^d]dLd	fej d ¡j:d\d]d_d	fej d ¡j:d\d]d`dfg¡dadb„ ƒZ;ej de¡dcdd„ ƒZ<ej de¡dedf„ ƒZ=ej de¡dgdh„ ƒZ>didj„ Z?dkdl„ Z@dmdn„ ZAdodp„ ZBdqdr„ ZCdsdt„ ZDdudv„ ZEdwdx„ ZFdydz„ ZGd{d|„ ZHd}d~„ ZIej dddg¡dd€„ ƒZJdS )é    N)Úassert_array_equal)Úassert_allclose)Údatasets)ÚPCA)Ú	load_iris©Ú_assess_dimension)Ú_infer_dimensionÚfullÚarpackÚ
randomizedÚautoÚ
svd_solverÚn_componentsé   c                 C   s’   t j}t|| d}| |¡ |¡}|jd |ks4t‚| |¡}t||ƒ | |¡}t||ƒ | 	¡ }| 
¡ }tt ||¡t |jd ¡dd d S )N©r   r   r   çê-™—q=©Zatol)ÚirisÚdatar   ÚfitÚ	transformÚshapeÚAssertionErrorÚfit_transformr   Zget_covarianceZget_precisionÚnpÚdotZeye)r   r   ÚXÚpcaZX_rZX_r2ÚcovZ	precision© r    úH/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/decomposition/tests/test_pca.pyÚtest_pca   s    



r"   c               	   C   sZ   d} | d }t jjdd| |fd}t| d}t ¡  t dt¡ | |¡ W 5 Q R X d S )Né
   é   éÿÿÿÿr   ©Úsize©r   Úerror)	r   ÚrandomÚuniformr   ÚwarningsÚcatch_warningsÚsimplefilterÚRuntimeWarningr   )r   Ú
n_featuresr   r   r    r    r!   Útest_no_empty_slice_warning*   s    

r1   ÚcopyTFÚsolverc                 C   sˆ  t j d¡}d}d}d}d}t  | ||¡t  t  t  dd|¡¡| ||¡¡¡}|d d …d d…f  d9  < |j||fks~t‚|j	dd	 	¡ d
ks–t‚| 
¡ }t|d|| ddd}	|	 | 
¡ ¡}
|
j||fksÒt‚|	 |¡}t|
|dd t|
j	dddt  |¡ƒ t|
jdd	t  |¡dd | 
¡ }t|d|| d | 
¡ ¡}	|	 |¡}|j||fks`t‚|j	dd	 	¡ tjdddks„t‚d S )Nr   éd   éP   é   é2   g      $@ç      ð?é   ©ÚaxisgfffffæE@Té   )r   Úwhitenr2   r   Úrandom_stateZiterated_powergü©ñÒMb@?©Zrtolr   ©Zddofr;   r   r   F)r   r=   r2   r   gfffff†R@çš™™™™™¹?)Úrel)r   r*   ÚRandomStater   ÚrandnZdiagZlinspacer   r   Ústdr2   r   r   r   r   ÚonesÚmeanÚzerosr   ÚpytestÚapprox)r3   r2   ÚrngÚ	n_samplesr0   r   Úrankr   ZX_r   Z
X_whitenedZX_whitened2ZX_unwhitenedr    r    r!   Útest_whitening5   sN    
"þú	
   ÿþ
rN   c                 C   sv   t j d¡}d\}}| ||¡}tddd}td| dd}| |¡ | |¡ t|j|jdd t|j|jdd d S )	Nr   ©r4   r5   r$   r
   r   ©r   r   r>   çš™™™™™©?r?   )	r   r*   rC   rD   r   r   r   Úexplained_variance_Úexplained_variance_ratio_©r   rK   rL   r0   r   Úpca_fullÚ	pca_otherr    r    r!   Ú.test_pca_explained_variance_equivalence_solverl   s"    

  ÿýrW   r   r4   r5   éN   )Ún_informativer>   zrandom-datazcorrelated-data)Úidsc                 C   sr   t d|dd}| | ¡}t|jtj|dddƒ tj tj| dd¡d }t	|dd	d d… }t|j|d
d d S )Nr$   r   rP   r   r@   F)ZrowvarT)Úreverseç{®Gázt?r?   )
r   r   r   rR   r   ÚvarÚlinalgZeigr   Úsorted)r   r   r   ZX_pcaZexpected_resultr    r    r!   Ú%test_pca_explained_variance_empirical‚   s    

r`   c                 C   sf   t j d¡}d\}}| ||¡}tdd|d}td| |d}| |¡ | |¡ t|j|jdd d S )Nr   rO   r$   r
   rP   r\   r?   )r   r*   rC   rD   r   r   r   Úsingular_values_rT   r    r    r!   Ú$test_pca_singular_values_consistency•   s    

rb   c                 C   s"  t j d¡}d\}}| ||¡}td| |d}| |¡}tt  |jd ¡t j	 
|d¡d ƒ t|jt  t j|d dd¡ƒ d\}}| ||¡}td| |d}| |¡}|t  t j|d dd¡ }|d d …df  d	9  < |d d …d
f  d9  < t  ||j¡}| |¡ t|jd	ddgƒ d S )Nr   rO   r$   rP   Zfror:   )r4   én   r9   g‰A`åÐ"	@r   gX9´Èv¾@r8   )r   r*   rC   rD   r   r   r   Úsumra   r^   ZnormÚsqrtr   Úcomponents_r   )r   rK   rL   r0   r   r   ÚX_transZX_hatr    r    r!   Útest_pca_singular_values¤   s(    
 ÿ 

rh   c                 C   s²   t j d¡}d\}}| ||¡d }|d d…  t  dddg¡7  < d| d|¡ t  dddg¡ }td	| d
 |¡ |¡}|t  |d	  	¡ ¡ }t
t  |d d ¡ddd d S )Nr   ©r4   r9   rA   r#   r9   é   é   r   r$   r   r8   r\   r?   )r   r*   rC   rD   Úarrayr   r   r   re   rd   r   Úabs)r   rK   ÚnÚpr   ÚXtZYtr    r    r!   Útest_pca_check_projectionÂ   s      rq   c                 C   s^   ddgddgg}t d| dd}| |¡}|js6tdƒ‚t| ¡ ddd t| ¡ d	d
d d S )Nr8   g        r   r   rP   )r$   r   r   r   g¸…ëQ¸æ?r\   r?   )r   r   r   r   r   rG   rE   )r   r   r   rg   r    r    r!   Útest_pca_check_projection_listÑ   s    
rr   r=   c           	      C   s€   t j d¡}d\}}| ||¡}|d d …df  d9  < |dddg7 }td| |d	 |¡}| |¡}| |¡}t||d
d d S )Nr   )r7   r9   r   gñhãˆµøä>rk   rj   r9   r$   )r   r   r=   çñhãˆµøÔ>r?   )	r   r*   rC   rD   r   r   r   Zinverse_transformr   )	r   r=   rK   rn   ro   r   r   ÚYZ	Y_inverser    r    r!   Útest_pca_inverseÜ   s    

ru   r   z!svd_solver, n_components, err_msg)r   r   ú2must be between 1 and min\(n_samples, n_features\))r   r   rv   )r   r$   zmust be strictly less than min)r   r9   zZn_components=3 must be between 0 and min\(n_samples, n_features\)=2 with svd_solver='full'c              	   C   s|   d}t || d}tjt|d | |¡ W 5 Q R X | dkrx|}d ||¡}tjt|d t || d |¡ W 5 Q R X d S )Nr$   ©r   ©Úmatchr   zgn_components={}L? must be strictly less than min\(n_samples, n_features\)={}L? with svd_solver='arpack')r   rI   ÚraisesÚ
ValueErrorr   Úformat)r   r   r   Úerr_msgZ
smallest_dZ
pca_fittedr    r    r!   Útest_pca_validationî   s     þÿr~   zsolver, n_components_c                 C   s&   t |d}| | ¡ |j|ks"t‚d S )Nrw   )r   r   Ún_components_r   )r   r3   r   r   r    r    r!   Útest_n_components_none  s    


r€   c                 C   sH   t j d¡}d\}}| ||¡}td| d}| |¡ |jdksDt‚d S )Nr   ©iX  r#   Úmler   r   )r   r*   rC   rD   r   r   r   r   )r   rK   rL   r0   r   r   r    r    r!   Útest_n_components_mle&  s    
rƒ   c              	   C   s^   t j d¡}d\}}| ||¡}td| d}d | ¡}tjt|d | 	|¡ W 5 Q R X d S )Nr   r   r‚   r   z:n_components='mle' cannot be a string with svd_solver='{}'rx   )
r   r*   rC   rD   r   r|   rI   rz   r{   r   )r   rK   rL   r0   r   r   r}   r    r    r!   Útest_n_components_mle_error1  s    ÿr„   c               
   C   sz   t j d¡} d\}}|  ||¡d }|d d…  t  ddddd	g¡7  < td
dd |¡}|jd
ksht‚|j	dksvt‚d S )Nr   ©r4   rk   rA   r#   r9   rj   rk   r   r$   r‚   r
   r   )
r   r*   rC   rD   rl   r   r   r   r   r   )rK   rn   ro   r   r   r    r    r!   Útest_pca_dim@  s    $r†   c               	      s´   d\‰ } t j d¡}| ˆ | ¡d | ˆ d¡t  dddddg¡  t  ddd	dd
g¡ }t| dd}| |¡ |j‰t  ‡ ‡fdd„td| ƒD ƒ¡}|d | 	¡ dˆ   ks°t
‚d S )N©éè  rk   r   rA   r   r9   rj   rk   r$   r<   é   r
   r   c                    s   g | ]}t ˆ|ˆ ƒ‘qS r    r   )Ú.0Úk©rn   Úspectr    r!   Ú
<listcomp>X  s     z$test_infer_dim_1.<locals>.<listcomp>ç{®Gáz„?)r   r*   rC   rD   rl   r   r   rR   ÚrangeÚmaxr   )ro   rK   r   r   Úllr    rŒ   r!   Útest_infer_dim_1K  s    ÿþÿ
 r“   c               
   C   sž   d\} }t j d¡}| | |¡d }|d d…  t  ddddd	g¡7  < |dd
…  t  dddd	dg¡7  < t|dd}| |¡ |j}t|| ƒdksšt	‚d S )Nr‡   r   rA   r#   r9   rj   rk   r   r$   é   r‰   r<   r%   r
   r   ©
r   r*   rC   rD   rl   r   r   rR   r	   r   ©rn   ro   rK   r   r   r   r    r    r!   Útest_infer_dim_2\  s    $$
r—   c                  C   sÆ   d\} }t j d¡}| | |¡d }|d d…  t  ddddd	g¡7  < |dd
…  t  dddd	dg¡7  < |dd…  d	t  dddddg¡ 7  < t|dd}| |¡ |j}t|| ƒd	ksÂt	‚d S )Nr…   r   rA   r#   r9   rj   rk   r   r$   r”   r‰   r<   r%   r6   é(   r
   r   r•   r–   r    r    r!   Útest_infer_dim_3j  s    $$(
r™   z'X, n_components, n_components_validatedgffffffî?r$   r   rk   r”   g      à?c                 C   s<   t |dd}| | ¡ |jt |¡ks*t‚|j|ks8t‚d S )Nr
   r   )r   r   r   rI   rJ   r   r   )r   r   Zn_components_validatedr   r    r    r!   Ú$test_infer_dim_by_explained_variancew  s    	
rš   c           	      C   sð   d\}}t j d¡}| ||¡d t  dddg¡ }td| d}| |¡ | |¡}d	t  dt j	 t  
d
¡ d ¡ | }t|| d
dd | | ||¡d t  dddg¡ ¡}||ks¾t‚tdd| d}| |¡ | |¡}||ksìt‚d S )N)rˆ   r9   r   rA   r9   rj   rk   r$   r   g      à¿r   g|®Gáz„?rQ   r?   gš™™™™™É?T)r   r=   r   )r   r*   rC   rD   rl   r   r   ÚscoreÚlogÚpiÚexpr   r   )	r   rn   ro   rK   r   r   Zll1ÚhZll2r    r    r!   Útest_pca_score†  s     

&&

r    c                  C   sÔ   d\} }t j d¡}| | |¡| | d¡t  dddg¡  t  dddg¡ }| | |¡| | d¡t  dddg¡  t  dddg¡ }t  |¡}t|ƒD ](}t|dd	}| |¡ | 	|¡||< q–| 
¡ dksÐt‚d S )
N)éÈ   r9   r   r   r9   rj   rk   r<   r
   r   )r   r*   rC   rD   rl   rH   r   r   r   r›   Zargmaxr   )rn   ro   rK   ZXlrp   r’   r‹   r   r    r    r!   Útest_pca_score3œ  s    88

r¢   c                 C   sF   t jdd\}}td| dd}| |¡ t |j|j dk¡sBt‚d S )NT©Z
return_X_yr6   r   rP   )	r   Úload_digitsr   r   r   ÚallrR   Únoise_variance_r   )r   r   Ú_r   r    r    r!   Útest_pca_sanity_noise_variance«  s    
r¨   c                 C   s^   t jdd\}}tdddd}td| dd}| |¡ | |¡ t| |¡| |¡dd d S )	NTr£   r6   r
   r   rP   rs   r?   )r   r¤   r   r   r   r›   )r   r   r§   rU   rV   r    r    r!   Ú"test_pca_score_consistency_solvers·  s    

r©   c                 C   sŒ   d\}}t j d¡}| ||¡d t  dddg¡ }t|| d}| |¡ |jdksXt‚| 	|¡ | |j
¡ |jdks|t‚| 	|j
¡ d S )Nri   r   rA   r9   rj   rk   r   )r   r*   rC   rD   rl   r   r   r¦   r   r›   ÚT)r   rn   ro   rK   r   r   r    r    r!   Ú'test_pca_zero_noise_variance_edge_casesÃ  s     

r«   z#data, n_components, expected_solver)rˆ   r7   r&   )r#   r7   r7   r#   c                 C   s@   t |dd}t ||dd}| | ¡ | | ¡ t|j|jƒ d S )Nr   )r   r>   rP   )r   r   r   rf   )r   r   Zexpected_solverZpca_autoZpca_testr    r    r!   Útest_pca_svd_solver_autoÙ  s      ÿ

r¬   c              	   C   s`   t j d¡ dd¡}tj |¡}tj |¡s0t‚t	d| d}t
 t¡ | |¡ W 5 Q R X d S )Nr   rk   rj   r9   r   )r   r*   rC   ÚrandÚspÚsparseZ
csr_matrixÚissparser   r   rI   rz   Ú	TypeErrorr   )r   r   r   r    r    r!   Útest_pca_sparse_inputï  s    r²   c                 C   s‚   t j d¡}| dd¡}t  d¡}tdƒD ],}td| |d}| |¡d ||d d …f< q*t|t  	|dd d …f d¡ 
dd¡ƒ d S )Nr   r#   )r”   r$   r”   r$   rP   )r   r*   rC   r­   rH   r   r   r   r   ZtileZreshape)r   rK   r   Ztransformed_XÚir   r    r    r!   Útest_pca_deterministic_outputú  s    
r´   c                 C   s   t | ƒ t| ƒ d S )N)Ú"check_pca_float_dtype_preservationÚ$check_pca_int_dtype_upcast_to_doublerw   r    r    r!   Útest_pca_dtype_preservation  s    r·   c                 C   sº   t j d¡ dd¡jt jdd}| t j¡}td| dd |¡}td| dd |¡}|j	j
t jksft‚|j	j
t jksxt‚| |¡j
t jksŽt‚| |¡j
t jks¤t‚t|j	|j	dd	 d S )
Nr   rˆ   rj   F©r2   r9   rP   g-Cëâ6*?r?   )r   r*   rC   r­   ÚastypeÚfloat64Zfloat32r   r   rf   Údtyper   r   r   )r   ZX_64ZX_32Úpca_64Úpca_32r    r    r!   rµ     s     rµ   c                 C   sÄ   t j d¡ ddd¡}|jt jdd}|jt jdd}td| dd |¡}td| dd |¡}|j	j
t jkspt‚|j	j
t jks‚t‚| |¡j
t jks˜t‚| |¡j
t jks®t‚t|j	|j	dd	 d S )
Nr   rˆ   )rˆ   rj   Fr¸   r9   rP   g-Cëâ6?r?   )r   r*   rC   Úrandintr¹   Zint64Zint32r   r   rf   r»   rº   r   r   r   )r   ZX_i64ZX_i32r¼   r½   r    r    r!   r¶     s    r¶   c                  C   sT   t dd\} }tƒ  | |¡}|j ¡ d }t|d | |¡}|j| jd ksPt‚d S )NTr£   éþÿÿÿr(   r   )r   r   r   rS   Zcumsumr   r   r   )r   ÚyZpca1r   Zpca2r    r    r!   Ú5test_pca_n_components_mostly_explained_variance_ratio0  s
    rÁ   c               
   C   sJ   t  ddddg¡} d}dD ]*}tjtdd t| ||ƒ W 5 Q R X qd S )Nr   ç ÂëþKH´9r#   )r   rk   z"should be in \[1, n_features - 1\]rx   )r   rl   rI   rz   r{   r   )ÚspectrumrL   rM   r    r    r!   Útest_assess_dimension_bad_rank<  s
    rÄ   c                  C   sd   t  ddddg¡} t| dddt j ks,t‚dD ]}t| |dƒt j ks0t‚q0t| dƒdks`t‚d S )Nr   rÂ   r#   ©rM   rL   )r$   r9   )r   rl   r   Úinfr   r	   )rÃ   rM   r    r    r!   Útest_small_eigenvalues_mleE  s
    rÇ   c                  C   s<   t jddddddd\} }tdd | ¡}|jdks8t‚d S )Nr”   r   é   é*   )r0   rY   Z
n_repeatedZn_redundantZn_clusters_per_classr>   r‚   r(   )r   Úmake_classificationr   r   r   r   ©r   r§   r   r    r    r!   Útest_mle_redundant_dataR  s    ú
rÌ   c               	   C   sH   t jdddd\} }tddd}tjtdd	 | | ¡ W 5 Q R X d S )
Nr”   é   rÉ   )rL   r0   r>   r‚   r
   r   z?n_components='mle' is only supported if n_samples >= n_featuresrx   )r   rÊ   r   rI   rz   r{   r   rË   r    r    r!   Útest_fit_mle_too_few_samplesa  s    þrÎ   c                  C   sr   d\} }t j d¡ | |¡}t j|d d …d d…f dd|d d …df< tddd}| |¡ |j|d ksnt‚d S )	N)rˆ   r#   r   r%   r:   r‚   r
   rw   r   )	r   r*   rC   rD   rG   r   r   r   r   )rL   Zn_dimr   Zpca_sklr    r    r!   Útest_mle_simple_casen  s    *
rÏ   c                  C   s   d\} }t  | |f¡}t jj|dd\}}}t|dd … t  |d ¡dd t  t|d| d¡sdt‚t	d|ƒD ]}t||| ƒt j
 ksnt‚qnd S )	N)é	   r‰   T)Zfull_matricesr   r   r   rÅ   r$   )r   rF   r^   Zsvdr   rH   Úisfiniter   r   r   rÆ   )rL   r0   r   r§   ÚsrM   r    r    r!   Útest_assess_dimesion_rank_oney  s     rÓ   c                  C   s   t j d¡} d}|  d|¡}tdd|dd |¡}tddd |¡}tdd	dd
 |¡}tt  |j¡t  |j¡ƒ tt  |j¡t  |j¡ƒ dS )zßCheck that exposing and setting `n_oversamples` will provide accurate results
    even when `X` as a large number of features.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20589
    r   r4   rˆ   r   r   )r   r   Zn_oversamplesr>   r
   r   r   rP   N)	r   r*   rC   rD   r   r   r   rm   rf   )rK   r0   r   Zpca_randomizedrU   Z
pca_arpackr    r    r!   Ú%test_pca_randomized_svd_n_oversamples†  s    üûrÔ   c                  C   s6   t dd tj¡} |  ¡ }tdd„ tdƒD ƒ|ƒ dS )z Check feature names out for PCA.r$   r(   c                 S   s   g | ]}d |› ‘qS )r   r    )rŠ   r³   r    r    r!   rŽ   ¥  s     z*test_feature_names_out.<locals>.<listcomp>N)r   r   r   r   Zget_feature_names_outr   r   )r   Únamesr    r    r!   Útest_feature_names_out   s    rÖ   c                 C   sV   t j d¡}| dd¡}tƒ  |¡}|j|j }t j|ddd 	¡ }t j
 ||¡ dS )z9Check the accuracy of PCA's internal variance calculationr   rˆ   r¡   r   r@   N)r   r*   rC   rD   r   r   rR   rS   r]   rd   Ztestingr   )r2   rK   r   r   Zpca_varZtrue_varr    r    r!   Útest_variance_correctness¨  s    r×   )KZnumpyr   Zscipyr®   Znumpy.testingr   rI   r,   Zsklearn.utils._testingr   Zsklearnr   Zsklearn.decompositionr   Zsklearn.datasetsr   Zsklearn.decomposition._pcar   r	   r   ZPCA_SOLVERSÚmarkZparametrizer   r   r   r"   r1   rN   rW   r*   rC   rD   rÊ   r`   rb   rh   rq   rr   ru   rl   rª   r~   Úminr€   rƒ   r„   r†   r“   r—   r™   r­   rš   r    r¢   r¨   r©   r«   r+   r¬   r²   r´   r·   rµ   r¶   rÁ   rÄ   rÇ   rÌ   rÎ   rÏ   rÓ   rÔ   rÖ   r×   r    r    r    r!   Ú<module>   sè   5
þú





 4ÿüþýþ




ýþ




ùþ





	