U
    3d4A                  	   @   s  U d dl Z d dlmZmZ d dlZd dlZd dlmZ	 d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ egZee ed< egZee ed< ee ZegZee ed< egZ ee ed< ee  Z!dUddZ"dd Z#d\Z$Z%e&e$e% d Z'e"e$e%e'\Z(Z)e
j*+dddddgdd Z,d d! Z-d"d# Z.d$d% Z/d&d' Z0d(d) Z1e
j*+d*ed+d, Z2e
j*+d*ed-d. Z3d/d0 Z4d1d2 Z5d3d4 Z6d5d6 Z7d7d8 Z8d9d: Z9d;d< Z:d=d> Z;d?d@ Z<dAdB Z=dCdD Z>e
j*+dEe!dFdG Z?e
j*+dHdIe
j*+dJdIe
j*+dEe!e
j*+dKdLdMgdNdO Z@e
j*+dEe!e
j*+dPejAejAfejBejBfejCejBfejDejBffdQdR ZEe
j*+dEe!dSdT ZFdS )V    N)ListAny)euclidean_distances)johnson_lindenstrauss_min_dim)_gaussian_random_matrix)_sparse_random_matrix)SparseRandomProjection)GaussianRandomProjection)assert_allclose)assert_allclose_dense_sparse)assert_array_equal)assert_almost_equal)assert_array_almost_equal)DataDimensionalityWarningall_sparse_random_matrixall_dense_random_matrixall_SparseRandomProjectionall_DenseRandomProjectionc                 C   sP   t j|}tj|||j| |d|j||dff| |fd}| | fS )Nsize)shape)	nprandomRandomStatespZ
coo_matrixZrandnrandinttoarrayZtocsr)	n_samples
n_features
n_nonzerosrandom_staterngZdata_coo r"   H/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/tests/test_random_projection.pymake_sparse_random_data#   s    
r$   c                 C   s   t | s| S |  S d S N)r   issparser   )Zmatrixr"   r"   r#   densify2   s    
r'   )
     g      Y@zn_samples, eps)d   皙?)r*           )r*   g)r         ?c              	   C   s&   t t t| |d W 5 Q R X d S )Neps)pytestraises
ValueErrorr   )r   r/   r"   r"   r#   test_invalid_jl_domainC   s    r3   c                	   C   sT   t t tddg ddg d W 5 Q R X ttjjdddd	tdd
d d S )N   r*      g?r.      r(   )r(   r(   r   r-   )r0   r1   r2   r   r   r   r   fullr"   r"   r"   r#   test_input_size_jl_min_dimK   s    " 
r8   c              
   C   s@   dddddg}|D ](\}}t t | || W 5 Q R X qd S )N)r   r   )r6   )r6   r9   )r6   r   )r9   r   r0   r1   r2   random_matrixinputsn_componentsr   r"   r"   r#   check_input_size_random_matrixW   s    r?   c                 C   s6   ddddg}|D ] \}}| ||j ||fkstqd S )N)r6      )r@   r6   )r@   r@   )r6   r6   )r   AssertionErrorr;   r"   r"   r#   check_size_generated^   s    
rB   c                 C   s<   t | dddd}tdt|d tdtj|d d S )Ni'  r6   r   r    r4         ?)r'   r   r   meanZlinalgZnorm)r<   Ar"   r"   r#   check_zero_mean_and_unit_normg   s    rG   c              
   C   s:   d\}}dD ](}t t | |||d W 5 Q R X qd S )N)r@   r(   )g      r,   r+   densityr:   )r<   r>   r   rI   r"   r"   r#   %check_input_with_sparse_random_matrixq   s    rJ   r<   c                 C   s   t |  t|  t|  d S r%   )r?   rB   rG   )r<   r"   r"   r#   $test_basic_property_of_random_matrixy   s    rK   c                 C   s"   t |  tj| dd}t| d S )NrD   rH   )rJ   	functoolspartialrG   )r<   Zrandom_matrix_denser"   r"   r#   +test_basic_property_of_sparse_random_matrix   s    rN   c                  C   sF   d} d}t | |dd}tdt|d ttj|ddd|  d d S )	Nr*   r)   r   rC   r,   r5   r6   Zddof)r   r   r   rE   var)r>   r   rF   r"   r"   r#   test_gaussian_random_matrix   s
    rQ   c               	   C   s  d} d}dD ]}d| }t | ||dd}t|}t|}t|t|  |ksXtt| t|  |ksvt|dkrt|dkstnd	|kstt|d
ksttt|d	kdd|  dd tt|t|t|  kdd|  dd tt|t| t|  kdd|  dd ttj	|d	kdddd|  d | dd ttj	|t|t|  kddddd|   d d|  dd ttj	|t| t|  kddddd|   d d|  dd qd S )Nr*   i  )g333333?rD   r6   r   )rI   r    rD   r5   r,   r4   )decimalrO   )
r   r'   r   uniquesqrtrA   r   r   rE   rP   )r>   r   rI   srF   valuesr"   r"   r#   test_sparse_random_matrix   sP    
   
	  
  
 , "rW   c               
   C   sD   d} dddgg}t D ]*}tt || d| W 5 Q R X qd S )Nautor   r6   r5   r>   )all_RandomProjectionr0   r1   r2   fit)r>   Zfit_dataRandomProjectionr"   r"   r#   0test_random_projection_transformer_invalid_input   s
    r]   c               
   C   s4   t D ]*} tt | ddt W 5 Q R X qd S )NrX   rY   )rZ   r0   r1   r2   	transformdata)r\   r"   r"   r#    test_try_to_transform_before_fit   s    r`   c               
   C   sR   t ddd\} }tD ]8}|ddd}d}tjt|d ||  W 5 Q R X qd S )Nr)   r*   rX   g?)r>   r/   z~eps=0.100000 and n_samples=1000 lead to a target dimension of 5920 which is larger than the original space with n_features=100)match)r$   rZ   r0   r1   r2   r[   )r_   _r\   rpZexpected_msgr"   r"   r#   .test_too_many_samples_to_find_a_safe_embedding   s    rd   c            
      C   s   t ddd\} }d}t| dd}| }|dk}|| }tD ]h}|d|d	d
}|| }t|dd}| }|| }|| }	|	 d| k std| |	 k s<tq<d S )N   i  i:  g?T)Zsquaredr,   rX   r   )r>   r/   r    r6   )r$   r   ZravelrZ   fit_transformmaxrA   min)
r_   rb   r/   Zoriginal_distancesZnon_identicalr\   rc   	projectedZprojected_distancesZdistances_ratior"   r"   r#   (test_random_projection_embedding_quality   s    
rj   c                  C   s   t D ]} | dddd}|t t|ttjs6tt	t}t||tjsVt| dddd}|t}t|ttjstt
||stqd S )Nr(   Tr   )r>   Zdense_outputr    F)r   r[   r_   
isinstancer^   r   ZndarrayrA   r   
csr_matrixr&   )ZSparseRandomProjrc   Zsparse_datar"   r"   r#   +test_SparseRandomProj_output_representation  s    


rm   c               
   C   sd  t D ]X} | ddddt}|jdks,t|jdks:t| tkr^|jdksPtt|j	dd |j
jdtfksrt|t}|jtdfkst|t}t|| | ddd}|t}t|| tt  |td d d	d
f  W 5 Q R X | tkr| dddd}|t}|jtdfks&t|j
jdtfks<t|j
jdk sNtd|j
jk stqd S )NrX   r   r-   )r>   r    r/   n   gQ?r5   )r    r/   r6   r@   r*   gMbP?)r>   rI   r    s   U   )rZ   r[   r_   r>   rA   n_components_r   rI   r   Zdensity_components_r   r   r^   r   r   rf   r0   r1   r2   Znnz)r\   rc   Zprojected_1Zprojected_2Zrp2Zprojected_3ri   r"   r"   r#   2test_correct_RandomProjection_dimensions_embedding   s0    





$
rs   c               
   C   sT   d} t d| t| d \}}tD ].}tt || d d| W 5 Q R X q d S )N   r@      r6   rY   )r$   intrZ   r0   Zwarnsr   r[   )r   r_   rb   r\   r"   r"   r#   1test_warning_n_components_greater_than_n_featuresJ  s
    rw   c                  C   sj   d} t d| t| d \}}tD ]D}|ddd|}|dddt|}tt|jt|j q d S )Nrt   r@   ru   r4   r6   )r>   r    )	r$   rv   rZ   r[   r   rl   r   r'   rr   )r   r_   rb   r\   Zrp_denseZ	rp_sparser"   r"   r#   test_works_with_sparse_dataS  s     rx   c                   C   s   t ddddkstdS )zyTest Johnson-Lindenstrauss for small eps.

    Regression test for #17111: before #19374, 32-bit systems would fail.
    r*   h㈵>r.   l   JWN)r   rA   r"   r"   r"   r#   "test_johnson_lindenstrauss_min_dima  s    rz   random_projection_clsc                    sV   | dd}| t | }| j  tj fddt|jD t	d}t
|| d S )Nr5   rY   c                    s   g | ]}  | qS r"   r"   ).0iZclass_name_lowerr"   r#   
<listcomp>p  s     z<test_random_projection_feature_names_out.<locals>.<listcomp>)dtype)r[   r_   Zget_feature_names_out__name__lowerr   arrayrangerq   objectr   )r{   random_projectionZ	names_outZexpected_names_outr"   r~   r#   (test_random_projection_feature_names_outi  s    


r   r   )r5   	   r(      r)   r   compute_inverse_componentsTFc              
   C   s   d}||||d}t | || | d d |d\}}||fD ]}	t   tjddtd ||	}
W 5 Q R X |rt|d	s~t|j}|j	||fkst|
|
}|j	|	j	kst||}t|
d
r|
 }
t|
|ddd q:d S )Nr(   )r>   r   r    r*   r6   rC   ignorez>The number of components is higher than the number of features)messagecategoryinverse_components_r   gHz>g|=)Zrtolatol)r$   warningscatch_warningsfilterwarningsr   rf   hasattrrA   r   r   Zinverse_transformr^   r   r
   )r   r   r{   r   Zglobal_random_seedr>   r   ZX_denseZX_csrXri   Zinv_componentsZprojected_backZprojected_againr"   r"   r#   test_inverse_transformw  s<    




r   zinput_dtype, expected_dtypec                 C   sT   t jd}|dd}| dd}|||}|jj|ksBt|j|ksPtd S )N*        r   rC   )	r   r   r   randrf   astyperr   r   rA   )r{   Zinput_dtypeZexpected_dtyper!   r   rc   Ztransformedr"   r"   r#   "test_random_projection_dtype_match  s    
r   c                 C   st   d}t jd}|dd}| dd}| dd}||t j}||t j}t|||d t	|j
|j
 d S )Nry   r   r   r   r   rC   )r   )r   r   r   r   rf   r   float32float64r
   r   rr   )r{   r   r!   r   Zrp_32Zrp_64Zprojection_32Zprojection_64r"   r"   r#   ,test_random_projection_numerical_consistency  s    

r   )r   )GrL   typingr   r   r   Znumpyr   Zscipy.sparsesparser   r0   Zsklearn.metricsr   Zsklearn.random_projectionr   r   r   r   r	   Zsklearn.utils._testingr
   r   r   r   r   Zsklearn.exceptionsr   r   __annotations__r   Zall_random_matrixr   r   rZ   r$   r'   r   r   rv   r   r_   Zdata_csrmarkZparametrizer3   r8   r?   rB   rG   rJ   rK   rN   rQ   rW   r]   r`   rd   rj   rm   rs   rw   rx   rz   r   r   r   r   Zint32Zint64r   r   r"   r"   r"   r#   <module>   s    
 

	


9*	
/



	