U
    3dL                     @   s  d dl Zd dlmZ d dlZd dlZd dlmZm	Z	m
Z
 d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ dd Zdd Z dd Z!dd Z"dd Z#ej$%dddddgej$%dddgdd  Z&d!d" Z'd#d$ Z(d%d& Z)d'd( Z*ej$%dddddgd)d* Z+d+d, Z,d-d. Z-d/d0 Z.d1d2 Z/ej$%dddddgd3d4 Z0ej$%d5ddd6gd7d8 Z1d9d: Z2d;d< Z3d=d> Z4d?d@ Z5dS )A    N)assert_array_almost_equalassert_array_equalassert_allclose)PCA	KernelPCA)make_circles)
make_blobs)NotFittedError)
Perceptron)Pipeline)StandardScaler)GridSearchCV)
rbf_kernel)_check_psd_eigenvaluesc                  C   s   t jd} | d}| d}dd }dD ]}ddd	|fD ]}t| }td
|||d}||}|||}	t	t 
|t 
|	 |jdkst||}
|
jd |jd kst|r<||
}|j|jks<tq<q,dS )zNominal test for all solvers and all known kernels + a custom one

    It tests
     - that fit_transform is equivalent to fit+transform
     - that the shapes of transforms and inverse transforms are correct
    r            r   c                 [   s   |i kst t| | S )N)AssertionErrornpZminimumsum)xykwargs r   O/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/decomposition/tests/test_kernel_pca.py	histogram#   s    z"test_kernel_pca.<locals>.histogram)autodensearpack
randomizedlinearrbfpolyr   )kerneleigen_solverfit_inverse_transform   N)r   randomRandomStaterandom_samplecallabler   fit_transformfit	transformr   abssizer   shapeinverse_transform)rngX_fitX_predr   r&   r%   invkpcaX_fit_transformedX_fit_transformed2X_pred_transformedZX_pred2r   r   r   test_kernel_pca   s2    


   
 

r<   c               	   C   sD   t dddd} d}tjt|d | tjdd W 5 Q R X dS )zCheck that kPCA raises an error if the parameters are invalid

    Tests fitting inverse transform with a precomputed kernel raises a
    ValueError.
    
   Tprecomputed)n_componentsr'   r%   z6Cannot fit_inverse_transform with a precomputed kernelmatchN)r   pytestraises
ValueErrorr.   r   r)   randn)Z	estimatorZerr_msr   r   r   "test_kernel_pca_invalid_parametersF   s      rF   c                  C   sb   t jd} | dd}t| d|}||}| }d|dddf< ||}t|| dS )zCheck robustness to mutations in the original training array

    Test that after fitting a kPCA model, it stays independent of any
    mutation of the values of the original data object by relying on an
    internal copy.
    r   r=   )random_statei  N)	r   r)   r*   randr   r.   r/   copyr   )stateXr8   Ztransformed1ZX_copyZtransformed2r   r   r   $test_kernel_pca_consistent_transformT   s    

rL   c               	   C   s   t jd} | dd}d}|D ]j}t d}tdD ],}td|| d}||d ||ddf< q6t|t 	|dddf d
dd q dS )	zTest that Kernel PCA produces deterministic output

    Tests that the same inputs and random state produce the same output.
    r   r=   )r    r   )   r   rM   r   )r?   r&   rG   N)r   r)   r*   rH   zerosranger   r-   r   Ztilereshape)r4   rK   r&   solverZtransformed_Xir8   r   r   r   $test_kernel_pca_deterministic_outputg   s    
rS   c            	      C   s   t jd} t| d}t| d}dD ]}dD ]}td||ddd}||}||	|}t
t |t | |	|}|jd	 |jd	 ksttt || W 5 Q R X q8q0d
S )zTest that kPCA works on a sparse data input.

    Same test as ``test_kernel_pca except inverse_transform`` since it's not
    implemented for sparse matrices.
    r   r   r   )r   r    r!   )r"   r#   r$   r   F)r%   r&   r'   rG   r(   N)r   r)   r*   spZ
csr_matrixr+   r   r-   r.   r/   r   r0   r2   r   rB   rC   r	   r3   )	r4   r5   r6   r&   r%   r8   r9   r:   r;   r   r   r   test_kernel_pca_sparsex   s,    
 
rU   rQ   r   r   r    r!   
n_featuresr   r=   c                 C   s   t jd}|d|f}|d|f}| dkr4dnd}tt t|| d||t t	|| dkrj| nd	d
|| dS )zTest that kPCA with linear kernel is equivalent to PCA for all solvers.

    KernelPCA with linear kernel should produce the same output as PCA.
    r   r   r   r       r   )r&   r   full)Z
svd_solverN)
r   r)   r*   r+   r   r0   r   r.   r/   r   )rQ   rV   r4   r5   r6   Zn_compsr   r   r   test_kernel_pca_linear_kernel   s    rY   c                  C   sf   t jd} | d}| d}dD ]<}dD ]2}t||d}|||j}|d|fks,tq,q$dS )	zTest that `n_components` is correctly taken into account for projections

    For all solvers this tests that the output has the correct shape depending
    on the selected number of components.
    r   r   r   r   r    r!   )r(   r   r   )r?   r&   r   N)	r   r)   r*   r+   r   r.   r/   r2   r   )r4   r5   r6   r&   cr8   r2   r   r   r   test_kernel_pca_n_components   s    

r\   c                  C   s   t ddgddgddgg} t }|| }|jdks:ttdd}|| }|jdks\ttddd}|| }|jdkstd	S )
zCheck that the ``remove_zero_eig`` parameter works correctly.

    Tests that the null-space (Zero) eigenvalues are removed when
    remove_zero_eig=True, whereas they are not by default.
          ?r(   )rW   r   r   r?   )rW   r   T)r?   remove_zero_eigN)r   arrayr   r-   r2   r   )rK   r8   ZXtr   r   r   test_remove_zero_eig   s    



ra   c               
   C   s   t ddgddgg} t h tdt t jddD tdddd	}|| 	| }|
| }tt |t | W 5 Q R X W 5 Q R X d
S )zNon-regression test for issue #12141 (PR #12143)

    This test checks that fit().transform() returns the same result as
    fit_transform() in case of non-removed zero eigenvalue.
    r(   r   errorwarn)allr   Fr   )r?   r_   r&   N)r   r`   warningscatch_warningssimplefilterRuntimeWarningZerrstater   r.   r/   r-   r   r0   )r5   kABr   r   r   test_leave_zero_eig   s    

rl   c                  C   s   t jd} | d}| d}dD ]}td|dd||}td|dddt ||jt ||j}td|ddd	t ||j}td|dddt ||jt ||j}t
t |t | t
t |t | q$d	S )
z?Test that kPCA works with a precomputed kernel, for all solversr   r   r   rZ   r   r&   rG   r>   )r&   r%   rG   N)r   r)   r*   r+   r   r.   r/   dotTr-   r   r0   )r4   r5   r6   r&   X_kpcaZX_kpca2ZX_kpca_trainZX_kpca_train2r   r   r   test_kernel_pca_precomputed   sT    

         rq   c                 C   st   ddgddgg}t d| ddd}|| dd	gd	dgg}t d| ddd}|| t|j|j t|j|j d
S )zCheck that the kernel centerer works.

    Tests that a non symmetric precomputed kernel is actually accepted
    because the kernel centerer does its job correctly.
    r(   r   rW   (   r>   r   )r%   r&   r?   rG   	   iN)r   r.   r   Zeigenvectors_eigenvalues_)rQ   Kr8   ZKcZkpca_cr   r   r   )test_kernel_pca_precomputed_non_symmetric  s$    	   
   
rv   c                  C   s|   t ddddd\} }tddd}td	|fd
tddfg}tdtdd d}t|d|d}|| | |j	dksxt
dS )zCheck that kPCA works as expected in a grid search pipeline

    Test if we can do a grid-search to find parameters to separate
    circles with a perceptron model.
      333333?皙?r   	n_samplesZfactorZnoiserG   r#   r   r%   r?   
kernel_pcar
   r   Zmax_iter       @)Zkernel_pca__gammarW   Zcv
param_gridr(   N)r   r   r   r
   dictr   aranger   r.   best_score_r   )rK   r   r8   pipeliner   grid_searchr   r   r   test_gridsearch_pipeline9  s    r   c                  C   s   t ddddd\} }tddd}td	|fd
tddfg}ttddd}t|d|d}t| dd}|	|| |j
dkstdS )zCheck that kPCA works as expected in a grid search pipeline (2)

    Test if we can do a grid-search to find parameters to separate
    circles with a perceptron model. This test uses a precomputed kernel.
    rw   rx   ry   r   rz   r>   r   r|   r}   r
   r   r~   r(   )ZPerceptron__max_iterrW   r   r   )gammaN)r   r   r   r
   r   r   r   r   r   r.   r   r   )rK   r   r8   r   r   r   ZX_kernelr   r   r   $test_gridsearch_pipeline_precomputedH  s    r   c                  C   s~   t ddddd\} }tdd| || |}|dk s:ttd	d
ddd}|| }tdd||||}|dksztdS )a  Check that kPCA projects in a space where nested circles are separable

    Tests that 2D nested circles become separable with a perceptron when
    projected in the first 2 kPCA using an RBF kernel, while raw samples
    are not directly separable in the original space.
    rw   rx   ry   r   rz   r   r~   g?r#   r   Tr   )r%   r?   r'   r   r]   N)r   r
   r.   Zscorer   r   r-   )rK   r   Ztrain_scorer8   rp   r   r   r   test_nested_circlesX  s       
r   c                  C   s^   ddgddgddgg} t dddd	}||  |j dks@tt|jt|jksZtd
S )z}Check that ``_check_psd_eigenvalues`` is correctly called in kPCA

    Non-regression test for issue #12140 (PR #12145).
    r   r(   gw̫   @g:0yE>r   r"   r   T)r%   r?   r'   N)r   r.   rt   minr   r   rd   r   rK   r8   r   r   r   test_kernel_conditioningt  s
    
r   c                 C   s  ddddddddgdddd	d	d	d
dgddddddddgdd	ddddddgdd	ddddddgdd	ddddddgdd
ddddddgddddddddgg}t d| dd}tjtdd || W 5 Q R X t d| dd}| dkrtjtdd || W 5 Q R X n
|| dS )a  Check how KernelPCA works with non-PSD kernels depending on n_components

    Tests for all methods what happens with a non PSD gram matrix (this
    can happen in an isomap scenario, or with custom kernel functions, or
    maybe with ill-posed datasets).

    When ``n_component`` is large enough to capture a negative eigenvalue, an
    error should be raised. Otherwise, KernelPCA should run without error
    since the negative eigenvalues are not selected.
    gQ@g      gp=
# @gp=
ף@g
ףp=
g(\)gQg      @gףp=
gQg(\@g(\.@gQ @g333333&g{G:7g      @g333333gRQ?ggq=
ףp@g     #@g(\u5@r>      )r%   r&   r?   z*There are significant negative eigenvaluesr@   r   r!   N)r   rB   rC   rD   r.   )rQ   ru   r8   r   r   r   test_precomputed_kernel_not_psd  s(    
 r   r?   rM   c           
      C   s   d\}}t || dddd\}}|d|ddf ||dddf  }}t| ddd||}t| d	dd||}tt|t| t| d
dd||}	tt|	t| dS )zGCheck that 'dense' 'arpack' & 'randomized' solvers give similar results)i  d   rx   ry   r   rz   Nr   rm   r    r!   )r   r   r.   r/   r   r   r0   )
r?   Zn_trainZn_testrK   _r5   r6   Zref_predZa_predZr_predr   r   r   #test_kernel_pca_solvers_equivalence  s8       
*r   c                  C   s^   t dddd^} }tddddd	}|| }||}tj| | tj|  d
k sZtdS )zTest if the reconstruction is a good approximation.

    Note that in general it is not possible to get an arbitrarily good
    reconstruction because of kernel centering that does not
    preserve all the information of the original data.
    r   r   r   r{   rV   rG   rM   r#   TgMbP?)r?   r%   r'   alpha皙?N)r   r   r-   r3   r   ZlinalgZnormr   )rK   r   r8   ZX_transZ	X_reconstr   r   r   0test_kernel_pca_inverse_transform_reconstruction  s       

r   c               	   C   sH   t jddd} t }||  tt |	|  W 5 Q R X d S )N   r   rW   )
r   r)   rE   rP   r   r.   rB   rC   r	   r3   r   r   r   r   &test_kernel_pca_raise_not_fitted_error  s
    
r   c                  C   sj   t ddddgdddggddd\} }t | } | |  8 } t }|| j|| tjjksft	dS )zTest that the decomposition is similar for 32 and 64 bits data

    Non regression test for
    https://github.com/scikit-learn/scikit-learn/issues/18146
       r   r(   r   )r{   ZcentersrG   Zcluster_stdN)
r   r   r-   r   r   r2   Zastyper   Zfloat32r   )rK   r   r8   r   r   r   test_32_64_decomposition_shape  s       
r   c                  C   sF   t dddd^} }tdd| }| }tdd tdD | d	S )
z&Check feature names out for KernelPCA.r   r   r   r   r   r^   c                 S   s   g | ]}d | qS )Z	kernelpcar   ).0rR   r   r   r   
<listcomp>  s     z5test_kernel_pca_feature_names_out.<locals>.<listcomp>N)r   r   r.   Zget_feature_names_outr   rO   )rK   r   r8   namesr   r   r   !test_kernel_pca_feature_names_out  s    r   )6Znumpyr   Zscipy.sparsesparserT   rB   re   Zsklearn.utils._testingr   r   r   Zsklearn.decompositionr   r   Zsklearn.datasetsr   r   Zsklearn.exceptionsr	   Zsklearn.linear_modelr
   Zsklearn.pipeliner   Zsklearn.preprocessingr   Zsklearn.model_selectionr   Zsklearn.metrics.pairwiser   Zsklearn.utils.validationr   r<   rF   rL   rS   rU   markZparametrizerY   r\   ra   rl   rq   rv   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sP   .%&

3
$