U
    2d%                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ dd Ze jde jdde jdddd Ze jdddd Ze jdddd Ze jde jdde jdddd Ze jdede jdde jddd dgfd dgfgd!d" Z e jdd#d$ Z!d%d& Z"d'd( Z#d)d* Z$e jdd+d,d- Z%e jd.d/d0d1ej&d2fd3d4 Z'dS )5    N)assert_array_equal)StandardScaler)make_pipeline)SequentialFeatureSelector)make_regression
make_blobs)LinearRegression)HistGradientBoostingRegressor)cross_val_score)KMeansc               	   C   sJ   d} t | d\}}tt | d}tjtdd ||| W 5 Q R X d S )N   
n_featuresn_features_to_selectz#n_features_to_select must be eithermatch)r   r   r   pytestraises
ValueErrorfit)r   Xysfs r   S/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/feature_selection/tests/test_sequential.pytest_bad_n_features_to_select   s
    r   z)ignore:Leaving `n_features_to_select` to 	direction)forwardZbackwardr   )   r   	   autoc                 C   s   d}t |dd\}}tt || dd}||| |dkrB|d }|jddjd |ks\t|j|ksjt||jd	 |kstd S )
N
   r   r   random_state   r   r   cv)r!   NTindicesr   	r   r   r   r   get_supportshapeAssertionErrorn_features_to_select_	transform)r   r   r   r   r   r   r   r   r   test_n_features_to_select   s    r0   c                 C   s   d}d}t |dd\}}tt d|| dd}||| |d }|jd	d
jd |ksZt|j|ksht||jd |kst|jd	d
jd |jkstdS )zlCheck the behaviour of `n_features_to_select="auto"` with different
    values for the parameter `tol`.
    r"   MbP?r   r#   r!   r%   r   tolr   r'   r   Tr(   Nr*   )r   r   r3   r   r   r   Zmax_features_to_selectr   r   r   test_n_features_to_select_auto0   s     r4   c                 C   s  t dddd\}}d}tt d|| dd}||| ||}tjd}tt	t
|jd	 t	|jd
d }t||dd||f ddtjf g}|tt
|j}	tj||	d	d}
tt ||dd }tt ||dd }tt ||dd }tt |
|dd }||ks*t| dkrZ|| |ksFt|| |ks~tn$|| |kslt|| |ks~tdS )av  Check the behaviour stopping criterion for feature selection
    depending on the values of `n_features_to_select` and `tol`.

    When `direction` is `'forward'`, select a new features at random
    among those not currently selected in selector.support_,
    build a new version of the data that includes all the features
    in selector.support_ + this newly selected feature.
    And check that the cross-validation score of the model trained on
    this new dataset variant is lower than the model with
    the selected forward selected features or at least does not improve
    by more than the tol margin.

    When `direction` is `'backward'`, instead of adding a new feature
    to selector.support_, try to remove one of those selected features at random
    And check that the cross-validation score is either decreasing or
    not improving by more than the tol margin.
    2   r"   r   )r   Zn_informativer$   r1   r!   r%   r2   r   Tr(   N)Zaxis)r'   r   )r   r   r   r   r/   nprandomRandomStatelistsetranger,   r+   ZhstackchoiceZnewaxisr.   deleter
   Zmeanr-   )r   r   r   r3   r   Z
selected_XrngZadded_candidatesZadded_XZremoved_candidateZ	removed_XZplain_cv_scoreZsfs_cv_scoreZadded_cv_scoreZremoved_cv_scorer   r   r   ,test_n_features_to_select_stopping_criterionJ   s<    
&"
r?   zn_features_to_select, expected))g?r   )g      ?r"   )g      ?r   c                 C   s>   t dd\}}tt || dd}||| |j|ks:td S )Nr"   r   r%   r&   )r   r   r   r   r.   r-   )r   r   expectedr   r   r   r   r   r   test_n_features_to_select_float   s    rA   seedr"   z0n_features_to_select, expected_selected_featuresr%   r   c           	      C   sx   t j| }d}||d}d|d d df  d|d d df   }tt ||dd}||| t|jdd| d S )	Nd      r   r"   r%   r&   Tr(   )	r6   r7   r8   Zrandnr   r   r   r   r+   )	rB   r   r   Zexpected_selected_featuresr>   	n_samplesr   r   r   r   r   r   test_sanity   s    (rF   c                  C   sD   t dd\} }tj| } tt ddd}|| | ||  d S )Nr"   r   r!   r%   r   r'   )r   scipysparseZ
csr_matrixr   r   r   r/   )r   r   r   r   r   r   test_sparse_support   s      rJ   c               	   C   s   t jd} d\}}t||dd\}}| jdd||ftd}t j||< tt ddd}|	|| |
| tjtdd	 tt ddd	|| W 5 Q R X d S )
Nr   )(      r$   r%   )sizeZdtyper!   rG   zInput X contains NaNr   )r6   r7   r8   r   randintboolnanr   r	   r   r/   r   r   r   r   )r>   rE   r   r   r   Znan_maskr   r   r   r   test_nan_support   s*    
  
   rR   c                  C   s   d\} }t | |dd\}}tt t }t|ddd}||| || tt ddd}tt |}||| || d S )Nr5   rD   r   rM   r!   r%   rG   )r   r   r   r   r   r   r/   )rE   r   r   r   piper   r   r   r   test_pipeline_support   s    
  rU   c               	   C   sN   d\} }t | |dd\}}d}tjt|d tt || W 5 Q R X dS )z@Check that we raise a FutureWarning with `n_features_to_select`.rS   r   rM   z4Leaving `n_features_to_select` to None is deprecatedr   N)r   r   ZwarnsFutureWarningr   r   r   )rE   r   r   r   Zwarn_msgr   r   r   test_raise_deprecation_warning   s
    rW   )r%   rD   c                 C   sF   t dd\}}ttdd| d}|| ||jd | ksBtd S )NrL   r   r   )Zn_initr   )r   r   r   r   r/   r,   r-   )r   r   r   r   r   r   r   test_unsupervised_model_fit   s    
rX   r   Zno_validationy              ?gX@rD   c              	   C   sF   t dd\}}tt dd}tttf |||  W 5 Q R X d S )N   r   rD   r   )r   r   r   r   r   	TypeErrorr   r   )r   r   Zclustersr   r   r   r   test_no_y_validation_model_fit  s    r[   )(r   rH   Znumpyr6   Znumpy.testingr   Zsklearn.preprocessingr   Zsklearn.pipeliner   Zsklearn.feature_selectionr   Zsklearn.datasetsr   r   Zsklearn.linear_modelr   Zsklearn.ensembler	   Zsklearn.model_selectionr
   Zsklearn.clusterr   r   markfilterwarningsZparametrizer0   r4   r?   rA   r;   rF   rJ   rR   rU   rW   rX   rQ   r[   r   r   r   r   <module>   sZ   


=





