U
    2dK                     @   s  d Z ddlmZ ddlZddlZddlmZmZm	Z	 ddl
mZ ddlmZmZ ddlmZmZmZ ddlmZmZ dd	lmZmZ dd
lmZ ddlmZmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$ ddl#m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddlm0Z0 ddlm1Z1 G dd dZ2dd Z3dd Z4dd Z5dd  Z6d!d" Z7d#d$ Z8d%d& Z9d'd( Z:d)d* Z;d+d, Z<d-d. Z=d/d0 Z>d1d2 Z?d3d4 Z@ejABd5ed6d6gejABd7ed8fed9fgd:d; ZCejABd<d=eDfd>eEfd?d@ eEfgejABdAeegdBdC ZFejABdDdd8gdEdF ZGdGdH ZHdIdJ ZIejABdKeegdLdM ZJejABdKeegejABdNeeegdOdP ZKdS )Qz'
Testing Recursive feature elimination
    )
attrgetterN)assert_array_almost_equalassert_array_equalassert_allclose)sparse)BaseEstimatorClassifierMixin)PLSCanonicalPLSRegressionCCA)RFERFECV)	load_irismake_friedman1)zero_one_loss)SVCSVR	LinearSVR)LogisticRegression)RandomForestClassifier)cross_val_score)
GroupKFold)TransformedTargetRegressor)make_pipeline)StandardScaler)check_random_state)ignore_warnings)make_scorer)
get_scorerc                   @   sZ   e Zd ZdZdddZdd Zdd ZeZeZeZ	dd
dZ
dddZdd Zdd Zd	S )MockClassifierz@
    Dummy classifier to test recursive feature elimination
    r   c                 C   s
   || _ d S N	foo_param)selfr"    r$   L/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/feature_selection/tests/test_rfe.py__init__&   s    zMockClassifier.__init__c                 C   s0   t |t |ksttj|jd tjd| _| S )N   )Zdtype)lenAssertionErrornpZonesshapeZfloat64coef_r#   Xyr$   r$   r%   fit)   s    zMockClassifier.fitc                 C   s
   |j d S )Nr   )r+   )r#   Tr$   r$   r%   predict.   s    zMockClassifier.predictNc                 C   s   dS )Ng        r$   r-   r$   r$   r%   score5   s    zMockClassifier.scoreTc                 C   s
   d| j iS )Nr"   r!   )r#   deepr$   r$   r%   
get_params8   s    zMockClassifier.get_paramsc                 K   s   | S r    r$   )r#   paramsr$   r$   r%   
set_params;   s    zMockClassifier.set_paramsc                 C   s   ddiS )N	allow_nanTr$   )r#   r$   r$   r%   
_more_tags>   s    zMockClassifier._more_tags)r   )NN)T)__name__
__module____qualname____doc__r&   r0   r2   Zpredict_probaZdecision_function	transformr3   r5   r7   r9   r$   r$   r$   r%   r   !   s   


r   c                  C   s   t d} t }tj|j| jt|jdfdf }|j}td| dd}t	|ddd	}|
|| t|j|jd
 ksvttdd}t	|ddd	}|
|| t| |  d S )Nr      size      )n_estimatorsrandom_stateZ	max_depth   皙?	estimatorn_features_to_selectstepr'   linearkernel)r   r   r*   c_datanormalr(   targetr   r   r0   ranking_r+   r)   r   r   Zget_support)	generatoririsr.   r/   clfrfeZclf_svcZrfe_svcr$   r$   r%   test_rfe_features_importanceB   s    "
rX   c                  C   s:  t d} t }tj|j| jt|jdfdf }t|}|j	}t
dd}t|ddd}||| ||}||| t|j|jd	 kstt
dd}t|ddd}	|	|| |	|}
|j|jjkstt|d d
 |jd d
  t||||j |||||j|j	ks(tt||
  d S )Nr   r?   r@   rL   rM   rF   rG   rH   r'   
   )r   r   r*   rO   rP   rQ   r(   r   
csr_matrixrR   r   r   r0   r>   rS   r+   r)   r   r2   r3   toarray)rT   rU   r.   X_sparser/   rV   rW   X_rZ
clf_sparseZ
rfe_sparse
X_r_sparser$   r$   r%   test_rfeW   s(    "




"r_   c               	   C   s   G dd dt t} tdd\}}tjtdd t|  d|| W 5 Q R X tjtdd& t|  dj||d	d
|| W 5 Q R X t|  dj||d	d
j||d	d
 d S )Nc                   @   s    e Zd ZdddZdddZdS )z0test_RFE_fit_score_params.<locals>.TestEstimatorNc                 S   s2   |d krt dtdd||| _| jj| _| S )Nfit: prop cannot be NonerL   rM   )
ValueErrorr   r0   svc_r,   r#   r.   r/   propr$   r$   r%   r0   z   s
    
z4test_RFE_fit_score_params.<locals>.TestEstimator.fitc                 S   s   |d krt d| j||S )Nscore: prop cannot be None)ra   rb   r3   rc   r$   r$   r%   r3      s    z6test_RFE_fit_score_params.<locals>.TestEstimator.score)N)N)r:   r;   r<   r0   r3   r$   r$   r$   r%   TestEstimatory   s   
rf   TZ
return_X_yr`   )matchrI   re   Zfoo)rd   )	r   r   r   pytestraisesra   r   r0   r3   )rf   r.   r/   r$   r$   r%   test_RFE_fit_score_paramsv   s    *rl   c                  C   s   t d} t }tj|j| jt|jdfdf }|j}tdd}t	|ddd}|
|| t	|d	dd}|
|| t|j|j t|j|j d S )
Nr   r?   r@   rL   rM   rF   rG   rH   g?)r   r   r*   rO   rP   rQ   r(   rR   r   r   r0   r   rS   support_)rT   rU   r.   r/   rV   Zrfe_numZrfe_percr$   r$   r%   test_rfe_percent_n_features   s    "
rn   c                  C   s   t d} t }tj|j| jt|jdfdf }|j}t }t	|ddd}|
|| ||}|
|| t|j|jd kst|j|jjkstd S )Nr   r?   r@   rF   rG   rH   r'   )r   r   r*   rO   rP   rQ   r(   rR   r   r   r0   r>   rS   r+   r)   )rT   rU   r.   r/   rV   rW   r]   r$   r$   r%   test_rfe_mockclassifier   s    "
ro   c                  C   s  t d} t }tj|j| jt|jdfdf }t|j}t	t
dddd}||| |j D ] }t|j| |jd ksbtqbt|j|jd kst||}t||j t	t
dddd}t|}||| ||}	t|	 |j ttdd	}
t	t
ddd|
d
}t|j|| ||}t||j td}t	t
ddd|d
}||| ||}t||j dd }t	t
ddd|d
}||| |jdkstt	t
dddd}||| |j D ]}t|j| dkstqt|j|jd kst||}t||j t	t
dddd}t|}||| ||}	t|	 |j t	t
dddd}t|}||| ||}	t|	 |j d S )Nr   r?   r@   rL   rM   r'   rI   rK   F)Zgreater_is_better)rI   rK   scoringaccuracyc                 S   s   dS )Ng      ?r$   )rI   r.   r/   r$   r$   r%   test_scorer   s    ztest_rfecv.<locals>.test_scorerrC   皙?)r   r   r*   rO   rP   rQ   r(   listrR   r   r   r0   cv_results_keysr+   r)   rS   r>   r   r   rZ   r[   r   r   r   r   n_features_)rT   rU   r.   r/   rfecvkeyr]   Zrfecv_sparser\   r^   rq   Zscorerrs   r$   r$   r%   
test_rfecv   s^    "










r{   c                  C   s   t d} t }tj|j| jt|jdfdf }t|j}t	t
 dd}||| |j D ] }t|j| |jd ks^tq^t|j|jd kstd S )Nr   r?   r@   r'   rp   )r   r   r*   rO   rP   rQ   r(   ru   rR   r   r   r0   rv   rw   r+   r)   rS   )rT   rU   r.   r/   ry   rz   r$   r$   r%   test_rfecv_mockclassifier  s    "
r|   c                  C   s   ddl m}  dd l}|  |_td}t }tj|j|j	t
|jdfdf }t|j}ttddddd}||| |j}|d t
| dkstd S )	Nr   )StringIOr?   r@   rL   rM   r'   )rI   rK   verbose)ior}   sysstdoutr   r   r*   rO   rP   rQ   r(   ru   rR   r   r   r0   seekreadliner)   )r}   r   rT   rU   r.   r/   ry   Zverbose_outputr$   r$   r%   test_rfecv_verbose_output  s    "

r   c           
      C   s   t | }t }tj|j|jt|jdfdf }t|j}ddgddgddgfD ]\}}t	t
 ||d}||| t|jd | | d }|j D ]}	t|j|	 |kstqt|j|jd kst|j|ksPtqPd S )Nr?   r@   rC   r'      )rI   rK   min_features_to_select)r   r   r*   rO   rP   rQ   r(   ru   rR   r   r   r0   ceilr+   rv   rw   r)   rS   rx   )
global_random_seedrT   rU   r.   r/   rK   r   ry   Z	score_lenrz   r$   r$   r%   test_rfecv_cv_results_size+  s     "
r   c                  C   sF   t tdd} | jdkstt }t| |j|j}| dksBtd S )NrL   rM   
classifiergffffff?)	r   r   Z_estimator_typer)   r   r   rP   rR   min)rW   rU   r3   r$   r$   r%   test_rfe_estimator_tagsD  s
    r   c                 C   s   d}t d|| d\}}|j\}}tdd}t|dd}|||}|j |d ksXtt|d	d}|||}|j |d kstt|d
d}|||}|j |d kstd S )NrY   2   	n_samples
n_featuresrE   rL   rM   g{Gz?rK   rC   rt      )r   r+   r   r   r0   rm   sumr)   )r   r   r.   r/   r   rI   selectorselr$   r$   r%   test_rfe_min_stepM  s"      


r   c                 C   s  dd }dd }ddg}ddg}ddg}t |||D ]\}}}t| }	|	jd|fd	}
|	d }ttd
d||d}||
| t	|j
||||kstt	|j
||||ks4tq4d}ddg}ddg}t ||D ]\}}t| }	|	jd|fd	}
|	d }ttd
d|d}||
| |j D ]F}t|j| ||||ksVtt|j| ||||ks2tq2qd S )Nc                 S   s   d| | | d |  S Nr'   r$   r   rJ   rK   r$   r$   r%   formula1n  s    z4test_number_of_subsets_of_features.<locals>.formula1c                 S   s   dt | | t|  S r   )r*   r   floatr   r$   r$   r%   formula2q  s    z4test_number_of_subsets_of_features.<locals>.formula2   r   rC   d   r@   rL   rM   rH   r'   rY   rp   )zipr   rQ   Zrandroundr   r   r0   r*   maxrS   r)   r   rv   rw   r(   )r   r   r   Zn_features_listZn_features_to_select_listZ	step_listr   rJ   rK   rT   r.   r/   rW   ry   rz   r$   r$   r%   "test_number_of_subsets_of_featurese  sR    	      r   c           	      C   s   t | }t }tj|j|jt|jdfdf }|j}tt	ddd}|
|| |j}|j}|jdd |
|| t|j| | |j kst| D ] }|| t|j| kstqd S )Nr?   r@   rL   rM   ri   rC   )Zn_jobs)r   r   r*   rO   rP   rQ   r(   rR   r   r   r0   rS   rv   r7   r   rw   r)   rj   Zapprox)	r   rT   rU   r.   r/   ry   Zrfecv_rankingZrfecv_cv_results_rz   r$   r$   r%   test_rfe_cv_n_jobs  s    "r   c                  C   s   t d} t }d}ttd|t|j}|j}|jdkt	}t
t| dddtddd}|j|||d	 |jdks|td S )
Nr   rF   rE   r'   rr   rC   )Zn_splits)rI   rK   rq   cv)groups)r   r   r*   floorZlinspacer(   rR   rP   Zastypeintr   r   r   r0   rx   r)   )rT   rU   Znumber_groupsr   r.   r/   Z
est_groupsr$   r$   r%   test_rfe_cv_groups  s    r   importance_getterzregressor_.coef_zselector, expected_n_featuresr   rF   c                 C   s\   t dddd\}}tdd}t|tjtjd}||| d}|||}|j |ksXt	d S )Nr   rY   r   r   r   Z	regressorfuncZinverse_funcr   )
r   r   r   r*   logexpr0   rm   r   r)   )r   r   Zexpected_n_featuresr.   r/   rI   log_estimatorr   r$   r$   r%   test_rfe_wrapped_estimator  s    
  r   zimportance_getter, err_typeautorandomc                 C   s   | j S r    )Z
importance)xr$   r$   r%   <lambda>      r   Selectorc              	   C   s\   t dddd\}}t }t|tjtjd}t| ||| d}||| W 5 Q R X d S )Nr   rY   *   r   r   r   )	r   r   r   r*   r   r   rj   rk   r0   )r   Zerr_typer   r.   r/   rI   r   modelr$   r$   r%   %test_rfe_importance_getter_validation  s    
  r   r   c                 C   sn   t  }|j}|j}tj|d d< tj|d d< t }| d k	rJt|| d}n
t|d}|	|| |
| d S )Nr   r'   )rI   r   ri   )r   rP   rR   r*   NaNZInfr   r   r   r0   r>   )r   rU   r.   r/   rV   rW   r$   r$   r%   test_rfe_allow_nan_inf_in_x  s    
r   c                  C   sR   t t t } tdd\}}t| ddd}||| ||jd dksNtd S )NTrg   rC   z$named_steps.logisticregression.coef_)rJ   r   r'   )	r   r   r   r   r   r0   r>   r+   r)   )ZpipelinerP   r/   Zsfmr$   r$   r%   test_w_pipeline_2d_coef_  s    r   c           
         s   t | }t }tj|j|jt|jdfdf }|j}tt	ddd  
|| t jd }dd t|D }t fd	d|D }tj|d
d}tj|d
d}	t jd | t jd |	 d S )Nr?   r@   rL   rM   ri   rC   c                 S   s   g | ]}d | dqS )splitZ_test_scorer$   ).0ir$   r$   r%   
<listcomp>  s     z+test_rfecv_std_and_mean.<locals>.<listcomp>c                    s   g | ]} j | qS r$   )rv   )r   rz   ry   r$   r%   r   !  s     r   )ZaxisZmean_test_scoreZstd_test_score)r   r   r*   rO   rP   rQ   r(   rR   r   r   r0   rv   rangeZasarrayZmeanZstdr   )
r   rT   rU   r.   r/   Zn_split_keysZ
split_keysZ	cv_scoresZexpected_meanZexpected_stdr$   r   r%   test_rfecv_std_and_mean  s    "r   ClsRFEc                 C   s@   t jjdd}t jjddd}tdd}| |}||| d S )N)rY   r   r@   rC   )rY   rC   r   )rD   )r*   r   rQ   randintr   r0   )r   r.   r/   rV   Zrfe_testr$   r$   r%   test_multioutput)  s
    
r   PLSEstimatorc                 C   sH   t dddd\}}|dd}| |dd||}|||dksDtd	S )
zCheck the behaviour of RFE with PLS estimators.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/12410
    r   rY   r   r   r'   )Zn_componentsr   g      ?N)r   r0   r3   r)   )r   r   r.   r/   rI   r   r$   r$   r%   test_rfe_pls2  s    
r   )Lr=   operatorr   rj   Znumpyr*   Znumpy.testingr   r   r   Zscipyr   Zsklearn.baser   r   Zsklearn.cross_decompositionr	   r
   r   Zsklearn.feature_selectionr   r   Zsklearn.datasetsr   r   Zsklearn.metricsr   Zsklearn.svmr   r   r   Zsklearn.linear_modelr   Zsklearn.ensembler   Zsklearn.model_selectionr   r   Zsklearn.composer   Zsklearn.pipeliner   Zsklearn.preprocessingr   Zsklearn.utilsr   Zsklearn.utils._testingr   r   r   r   rX   r_   rl   rn   ro   r{   r|   r   r   r   r   r   r   r   markZparametrizer   ra   AttributeErrorr   r   r   r   r   r   r$   r$   r$   r%   <module>   sz   !Q	A 



