U
    3dz                     @   sX  d Z ddlmZ ddlZddlZddlZddlmZ ddl	m
Z
 ddl	mZ ddlmZmZ ddlmZmZ dd	lmZmZ dd
lmZmZ ddlmZmZ ddlmZmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddlm'Z' ddl(m)Z)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0 ddlm1Z1 ddl2m3Z3m4Z4 e-dZ5e* Z6e57e6j8j9Z:e6j;e: e6_;e6j8e: e6_8e) Z<e57e<j8j9Z:e<j;e: e<_;e<j8e: e<_8dd Z=ej>?dee3e4gdddddddddddd dd!ddd d"gd#d$d%d&gd'd( Z@d)d* ZAd+d, ZBG d-d. d.eZCd/d0 ZDd1d2 ZEd3d4 ZFd5d6 ZGd7d8 ZHd9d: ZId;d< ZJd=d> ZKd?d@ ZLdAdB ZMdCdD ZNdEdF ZOG dGdH dHeZPdIdJ ZQdtdLdMZRdNdO ZSdPdQ ZTdRdS ZUdTdU ZVdVdW ZWdXdY ZXdZd[ ZYd\d] ZZd^d_ Z[d`da Z\dbdc Z]ddde Z^dfdg Z_dhdi Z`djdk Zaej>?dleefeefgdmdn Zbej>?doeegdpdq Zcdrds ZddS )uzE
Testing for the bagging ensemble module (sklearn.ensemble.bagging).
    )productN)BaseEstimator)assert_array_equal)assert_array_almost_equal)DummyClassifierDummyRegressor)GridSearchCVParameterGrid)BaggingClassifierBaggingRegressor)
PerceptronLogisticRegression)KNeighborsClassifierKNeighborsRegressor)DecisionTreeClassifierDecisionTreeRegressor)SVCSVR)SparseRandomProjection)make_pipeline)SelectKBest)train_test_split)load_diabetes	load_irismake_hastie_10_2)check_random_state)FunctionTransformerscale)cycle)
csc_matrix
csr_matrixc            	      C   s   t d} ttjtj| d\}}}}tddgddgddgddgd	}d t td
dtddt	 t
 g}t|t|D ],\}}tf || dd|||| qtd S )Nr   random_state      ?      ?      TFmax_samplesmax_features	bootstrapbootstrap_features   )Zmax_iter   )Z	max_depth)	estimatorr"   n_estimators)r   r   irisdatatargetr	   r   r   r   r   r   zipr   r
   fitpredict)	rngX_trainX_testy_trainy_testgrid
estimatorsparamsr.    r>   G/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/ensemble/tests/test_bagging.pytest_classification4   sB      	
 r@   zsparse_format, params, methodr#   r-   Tr'   r$   r&   Fr)   r*   r+   r(   r*   r+   r5   predict_probapredict_log_probadecision_functionc                    s   G dd dt }td}tttjtj|d\}}}}| |}	| |}
tf |ddddd	||	|}t	|||
}tf |ddddd	|||}t	|||}t
|| t|	 d
d |jD }t fdd|D std S )Nc                       s    e Zd ZdZ fddZ  ZS )z-test_sparse_classification.<locals>.CustomSVC7SVC variant that records the nature of the training setc                    s   t  || t|| _| S Nsuperr4   type
data_type_selfXy	__class__r>   r?   r4   r   s    
z1test_sparse_classification.<locals>.CustomSVC.fit__name__
__module____qualname____doc__r4   __classcell__r>   r>   rP   r?   	CustomSVCo   s   rX   r   r!   Zlinearovr)kerneldecision_function_shaper%   r.   r"   c                 S   s   g | ]
}|j qS r>   rK   .0ir>   r>   r?   
<listcomp>   s     z.test_sparse_classification.<locals>.<listcomp>c                    s   g | ]}| kqS r>   r>   r_   tZsparse_typer>   r?   ra      s     )r   r   r   r   r0   r1   r2   r
   r4   getattrr   rJ   estimators_allAssertionError)sparse_formatr=   methodrX   r6   r7   r8   r9   r:   X_train_sparseX_test_sparsesparse_classifiersparse_resultsZdense_classifierdense_resultstypesr>   rd   r?   test_sparse_classificationU   sB      
 
 
rq   c                  C   s   t d} ttjd d tjd d | d\}}}}tddgddgddgddgd}d t t t t	 fD ]0}|D ]&}t
f || d	|||| qrqjd S )
Nr   2   r!   r#   r$   TFr'   r\   )r   r   diabetesr1   r2   r	   r   r   r   r   r   r4   r5   )r6   r7   r8   r9   r:   r;   r.   r=   r>   r>   r?   test_regression   s4      
 rt   c                     s2  t d} ttjd d tjd d | d\}}}}G dd dt}ddddd	d
dddd	ddddddddg}ttfD ]}||}||}	|D ]}
tf | dd|
	||}|
|	}tf | dd|
	||
|}t| dd |jD }t|| t fdd|D s tt|| qqzd S )Nr   rr   r!   c                       s    e Zd ZdZ fddZ  ZS )z)test_sparse_regression.<locals>.CustomSVRrF   c                    s   t  || t|| _| S rG   rH   rL   rP   r>   r?   r4      s    
z-test_sparse_regression.<locals>.CustomSVR.fitrR   r>   r>   rP   r?   	CustomSVR   s   ru   r#   r-   Tr'   r$   r&   FrA   rB   r%   r\   c                 S   s   g | ]
}|j qS r>   r]   r^   r>   r>   r?   ra      s     z*test_sparse_regression.<locals>.<listcomp>c                    s   g | ]}| kqS r>   r>   rb   rd   r>   r?   ra      s     )r   r   rs   r1   r2   r   r   r    r   r4   r5   rJ   rf   r   rg   rh   )r6   r7   r8   r9   r:   ru   Zparameter_setsri   rk   rl   r=   rm   rn   ro   rp   r>   rd   r?   test_sparse_regression   s\      


  
 
rv   c                   @   s   e Zd Zdd Zdd ZdS )DummySizeEstimatorc                 C   s   |j d | _t|| _d S Nr   )shapetraining_size_joblibhashtraining_hash_rL   r>   r>   r?   r4      s    zDummySizeEstimator.fitc                 C   s   t |jd S rx   )nponesry   rM   rN   r>   r>   r?   r5      s    zDummySizeEstimator.predictNrS   rT   rU   r4   r5   r>   r>   r>   r?   rw      s   rw   c                  C   s   t d} ttjtj| d\}}}}t ||}tt dd| d||}||||||ksft	tt dd| d||}||||||kst	tt
 dd||}g }|jD ]$}|j|jd kst	||j qtt|t|kst	d S )Nr   r!   r$   F)r.   r(   r*   r"   T)r.   r*   )r   r   rs   r1   r2   r   r4   r   scorerh   rw   rf   rz   ry   appendr}   lenset)r6   r7   r8   r9   r:   r.   ensembleZtraining_hashr>   r>   r?   test_bootstrap_samples   sH         
r   c                  C   s   t d} ttjtj| d\}}}}tt dd| d||}|jD ]$}tjj	d t
|j	d ksBtqBtt dd| d||}|jD ]$}tjj	d t
|j	d kstqd S )Nr   r!   r$   F)r.   r)   r+   r"   r%   T)r   r   rs   r1   r2   r   r   r4   estimators_features_ry   r~   uniquerh   )r6   r7   r8   r9   r:   r   featuresr>   r>   r?   test_bootstrap_features  s8       
" 
r   c               	   C   s   t d} ttjtj| d\}}}}tjddd tt | d	||}t
tj||ddtt| t
||t|| tt | dd		||}t
tj||ddtt| t
||t|| W 5 Q R X d S )
Nr   r!   ignore)divideinvalidr\   r%   )Zaxis   )r.   r"   r(   )r   r   r0   r1   r2   r~   Zerrstater
   r   r4   r   sumrC   r   r   exprD   r   r6   r7   r8   r9   r:   r   r>   r>   r?   test_probability:  sL               r   c            	   
   C   s   t d} ttjtj| d\}}}}t t fD ]|}t|ddd| d||}|	||}t
||j dk sntd}tjt|d$ t|d	dd| d}||| W 5 Q R X q.d S )
Nr   r!   d   Tr.   r/   r*   	oob_scorer"   皙?{Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.matchr%   )r   r   r0   r1   r2   r   r   r
   r4   r   abs
oob_score_rh   pytestwarnsUserWarning)	r6   r7   r8   r9   r:   r.   clf
test_scorewarn_msgr>   r>   r?   test_oob_score_classification]  s<       r   c            	   	   C   s   t d} ttjtj| d\}}}}tt ddd| d||}|||}t	||j
 dk s`td}tjt|d& tt d	dd| d}||| W 5 Q R X d S )
Nr   r!   rr   Tr   r   r   r   r%   )r   r   rs   r1   r2   r   r   r4   r   r   r   rh   r   r   r   )	r6   r7   r8   r9   r:   r   r   r   Zregrr>   r>   r?   test_oob_score_regression  s:       r   c                  C   sf   t d} ttjtj| d\}}}}tt ddd| d||}t ||}t|	||	| d S )Nr   r!   r%   F)r.   r/   r*   r+   r"   )
r   r   rs   r1   r2   r   r   r4   r   r5   )r6   r7   r8   r9   r:   Zclf1Zclf2r>   r>   r?   test_single_estimator  s$       r   c                  C   s2   t jt j } }t }tt|| |dr.td S )NrE   )r0   r1   r2   r   hasattrr
   r4   rh   )rN   rO   baser>   r>   r?   
test_error  s    r   c                  C   s  t tjtjdd\} }}}tt ddd| |}||}|jdd ||}t	|| tt ddd| |}||}t	|| tt
ddddd| |}||}|jdd ||}	t	||	 tt
ddddd| |}||}
t	||
 d S )	Nr   r!      n_jobsr"   r%   r   rY   )r[   )r   r0   r1   r2   r
   r   r4   rC   
set_paramsr   r   rE   )r7   r8   r9   r:   r   y1y2y3Z
decisions1Z
decisions2Z
decisions3r>   r>   r?   test_parallel_classification  sb         


   

   


   
r   c            	      C   s   t d} ttjtj| d\}}}}tt ddd||}|jdd |	|}|jdd |	|}t
|| tt ddd||}|	|}t
|| d S )Nr   r!   r   r   r%   r   r-   )r   r   rs   r1   r2   r   r   r4   r   r5   r   )	r6   r7   r8   r9   r:   r   r   r   r   r>   r>   r?   test_parallel_regression  s*       


 
r   c                  C   sD   t jt j } }d||dk< ddd}ttt |dd| | d S )Nr%   r-   )r%   r-   )r/   Zestimator__CZroc_auc)Zscoring)r0   r1   r2   r   r
   r   r4   )rN   rO   
parametersr>   r>   r?   test_gridsearch  s    
r   c                  C   s0  t d} ttjtj| d\}}}}td ddd||}t|jt	sHt
tt	 ddd||}t|jt	spt
tt ddd||}t|jtst
ttjtj| d\}}}}td ddd||}t|jtst
tt ddd||}t|jtst
tt ddd||}t|jts,t
d S )Nr   r!   r   r   )r   r   r0   r1   r2   r
   r4   
isinstanceZ
estimator_r   rh   r   rs   r   r   r   r   r>   r>   r?   test_estimator  sH             r   c                  C   sL   t ttddt dd} | tjtj t| d j	d d j
tsHtd S )Nr%   )kr-   )r)   r   )r
   r   r   r   r4   r0   r1   r2   r   stepsr"   intrh   )r.   r>   r>   r?   test_bagging_with_pipeline<  s     r   c                   @   s   e Zd Zdd Zdd ZdS )DummyZeroEstimatorc                 C   s   t || _| S rG   )r~   r   classes_rL   r>   r>   r?   r4   E  s    zDummyZeroEstimator.fitc                 C   s   | j tj|jd td S )Nr   )dtype)r   r~   zerosry   r   r   r>   r>   r?   r5   I  s    zDummyZeroEstimator.predictNr   r>   r>   r>   r?   r   D  s   r   c               	   C   sj   t t } td}| tjtjtj t	t
, | jtjtj|jdtjjd dd W 5 Q R X d S )Nr   
   )size)Zsample_weight)r
   r   r   r4   r0   r1   r2   r5   r   raises
ValueErrorrandintry   )r.   r6   r>   r>   r?   1test_bagging_sample_weight_unsupported_but_passedM  s    
r   *   c                 C   s   t ddd\}}d }dD ]D}|d kr4t|| dd}n|j|d ||| t||kstqtd| d	d}||| td
d |D tdd |D kstd S )Nr,   r%   Z	n_samplesr"   )r   r   T)r/   r"   
warm_startr/   r   Fc                 S   s   g | ]
}|j qS r>   r!   r_   treer>   r>   r?   ra   o  s     z#test_warm_start.<locals>.<listcomp>c                 S   s   g | ]
}|j qS r>   r!   r   r>   r>   r?   ra   p  s     )r   r
   r   r4   r   rh   r   )r"   rN   rO   clf_wsr/   Z	clf_no_wsr>   r>   r?   test_warm_startZ  s*        r   c               	   C   sZ   t ddd\} }tddd}|| | |jdd tt || | W 5 Q R X d S )	Nr,   r%   r   r   T)r/   r   r&   r   )r   r
   r4   r   r   r   r   rN   rO   r   r>   r>   r?   $test_warm_start_smaller_n_estimatorst  s    r   c            	   	   C   s   t ddd\} }t| |dd\}}}}tdddd	}||| ||}|d
7 }d}tjt|d ||| W 5 Q R X t||| d S )Nr,   r%   r   +   r!   r   TS   r/   r   r"   r$   z;Warm-start fitting without increasing n_estimators does notr   )	r   r   r
   r4   r5   r   r   r   r   )	rN   rO   r7   r8   r9   r:   r   Zy_predr   r>   r>   r?   "test_warm_start_equal_n_estimators~  s    
r   c            
      C   s   t ddd\} }t| |dd\}}}}tdddd	}||| |jd
d ||| ||}td
ddd	}||| ||}	t||	 d S )Nr,   r%   r   r   r!   r   TiE  r   r   r   F)r   r   r
   r4   r   r5   r   )
rN   rO   r7   r8   r9   r:   r   r   r   r   r>   r>   r?   test_warm_start_equivalence  s    

r   c               	   C   sD   t ddd\} }tdddd}tt || | W 5 Q R X d S )Nr,   r%   r   r   T)r/   r   r   )r   r
   r   r   r   r4   r   r>   r>   r?   $test_warm_start_with_oob_score_fails  s    r   c               	   C   sh   t ddd\} }tddd}|| | |jdddd	 || | tt t|d
 W 5 Q R X d S )Nr   r%   r   r   T)r/   r   Fr   )r   r   r/   r   )r   r
   r4   r   r   r   AttributeErrorre   r   r>   r>   r?   $test_oob_score_removed_on_warm_start  s    r   c                  C   sH   t ddd\} }tt ddddd}|| |j|| |jksDtd S )N   r%   r   r#   T)r(   r)   r   r"   )r   r
   r   r4   r   rh   rN   rO   baggingr>   r>   r?   test_oob_score_consistency  s    r   c                  C   s   t ddd\} }tt ddddd}|| | |j}|j}|j}t|t|ksVtt|d t| d ksrt|d j	j
d	kstd}|| }|| }|| }	| | d d |f }
|| }|	j}|	|
| |	j}t|| d S )
Nr   r%   r   r#   F)r(   r)   r"   r*   r   r-   r`   )r   r
   r   r4   estimators_samples_r   rf   r   rh   r   kindcoef_r   )rN   rO   r   Zestimators_samplesZestimators_featuresr<   Zestimator_indexZestimator_samplesZestimator_featuresr.   r7   r9   Z
orig_coefsZ	new_coefsr>   r>   r?   test_estimators_samples  s2    r   c                  C   s   t  } | j| j }}ttddt }t|ddd}||| |jd j	d d j
 }|jd }|jd }|jd }|| d d |f }	|| }
||	|
 t|j	d d j
| d S )Nr-   )Zn_componentsr#   r   )r.   r(   r"   r   r%   )r   r1   r2   r   r   r   r
   r4   rf   r   r   copyr   r   r   )r0   rN   rO   Zbase_pipeliner   Zpipeline_estimator_coefr.   Zestimator_sampleZestimator_featurer7   r9   r>   r>   r?   %test_estimators_samples_deterministic  s      


r   c                  C   sH   d} t d|  dd\}}tt | ddd}||| |j| ksDtd S )Nr   r-   r%   r   r#   )r(   r)   r"   )r   r
   r   r4   Z_max_samplesrh   )r(   rN   rO   r   r>   r>   r?   test_max_samples_consistency	  s    r   c                  C   s   d} dgdgdggd }dddgd }dddgd }dddgd }t d	| d
||j}t d	| d
||j}t d	| d
||j}||g||gkstd S )Nr   r   r   r%   ABCr-   T)r   r"   )r
   r4   r   rh   )r"   rN   ZY1ZY2ZY3x1Zx2Zx3r>   r>   r?   !test_set_oob_score_label_encoding  s*       r   c                 C   s"   | j ddd} d| t|  < | S )NfloatT)r   r   )Zastyper~   isfinite)rN   r>   r>   r?   replace2  s    r   c               
   C   s6  t dddgdd dgdt jdgdt jdgdt jdgg} t dddddgt dddgdddgdddgdddgdddggg}|D ]}t }ttt|}|	| |
|  t|}|	| |
| }|j|jkstt }t|}tt |	| | W 5 Q R X t|}tt |	| | W 5 Q R X qd S )Nr%   r   r   r-      	      )r~   arraynaninfNINFr   r   r   r   r4   r5   r   ry   rh   r   r   r   )rN   Zy_valuesrO   Z	regressorpipelineZbagging_regressory_hatr>   r>   r?   *test_bagging_regressor_with_missing_inputs8  s@    



r   c               	   C   s  t dddgdd dgdt jdgdt jdgdt jdgg} t dddddg}t }ttt|}|	| |
|  t|}|	| | |
| }|j|jkst||  ||  t }t|}tt |	| | W 5 Q R X t|}tt |	| | W 5 Q R X d S )Nr%   r   r   r-   r   )r~   r   r   r   r   r   r   r   r   r4   r5   r
   ry   rh   rD   rC   r   r   r   )rN   rO   
classifierr   Zbagging_classifierr   r>   r>   r?   +test_bagging_classifier_with_missing_inputsa  s2    


	


r   c                  C   sD   t ddgddgg} t ddg}tt ddd}|| | d S )Nr%   r-   r   r&   r   g333333?)r)   r"   )r~   r   r
   r   r4   r   r>   r>   r?   test_bagging_small_max_features  s    r   c                  C   sj   t jd} | dd}t d}G dd dt}t| ddd}||| t|j	d j
|jd  d S )Nr      r&   c                   @   s   e Zd ZdZdd ZdS )z8test_bagging_get_estimators_indices.<locals>.MyEstimatorz7An estimator which stores y indices information at fit.c                 S   s
   || _ d S rG   )_sample_indicesrL   r>   r>   r?   r4     s    z<test_bagging_get_estimators_indices.<locals>.MyEstimator.fitN)rS   rT   rU   rV   r4   r>   r>   r>   r?   MyEstimator  s   r   r%   )r.   r/   r"   )r~   randomZRandomStateZrandnZaranger   r   r4   r   rf   r   r   )r6   rN   rO   r   r   r>   r>   r?   #test_bagging_get_estimators_indices  s    
r   zBagging, Estimatorc              	   C   s`   t ddgddgg}t ddg}| | dd}d}tjt|d	 ||| W 5 Q R X d S )
Nr%   r-   r   r&   r   r   )base_estimatorr/   V`base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.r   )r~   r   r   r   FutureWarningr4   )BaggingZ	EstimatorrN   rO   modelr   r>   r>   r?   'test_base_estimator_argument_deprecated  s    r  r   c              	   C   s^   t ddgddgg}t ddg}|  }||| d}tjt|d |j W 5 Q R X d S )Nr%   r-   r   r&   r   zoAttribute `base_estimator_` was deprecated in version 1.2 and will be removed in 1.4. Use `estimator_` instead.r   )r~   r   r4   r   r   r   Zbase_estimator_)r   rN   rO   r  r   r>   r>   r?   'test_base_estimator_property_deprecated  s    r  c               	   C   sp   t  } | j| j }}tt d}t|ds.td}tjt	|d |
|||}W 5 Q R X |jdksltdS )zSCheck that `BaggingClassifier` delegate to classifier with
    `decision_function`.)r   rE   r   r   )   r   N)r   r1   r2   r
   r   r   rh   r   r   r   r4   rE   ry   )r0   rN   rO   r   r   Z
y_decisionr>   r>   r?   4test_deprecated_base_estimator_has_decision_function  s    r  )r   )erV   	itertoolsr   Znumpyr~   r{   r   Zsklearn.baser   Zsklearn.utils._testingr   r   Zsklearn.dummyr   r   Zsklearn.model_selectionr   r	   Zsklearn.ensembler
   r   Zsklearn.linear_modelr   r   Zsklearn.neighborsr   r   Zsklearn.treer   r   Zsklearn.svmr   r   Zsklearn.random_projectionr   Zsklearn.pipeliner   Zsklearn.feature_selectionr   r   Zsklearn.datasetsr   r   r   Zsklearn.utilsr   Zsklearn.preprocessingr   r   r   Zscipy.sparser   r    r6   r0   Zpermutationr2   r   permr1   rs   r@   markZparametrizerq   rt   rv   rw   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r>   r>   r>   r?   <module>   s   !



):	*#%$),	

() 


