U
    3d$o                     @   s  d Z ddlZddlZddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddlm!Z! ddl"m#Z# ddl"m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl"m+Z+ ddl"m,Z, ddl-m.Z. dd l-m/Z/ dd!l-m0Z0 dd"l1m2Z2 dd#l3m4Z4 dd$l3m5Z5 dd%l3m6Z6 dd&lm7Z7 dd'l8m9Z9 e Z:e:j;e:j< Z=Z>e Z?e?j;e?j< Z@ZAed(d)d*\ZBZCed+d)d*\ZDZEejFGd,d(e/d(d-d)d.gejFGd/de#d)d0gejFGd1d2d-gd3d4 ZHd5d6 ZId7d8 ZJd9d: ZKejFGd,d(e0d(d-d)d.gejFGd;di fe$d)d0i fe d<d-ifgejFGd1d2d-gd=d> ZLejFGd?d@dAdBgdCdD ZMejFGd?d@dAdBgdEdF ZNdGdH ZOG dIdJ dJee	ZPG dKdL dLe
e	ZQejFGdMeAdNg ieRdOfeAdPe fdQe!dRdSfgdTdUeRdVfeAdNdPe fdWeQ fgieSdXfeAdPe fdWedRdSfgeQ dYeSdXfgdZd[ ZTejFGdMe>dNg ieRdOfe>dNdPe fdWeP fgieSdXfe>dPe fdWe  fgeP dYeSdXfgd\d] ZUejFjGd^e+dPedd0fdQedd0fgd_e@dd` eAdd` fe,dPe fdQe dd0fgd_e=e>fgdadbgdcddde ZVdfdg ZWejFjGdhe+dPe fdQed)d0fge e0d-d)didjfed-dke,dPe fdQe d)d0fge e0d-d)didje=e>fgdadbgdcdldm ZXdndo ZYejFZdpejFjGdhe+dPe fdQed)d0fge dYfed-dke,dPe fdQe d)d0fge dYe=e>fgdadbgdcdqdr Z[ejFGdse+edTed)d0e@eAfe,edte e=e>fgdudv Z\ejFGdhe+dPe fdQe! fgdwdxe@eAfe,dPe fdQe  fgdwdxe=e>fgdydz Z]ejFGd{ee+efee,efgd|d} Z^ejFjGd~e(d)d0e#d)d0gddgdcdd Z_dd Z`ejFGdddtgejFGd1d2d-gdd ZaejFjGde+dPedd0fdQedd0fgd_e?jbe@eAddddddgfe+dPedd0fddQedd0fgd_e?jbe@dd` eAdd` ddgfe,dPe fdQe dd0fgd_e:jbe=e>ddgfgdddbgdcejFGd1d-d2gdd Zcdd ZddS )z+Test the stacking classifier and regressor.    N)assert_array_equal)BaseEstimator)ClassifierMixin)RegressorMixin)clone)ConvergenceWarning)	load_iris)load_diabetes)load_breast_cancer)make_regression)make_classification)make_multilabel_classification)DummyClassifier)DummyRegressor)LogisticRegression)LinearRegression)Ridge)RidgeClassifier)	LinearSVC)	LinearSVR)SVC)RandomForestClassifier)RandomForestRegressor)KNeighborsClassifier)MLPClassifier)scale)StackingClassifier)StackingRegressor)train_test_split)StratifiedKFold)KFold)CheckingClassifier)assert_allclose)assert_allclose_dense_sparse)ignore_warnings)NotFittedError)Mock   *   )Z	n_classesrandom_state   cvT)Zn_splitsshuffler)   final_estimatorr)   passthroughFc                 C   sH  t ttttdd\}}}}dt fdt fg}t||| |d}||| || |	| |
||dkstt||}	|rdnd}
|	jd	 |
kst|rt||	d d d
d f  |jdd ||| || |	| |d kr|| ||}	|rdnd}|	jd	 |ks$t|rDt||	d d d
d f  d S )Nr(   Zstratifyr)   lrsvc
estimatorsr-   r+   r/   皙?
         dropr1      r'   )r   r   X_irisy_irisr   r   r   fitpredictpredict_probascoreAssertionError	transformshaper"   
set_paramsdecision_function)r+   r-   r/   X_trainX_testy_trainy_testr4   clfX_transexpected_column_countexpected_column_count_drop rP   H/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/ensemble/tests/test_stacking.pytest_stacking_classifier_irisB   sB    
   






rR   c            	      C   s   t dd\} }tt| ||dd\}}}}dt fdtddfg}t|dd	}||| ||}|jd
 dksvt	dt fdt
 fg}|j|d ||| ||}|jd
 dkst	d S )NTZ
return_X_yr(   r0   r1   rfr.   r'   r4   r+   r8   r*   r2   r4   )r
   r   r   r   r   r   r?   rD   rE   rC   r   rF   )	XyrH   rI   rJ   _r4   rL   rM   rP   rP   rQ   :test_stacking_classifier_drop_column_binary_classificationp   s&       

rZ   c                  C   s   t ttttdd\} }}}ddtddfg}tddd}tdtddfg|d	d
}t||d	d
}|| | || | t|	||	| t|
||
| t|||| d S )Nr(   r0   r1   r:   r2   r   r.   r6   Zn_estimatorsr)      r4   r-   r+   )r   r   r=   r>   r   r   r   r?   r"   r@   rA   rD   )rH   rI   rJ   rY   r4   rT   rL   Zclf_droprP   rP   rQ   'test_stacking_classifier_drop_estimator   s&         r_   c                  C   s   t tttdd\} }}}ddtddfg}tddd}tdtddfg|dd	}t||dd	}|| | || | t|	||	| t|
||
| d S )
Nr(   r.   r[   svrr   r6   r\   r]   r^   )r   r   
X_diabetes
y_diabetesr   r   r   r?   r"   r@   rD   )rH   rI   rJ   rY   r4   rT   regZreg_droprP   rP   rQ   &test_stacking_regressor_drop_estimator   s"        rd   zfinal_estimator, predict_paramsZ
return_stdc                 C   s.  t tttdd\}}}}dt fdt fg}t||| |d}	|	|| |	j|f|}
|r`dnd}|rxt	|
|ksxt
|	|}|rdnd}|jd |kst
|rt||d d d	d f  |	jd
d |	|| |	| |	|}|rdnd}|jd |ks
t
|r*t||d d d	d f  d S )Nr(   r.   r1   r`   r3   r*   r8      r:   r;      )r   r   ra   rb   r   r   r   r?   r@   lenrC   rD   rE   r"   rF   )r+   r-   Zpredict_paramsr/   rH   rI   rJ   rY   r4   rc   resultZexpected_result_lengthrM   rN   rO   rP   rP   rQ    test_stacking_regressor_diabetes   s<      


rj   fmtZcscZcsrZcooc           	      C   s   t ttt| tdd\}}}}dt fdt fg}t	ddd}t
||ddd	}||| ||}t||d d d
d f  t|st|j|jkstd S )Nr(   r.   r1   r`   r6   r\   r]   Tr3   rf   )r   sparse
coo_matrixr   ra   asformatrb   r   r   r   r   r?   rD   r#   issparserC   format	rk   rH   rI   rJ   rY   r4   rT   rL   rM   rP   rP   rQ   *test_stacking_regressor_sparse_passthrough   s$         
rr   c           	      C   s   t ttt| tdd\}}}}dt fdt fg}t	ddd}t
||ddd	}||| ||}t||d d d
d f  t|st|j|jkstd S )Nr(   r.   r1   r2   r6   r\   r]   Tr3   r9   )r   rl   rm   r   r=   rn   r>   r   r   r   r   r?   rD   r#   ro   rC   rp   rq   rP   rP   rQ   +test_stacking_classifier_sparse_passthrough   s$         
rs   c                  C   sh   t td d td d  } }dt fdt fg}t|d}|| | || }|jd dksdt	d S )Nd   r1   rT   rV   r8   r*   )
r   r=   r>   r   r   r   r?   rD   rE   rC   )ZX_Zy_r4   rL   ZX_metarP   rP   rQ   )test_stacking_classifier_drop_binary_prob  s    

ru   c                   @   s   e Zd Zdd Zdd ZdS )NoWeightRegressorc                 C   s   t  | _| j||S N)r   rc   r?   selfrW   rX   rP   rP   rQ   r?     s    zNoWeightRegressor.fitc                 C   s   t |jd S )Nr   )nponesrE   )ry   rW   rP   rP   rQ   r@     s    zNoWeightRegressor.predictN)__name__
__module____qualname__r?   r@   rP   rP   rP   rQ   rv     s   rv   c                   @   s   e Zd Zdd ZdS )NoWeightClassifierc                 C   s   t dd| _| j||S )NZ
stratified)Zstrategy)r   rL   r?   rx   rP   rP   rQ   r?     s    zNoWeightClassifier.fitN)r|   r}   r~   r?   rP   rP   rP   rQ   r     s   r   zy, params, type_err, msg_errr4   zInvalid 'estimators' attribute,r1   svmiP  Zmax_iterrA   )r4   stack_methodz+does not implement the method predict_probaZcorzdoes not support sample weightr4   r-   c              	   C   sP   t j||d8 tf |ddi}|jtt| ttjd d W 5 Q R X d S Nmatchr+   r'   r   Zsample_weight)	pytestraisesr   r?   r   r=   rz   r{   rE   )rX   paramstype_errmsg_errrL   rP   rP   rQ   test_stacking_classifier_error!  s    *r   c              	   C   sP   t j||d8 tf |ddi}|jtt| ttjd d W 5 Q R X d S r   )	r   r   r   r?   r   ra   rz   r{   rE   )rX   r   r   r   rc   rP   rP   rQ   test_stacking_regressor_errorP  s    r   zestimator, X, yrV   rt   r   r   )idsc                 C   s   t | }|jtdtjddd t | }|jdd |jtdtjddd t||||d d dd f |||| d S )NTr   r,   r)   r+   r:   r;   r8   )	r   rF   r    rz   randomZRandomStater"   r?   rD   )	estimatorrW   rX   Zestimator_fullZestimator_droprP   rP   rQ   test_stacking_randomnessk  s     r   c                  C   s2   t dtddfdtddfgd} | tt d S )Nr1   i'  r   r   rV   )r   r   r   r?   r=   r>   )rL   rP   rP   rQ   )test_stacking_classifier_stratify_default  s    r   zstacker, X, yr   r^   rS   c              	   C   s  t |d }tdg| dgt ||   }t|||dd\}}}}}	}ttd | || W 5 Q R X | |}
ttd | j||t|j	d W 5 Q R X | |}t
|
| ttd | j|||	d W 5 Q R X | |}t|
|  dkstd S )	Nr*   g?g?r(   r.   )categoryr   r   )rh   rz   arrayr   r$   r   r?   r@   r{   rE   r"   abssumrC   )stackerrW   rX   Zn_half_samplesZtotal_sample_weightrH   rI   rJ   rY   Zsample_weight_trainZy_pred_no_weightZy_pred_unit_weightZy_pred_biasedrP   rP   rQ    test_stacking_with_sample_weight  s*    !   
"


r   c                  C   s>   t dtddfgtddd} | jttttjd d d S )Nr1   T)Zexpected_sample_weightr   r   r   )r   r!   r?   r=   r>   rz   r{   rE   )r   rP   rP   rQ   0test_stacking_classifier_sample_weight_fit_param  s
    r   z-ignore::sklearn.exceptions.ConvergenceWarningc              	   C   s   t | }t | }|jdd |jdd ||| ||| t|j|jD ]\}}t|j|j qNtjt	dd t|j
j|j
j W 5 Q R X d S )Nr'   r   r]   z	Not equalr   )r   rF   r?   zipestimators_r"   Zcoef_r   r   rC   Zfinal_estimator_)r   rW   rX   Zstacker_cv_3Zstacker_cv_5Zest_cv_3Zest_cv_5rP   rP   rQ   test_stacking_cv_influence  s    ! r   z7Stacker, Estimator, stack_method, final_estimator, X, yr@   c                 C   s   t ||ddd\}}}}	d| ||fd| ||fg}
|
D ],\}}t |_t||}t||t|d q@| |
d|d}|||	 |jd	d
 |
D ksttdd |jD st|jD ]}t||}|| qdS )z2Check the behaviour of stacking when `cv='prefit'`r(   g      ?)r)   Z	test_sizeZd0d1)Zside_effectprefit)r4   r+   r-   c                 S   s   g | ]\}}|qS rP   rP   ).0rY   r   rP   rP   rQ   
<listcomp>H  s     z(test_stacking_prefit.<locals>.<listcomp>c                 s   s   | ]}|j jd kV  qdS )r   N)r?   Z
call_count)r   r   rP   rP   rQ   	<genexpr>J  s     z'test_stacking_prefit.<locals>.<genexpr>N)	r   r?   r&   getattrsetattrr   rC   allZassert_called_with)ZStacker	Estimatorr   r-   rW   rX   ZX_train1ZX_train2Zy_train1Zy_train2r4   rY   r   Z
stack_funcr   Zstack_func_mockrP   rP   rQ   test_stacking_prefit  s0       
  

r   r   rU   c              	   C   s&   t t | || W 5 Q R X d S rw   )r   r   r%   r?   )r   rW   rX   rP   rP   rQ   test_stacking_prefit_errorR  s    r   z!make_dataset, Stacking, Estimatorc              	   C   s   G dd d|}| ddd\}}|d| fgd}|j  d}tjt|d	 |j W 5 Q R X ||| d
}tjt|d	 |j W 5 Q R X d S )Nc                       s    e Zd ZdZ fddZ  ZS )z8test_stacking_without_n_features_in.<locals>.MyEstimatorz Estimator without n_features_in_c                    s   t  || | `d S rw   )superr?   n_features_in_rx   	__class__rP   rQ   r?     s    z<test_stacking_without_n_features_in.<locals>.MyEstimator.fit)r|   r}   r~   __doc__r?   __classcell__rP   rP   r   rQ   MyEstimator|  s   r   r   rt   )r)   Z	n_samplesr1   rV   z' object has no attribute n_features_in_r   z6'MyEstimator' object has no attribute 'n_features_in_')r|   r   r   AttributeErrorr   r?   )Zmake_datasetZStackingr   r   rW   rX   r   msgrP   rP   rQ   #test_stacking_without_n_features_inq  s    r   r   r   r   c           
      C   s   t tttdd\}}}}d}d| fg}t|t dd||}||}|j|jd |fks`tt	t
|jdd	d
r|t||}	|	j|jkstdS )zCheck the behaviour for the multilabel classification case and the
    `predict_proba` stacking method.

    Estimators are not consistent with the output arrays and we need to ensure that
    we handle all cases.
    r(   r0   r'   estrA   r4   r-   r   r   r8   )Zaxisg      ?N)r   X_multilabely_multilabelr   r   r?   rD   rE   rC   anyrz   iscloser   r@   )
r   rH   rI   rJ   rK   	n_outputsr4   r   rM   y_predrP   rP   rQ   1test_stacking_classifier_multilabel_predict_proba  s*       
 

r   c            	      C   s   t tttdd\} }}}d}dt fg}t|t dd| |}||}|j|jd |fksbt	|
|}|j|jks|t	dS )	zCheck the behaviour for the multilabel classification case and the
    `decision_function` stacking method. Only `RidgeClassifier` supports this
    case.
    r(   r0   r'   r   rG   r   r   N)r   r   r   r   r   r   r?   rD   rE   rC   r@   )	rH   rI   rJ   rK   r   r4   r   rM   r   rP   rP   rQ   5test_stacking_classifier_multilabel_decision_function  s(        

r   r   autoc                 C   s  t tttdd\}}}}| }d}dtddfdtddfdt fg}t }	t||	|| d	||}
t
|| |
|}|j|jkst| d	krd
d
dg}ndgt| }|
j|kst|t| }|r||jd 7 }|
|}|j|jd |fkstt
|
jtddgg|  dS )zCheck the behaviour for the multilabel classification case for stack methods
    supported for all estimators or automatically picked up.
    r(   r0   r'   Zmlpr.   rT   ridge)r4   r-   r/   r   r   rA   rG   r@   r8   r   N)r   r   r   copyr   r   r   r   r   r?   r   r@   rE   rC   rh   Zstack_method_rD   Zclasses_rz   r   )r   r/   rH   rI   rJ   rK   Zy_train_before_fitr   r4   r-   rL   r   Zexpected_stack_methodsZn_features_X_transrM   rP   rP   rQ   0test_stacking_classifier_multilabel_auto_predict  sF        


r   z,stacker, feature_names, X, y, expected_namesZstackingclassifier_lr0Zstackingclassifier_lr1Zstackingclassifier_lr2Zstackingclassifier_svm0Zstackingclassifier_svm1Zstackingclassifier_svm2)otherr:   Zstackingclassifier_lrZstackingclassifier_svmZstackingregressor_lrZstackingregressor_svmZStackingClassifier_multiclassZStackingClassifier_binaryc                 C   sF   | j |d | t|| |r.t||f}| |}t|| dS )z/Check get_feature_names_out works for stacking.)r/   N)rF   r?   r   rz   ZconcatenateZget_feature_names_outr   )r   feature_namesrW   rX   Zexpected_namesr/   Z	names_outrP   rP   rQ   test_get_feature_names_out   s    B
r   c                  C   sf   t ttttdd\} }}}tdt fgd}|| | || || |	||dksbt
dS )zNCheck that a regressor can be used as the first layer in `StackingClassifier`.r(   r0   r   rV   r5   N)r   r   r=   r>   r   r   r?   r@   rA   rB   rC   )rH   rI   rJ   rK   rL   rP   rP   rQ   'test_stacking_classifier_base_regressorL  s       

r   )er   r   Znumpyrz   Znumpy.testingr   Zscipy.sparserl   Zsklearn.baser   r   r   r   Zsklearn.exceptionsr   Zsklearn.datasetsr   r	   r
   r   r   r   Zsklearn.dummyr   r   Zsklearn.linear_modelr   r   r   r   Zsklearn.svmr   r   r   Zsklearn.ensembler   r   Zsklearn.neighborsr   Zsklearn.neural_networkr   Zsklearn.preprocessingr   r   r   Zsklearn.model_selectionr   r   r    Zsklearn.utils._mockingr!   Zsklearn.utils._testingr"   r#   r$   r%   Zunittest.mockr&   Zdiabetesdatatargetra   rb   Zirisr=   r>   r   r   ZX_binaryZy_binarymarkZparametrizerR   rZ   r_   rd   rj   rr   rs   ru   rv   r   
ValueError	TypeErrorr   r   r   r   r   r   filterwarningsr   r   r   r   r   r   r   r   r   r   rP   rP   rP   rQ   <module>   sx   
  '$

	)





	

	
	
	


-

5<