U
    3dr                     @   sz  d Z ddlZddlZddlZddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" ddl!m#Z# ddl!m$Z$ ddl!m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+m,Z,m-Z- ddl.m/Z/ ddl0m1Z1 ddl0m2Z2 ddl3m4Z4 ddl5m6Z6 ddl7m8Z8 d d!gd!d!gd!d gd"d"gd"d#gd#d"ggZ9d!d!d!d"d"d"gZ:ed$dd%d"fZ;ed$d&d"dd'd&fZ<ed$dd%d"fZ=ed$d#dd(d#fZ>e Z?ej@Ad)ed*e;fed*e<fed+e;fed+e<fed*e=fed+e=fed+e=fed+e=fed+e>fed+e;fed+e<fed+e>fgej@Ad,d-ej@Ad.d"gd"d#gfej@Ad/d0d1d2 ZBd3d4 ZCej@Ad,d#d5gd6d7 ZDej@Ad,d&d5gd8d9 ZEej@Ad:d;d<d=d>d?d@dAgdBdC ZFej@AdDeGdEej@AdFe d+feddGd+feddGdHfeddGd+feddGdHfgdIdJ ZHej@AdKeGd"dLdM ZIej@AdNeddGeddGfej@AdDdOdPdQ ZJej@AdNe eddGedd"dd"dReddGfej@AdSdTdUdV ZKej@AdWejLjMejLjNejOjPejQjRejQjSejOjTfdXdY ZUG dZd[ d[e,e+ZVej@Wd\ej@Ad]edd*d^d.dgid_fe dgd`dadbfeddGdgd`dHdcddfeddGdgd`d*dcddfeddGdgdedadffeV dgd*dadgfeV dgd`dadhfeV dgdidadjfe dgdedkdlfe dgdHdmdndofe dgdHdpdndofe dgdHdkdqfgdrds ZXej@Adtdudvgdwdx ZYej@Adye eddGgej@Ad.d!dzgd{d| ZZej@Adye eddGgd}d~ Z[ej@Adye eddGgdd Z\dd Z]dd Z^dd Z_dd Z`ej@jAdyedddeddEdgddgdej@jAddee# dd dD fe$ dd dD fee# dd dD fddgdddgdej@jAd.dd#gdd dD gddgddd Zaej@jAdde?jbd dfdd#gdfdd dD dfddddgdfgdddddgddd Zcej@Adye e e e gdd Zdej@Adee>fee;fgdd ZedS )z,
Testing for the partial dependence module.
    N)partial_dependence)_grid_from_X_partial_dependence_brute_partial_dependence_recursion)GradientBoostingClassifier)GradientBoostingRegressor)RandomForestRegressor)HistGradientBoostingClassifier)HistGradientBoostingRegressor)LinearRegression)LogisticRegression)MultiTaskLasso)DecisionTreeRegressor)	load_iris)make_classificationmake_regression)KMeans)make_column_transformer)r2_score)PolynomialFeatures)StandardScaler)RobustScaler)scale)make_pipeline)DummyClassifier)BaseEstimatorClassifierMixinclone)NotFittedError)assert_allclose)assert_array_equal)	_IS_32BIT)check_random_state)assert_is_subtree      2   )	n_samplesrandom_state   )r)   	n_classesn_clusters_per_classr*   )r)   	n_targetsr*   zEstimator, method, dataautobrutegrid_resolution)   
   featureskind)average
individualbothc                    s  |  }|\\}}}	|j d }
||| t||||| d}||d  }}|	f fddtt|D }|	|
f fddtt|D }|dkr|jj |kstn:|dkr|jj |kstn |jj |kst|jj |kstt| f}|d k	stt	|j |kstd S )	Nr   )Xr4   methodr5   r1   valuesc                    s   g | ]} qS  r<   .0_r1   r<   T/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/inspection/tests/test_partial_dependence.py
<listcomp>q   s     z%test_output_shape.<locals>.<listcomp>c                    s   g | ]} qS r<   r<   r=   r@   r<   rA   rB   u   s     r6   r7   )
shapefitr   rangelenr6   AssertionErrorr7   npasarray)	Estimatorr:   datar1   r4   r5   estr9   yr.   Zn_instancesresultpdpaxesZexpected_pdp_shapeZexpected_ice_shapeZexpected_axes_shaper<   r@   rA   test_output_shapeC   s8    
 rQ   c                  C   sN  d} d}ddg}t ddgddgg}t|| ||\}}t|ddgddgddgddgg t||j t jd}d	}|jd
d}t|| ||d\}}|j|| |jd fkst	t |jd|fkst	d}d||d d df< |
| t|| ||d\}}|j|| |jd fkst	|d j|fks4t	|d j|fksJt	d S )N皙?ffffff?d   Fr&   r'   r+      r      )   r'   sizer@      90  )rH   rI   r   r    TrandomRandomStatenormalrC   rG   shuffle)percentilesr1   is_categoricalr9   gridrP   rngZn_unique_valuesr<   r<   rA   test_grid_from_X   s<    "   

   
rf   rU   c              
   C   sr   t d}d}dg}|dddddddd	gi}t|||| d
\}}|jd|jd fks\t|d jdksntdS )jCheck that `_grid_from_X` always sample from categories and does not
    depend from the percentiles.
    pandasrR   TZcat_featureABCDEr@   r2   r&   r   )r2   N)pytestimportorskip	DataFramer   rC   rG   )r1   pdrb   rc   r9   rd   rP   r<   r<   rA   !test_grid_from_X_with_categorical   s    
   
rr   c                 C   s   t d}d}ddg}|ddddddd	dddg
d
d
d
dddddddg
d}| }t|||| d\}}| dkr|jdkst|d jd |d kst|d
 jd | kstnB|jdkst|d jd |d kst|d
 jd |d kstdS )rg   rh   rR   TFri   rj   rk   rl   rm   r&   r'   r2         )catnumr@   r+   )rW   r'   r   rv   )   r'   ru   N)rn   ro   rp   nuniquer   rC   rG   )r1   rq   rb   rc   r9   rx   rd   rP   r<   r<   rA   #test_grid_from_X_heterogeneous_type   s,    
   
ry   z%grid_resolution, percentiles, err_msg)r'   )r   g-C6?zpercentiles are too close)rU   )r&   r'   r+   rV   .'percentiles' must be a sequence of 2 elements)rU   r\   rz   )rU   )r%   rT   ('percentiles' values must be in \[0, 1\])rU   )rS   r'   r{   )rU   )g?皙?z+percentiles\[0\] must be strictly less than)r&   rR   z1'grid_resolution' must be strictly greater than 1c              	   C   sH   t ddgddgg}dg}tjt|d t||||  W 5 Q R X d S )Nr&   r'   r+   rV   Fmatch)rH   rI   rn   raises
ValueErrorr   )r1   rb   err_msgr9   rc   r<   r<   rA   test_grid_from_X_error   s    r   target_featurer2   zest, methodr*   	recursionc                 C   s   t dddd\}}||  }| || tj|gtjd}tdgdgg}|dkrnt| |||dd	\}}nt| ||}g }	d
D ]0}
| }|
|d d |f< |		| 
|  q|d }|dkrdnd}tj||	|dstd S )Nr   r2   )r*   
n_featuresZn_informativedtype      ?{   r0   r/   )response_method)r   r   r   r|   gMbP?)rtol)r   meanrD   rH   arrayint32r   r   copyappendpredictZallcloserG   )rL   r:   r   r9   rM   r4   rd   rO   ZpredictionsZmean_predictionsvalZX_r   r<   r<   rA   test_partial_dependence_helpers   s,        r   seedc                 C   sr  t j| }d}d}|||}||d }||  }d}d}tdd d||d}t|t t j	j
}	tddd||	d	}
t||	d
}||| |
|| ||| z(t|j|
d j t|j|d j W n" tk
r   tstdY d S X |ddd}t|D ]X}t j|gt j	d}t|||}t|
||}t|||}t j|| t j|| qd S )N  r2   r3   r   r&   F)n_estimatorsZmax_featuresZ	bootstrap	max_depthr*   Zsquared_error)r   Zlearning_rateZ	criterionr   r*   )r   r*   )r   r   z)this should only fail on 32 bit platformsr(   r%   r   )rH   r^   r_   Zrandnr   r   r"   randintZiinfor   maxr   r   rD   r#   Ztree_rG   r!   reshaperE   r   r   Ztestingr   )r   re   r)   r   r9   rM   r   Z	tree_seedZforestZequiv_random_stateZgbdttreerd   fr4   Z
pdp_forestZpdp_gbdtZpdp_treer<   r<   rA   /test_recursion_decision_tree_vs_forest_and_gbdt(  sR    
r   rL   )r   r&   r'   r+   rV   r2   c                 C   sv   t dddd\}}t|dks$t| || t| ||gdddd}t| ||gdd	dd}t|d |d d
d d S )Nr'   r&   r,   r-   r*   r   decision_functionr   r6   )r   r:   r5   r0   gHz>)Zatol)r   rH   r   rG   rD   r   r   )rL   r   r9   rM   Zpreds_1Zpreds_2r<   r<   rA    test_recursion_decision_functiono  s(    	r   )r*   Zmin_samples_leafZmax_leaf_nodesmax_iterpower)r&   r'   c                 C   s   t jd}d}d}|j|dfd}|d d |f | }| || t| |g|ddd}|d	 d d
d}|d d }	t|d|}t	 ||	}
t
|	|
|}|dkstd S )Nr      r'   r2   rY   r   r6   )r4   r9   r1   r5   r;   r%   r&   )ZdegreeGz?)rH   r^   r_   r`   rD   r   r   r   fit_transformr   r   r   rG   )rL   r   re   r)   Ztarget_variabler9   rM   rO   Znew_XZnew_ylrZr2r<   r<   rA   #test_partial_dependence_easy_target  s&        r   rJ   c              	   C   s`   t dddd\}}t||gj}|  }||| tjtdd t||dg W 5 Q R X d S )Nr+   r&   r   r   z3Multiclass-multioutput estimators are not supportedr}   )	r   rH   r   r]   rD   rn   r   r   r   )rJ   r9   rM   rL   r<   r<   rA   test_multiclass_multioutput  s     r   c                   @   s   e Zd Zdd ZdS ) NoPredictProbaNoDecisionFunctionc                 C   s   ddg| _ | S )Nr   r&   )Zclasses_)selfr9   rM   r<   r<   rA   rD     s    
z$NoPredictProbaNoDecisionFunction.fitN)__name__
__module____qualname__rD   r<   r<   r<   rA   r     s   r   zignore:A Bunch will be returnedzestimator, params, err_msg)r*   Zn_initz4'estimator' must be a fitted regressor or classifierZpredict_proba)r4   r   z7The response_method parameter is ignored for regressors)r4   r   r:   zC'recursion' method, the response_method must be 'decision_function'Zblahblahz=response_method blahblah is invalid. Accepted response_methodzBThe estimator has no predict_proba and no decision_function methodz*The estimator has no predict_proba method.r   z.The estimator has no decision_function method.)r4   r:   zEblahblah is invalid. Accepted method names are brute, recursion, autor7   )r4   r:   r5   zCThe 'recursion' method only applies when 'kind' is set to 'average'r8   z=Only the following estimators support the 'recursion' method:c              	   C   sF   t dd\}}| || tjt|d t| |f| W 5 Q R X d S )Nr   r   r}   r   rD   rn   r   r   r   )	estimatorparamsr   r9   rM   r<   r<   rA   test_partial_dependence_error  s    Gr   zwith_dataframe, err_msg)T'Only array-like or scalar are supported)Fr   c              	   C   sh   t dd\}}| r&td}||}t ||}tjt|d t||t	dddd W 5 Q R X d S )Nr   r   rh   r}   r'   r&   r4   )
r   rn   ro   rp   r   rD   r   	TypeErrorr   slice)Zwith_dataframer   r9   rM   rq   r   r<   r<   rA   #test_partial_dependence_slice_error0  s    

r   r   i'  c              	   C   sJ   t dd\}}| || d}tjt|d t| ||g W 5 Q R X d S )Nr   r   zall features must be inr}   r   )r   r4   r9   rM   r   r<   r<   rA   /test_partial_dependence_unknown_feature_indicesB  s
    r   c              	   C   sb   t d}tdd\}}||}| || dg}d}t jt|d t| || W 5 Q R X d S )Nrh   r   r   r^   z/A given column is not a column of the dataframer}   )rn   ro   r   rp   rD   r   r   r   )r   rq   r9   rM   dfr4   r   r<   r<   rA   .test_partial_dependence_unknown_feature_stringO  s    

r   c                 C   s4   t dd\}}| || t| t|dgdd d S )Nr   r   r6   )r5   )r   rD   r   list)r   r9   rM   r<   r<   rA   test_partial_dependence_X_list^  s    r   c               	   C   sz   t t dd} | tt tjtdd t| tdgddd W 5 Q R X tjtdd t| tdgddd W 5 Q R X d S )Nr   )initr*   z9Using recursion method with a non-constant init predictorr}   r   r6   )r:   r5   )	r   r   rD   r9   rM   rn   ZwarnsUserWarningr   )Zgbcr<   r<   rA   (test_warning_recursion_non_constant_inith  s      r   c            	      C   s   d} t jd}|jd| td}|| }| }||   || < t j||f }t | }d||< t	ddd}|j
|||d	 t||dgd
d}t |d
 |d d dkstd S )Nr   i@ r'   )rZ   r   g     @@r3   r&   )r   r*   sample_weightr6   )r4   r5   r;   )r   r&   r   )rH   r^   r_   r   boolZrandr   Zc_onesr   rD   r   ZcorrcoefrG   )	Nre   maskxrM   r9   r   clfrO   r<   r<   rA   %test_partial_dependence_sample_weightz  s    

r   c               	   C   sR   t dd} | jtttttd tjt	dd t
| tdgd W 5 Q R X d S )Nr&   r   r   z#does not support partial dependencer}   r   )r
   rD   r9   rM   rH   r   rF   rn   r   NotImplementedErrorr   )r   r<   r<   rA   test_hist_gbdt_sw_not_supported  s    
 r   c                  C   s   t  } t }tdd}t||}||| j| j || j| j d}t|| j|gddd}t||	| j|gddd}t
|d |d  t
|d d |d d |j|  |j|   d S )N*   r   r   r3   r6   r4   r1   r5   r;   )r   r   r   r   rD   r   rK   targetr   Z	transformr   scale_mean_)irisscalerr   piper4   pdp_pipepdp_clfr<   r<   rA    test_partial_dependence_pipeline  s4    

    

r   r   r   r*   )r*   r   zestimator-brutezestimator-recursion)idspreprocessorc                 C   s   g | ]}t j| qS r<   r   feature_namesr>   ir<   r<   rA   rB     s     rB   r   r'   c                 C   s   g | ]}t j| qS r<   r   r   r<   r<   rA   rB     s     r&   r+   c                 C   s   g | ]}t j| qS r<   r   r   r<   r<   rA   rB     s     Zpassthrough)	remainderNonezcolumn-transformerzcolumn-transformer-passthroughc                 C   s   g | ]}t j| qS r<   r   r   r<   r<   rA   rB     s     zfeatures-integerzfeatures-stringc                 C   s  t d}|jttjtjd}t|| }||tj	 t
|||ddd}|d k	rjt||}ddg}n|}ddg}t| |tj	}	t
|	||d	ddd
}
t|d |
d  |d k	r|jd }t|d d |
d d |jd  |jd   nt|d d |
d d  d S )Nrh   columnsr3   r6   r   r   r&   r'   r0   )r4   r:   r1   r5   Zstandardscalerr;   )rn   ro   rp   r   r   rK   r   r   rD   r   r   r   r   r   Znamed_transformers_r   r   )r   r   r4   rq   r   r   r   ZX_procZfeatures_clfr   r   r   r<   r<   rA   !test_partial_dependence_dataframe  sB    

    
	

r   zfeatures, expected_pd_shape)r   r+   r3   r   )r+   r3   r3   c                 C   s   g | ]}t j| qS r<   r   r   r<   r<   rA   rB   	  s     TFz
scalar-intz
scalar-strzlist-intzlist-strr   c                 C   s   t d}|jtjtjd}tt dd dD ft dd dD f}t	|t
dd	d
}||tj t||| ddd}|d j|kstt|d t|d jd kstd S )Nrh   r   c                 S   s   g | ]}t j| qS r<   r   r   r<   r<   rA   rB     s     z8test_partial_dependence_feature_type.<locals>.<listcomp>r   c                 S   s   g | ]}t j| qS r<   r   r   r<   r<   rA   rB     s     r   r   r   r   r3   r6   r   r;   r&   )rn   ro   rp   r   rK   r   r   r   r   r   r   rD   r   r   rC   rG   rF   )r4   Zexpected_pd_shaperq   r   r   r   r   r<   r<   rA   $test_partial_dependence_feature_type  s(    
 
    r   c              	   C   s   t j}tt ddgft ddgf}t|| }tjtdd t	||ddgdd W 5 Q R X tjtdd t	| |ddgdd W 5 Q R X d S )	Nr   r'   r&   r+   zis not fitted yetr}   r3   )r4   r1   )
r   rK   r   r   r   r   rn   r   r   r   )r   r9   r   r   r<   r<   rA    test_partial_dependence_unfitted"  s    
 
r   zEstimator, datac           	      C   sj   |  }|\\}}}| || t||ddgdd}t||ddgdd}tj|d dd}t||d  d S )Nr&   r'   r6   )r9   r4   r5   r7   )Zaxis)rD   r   rH   r   r   )	rJ   rK   rL   r9   rM   r.   Zpdp_avgZpdp_indZavg_indr<   r<   rA   +test_kind_average_and_average_of_individual7  s    r   )f__doc__ZnumpyrH   rn   ZsklearnZsklearn.inspectionr   Z&sklearn.inspection._partial_dependencer   r   r   Zsklearn.ensembler   r   r   r	   r
   Zsklearn.linear_modelr   r   r   Zsklearn.treer   Zsklearn.datasetsr   r   r   Zsklearn.clusterr   Zsklearn.composer   Zsklearn.metricsr   Zsklearn.preprocessingr   r   r   r   Zsklearn.pipeliner   Zsklearn.dummyr   Zsklearn.baser   r   r   Zsklearn.exceptionsr   Zsklearn.utils._testingr   r    Zsklearn.utilsr!   Zsklearn.utils.validationr"   Zsklearn.tree.tests.test_treer#   r9   rM   Zbinary_classification_dataZmulticlass_classification_dataZregression_dataZmultioutput_regression_datar   markZparametrizerQ   rf   rr   ry   r   rE   r   r   r   r   r   ZDecisionTreeClassifierZExtraTreeClassifierZensembleZExtraTreesClassifierZ	neighborsZKNeighborsClassifierZRadiusNeighborsClassifierZRandomForestClassifierr   r   filterwarningsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r<   r<   r<   rA   <module>   s(  (   -'



,
F   










E
 	 
 


+

	
