U
    3dJ                     @   sD
  U d Z ddlZddlZddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZmZ ddlZddlmZ dd	lmZ dd
lmZ ddlmZ ddlZddlZddlZddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddlm!Z! ddlm"Z" ddl#m$Z$ ddlm%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl*m,Z, ddl*m-Z- ddl*m.Z. ddl*m/Z/ ddl0m1Z1m2Z2 ddl0m3Z3 ddl4m5Z5 dd l6m7Z7 dd!l8m9Z9 dd"lm:Z: dd#l;m<Z< d$d%gd%d%gd%d$gd&d&gd&d'gd'd&ggZ=d%d%d%d&d&d&gZ>d%d%gd'd'gd(d'ggZ?d%d&d&gZ@e%j)d)d*d(ddd+dd,\ZAZBe%C ZDe9dZEeEFeDjGjHZIeDjJeI eD_JeDjGeI eD_Ge%jKd)d*d&d-\ZLZMe%jNd.d&d/\ZOZPeOQejRZOejST d jUZVe+e-d0ZWe,e.d1ZXd2e/iZYeZ Z[ee\ef e]d3< e[^eW e[^eX e[^eY eW_ Z`ee\ef e]d4< e`^eX d5d6 Zaejbcd7eWd8d9 Zdd:d; Zeejbcd7eWejbcd<d=d>d? Zfd@dA Zgejbcd7eXejbcd<dBdCdD ZhdEdF Ziejbcd<dGdHdI ZjdJdK Zkejbcd7eXdLdM ZldNdO Zmejbcd7eWdPdQ ZndRdS ZoejbcdTejpejRfejbcdUeqe	eWdVdWge	eXdXdYdZgd[d\ Zrd]d^ Zsejbcd7e[d_d` ZtejbcdaeWu ejbcdbdcdddegejbcdfe%j)dgd'ddhde%j)djd(dkddldeDjJeDjGd' d& dmfe%jvdgdd/dgdodp ZwejbcdqeXu ejbcdbdcdddegejbcdre%jKd)d*d&ddsde%jKd)d*d'ddsdgdvdw Zxejbcdxe`u dydz Zyejbcdxe`u ejbcd{eDjJeDjGd|d+d}d~feDjJeEjzddeDjJj{d d'fdd|d|d}dfgdd Z|ejbcdd|d+gdd Z}dd Z~ejbcd7eWdd Zdd Zejbcd7e`dd Zdd Zejbcd7e`dd Zdd Zejbcd7e`dd Zejbcd7eWdd Zdd Zejbcd7eWdd Zdd Zdd Ze!dd Zdd Zdd Zdd Zdd Zejbcd7e[dd Zdd Zejbcd7e[dd Zdd Zejbcd7e[dd Zdd Zejbcd7e[dd Zdd Zejbcd7e[ejbcdeeefdd Zdd Zejbcd7e`ejbcdTejpejRfdd Ze!ddÄ Zejbcd7e[ddń ZddǄ Zejbcd7eWddɄ Zdd˄ Zejbcd7eWdd̈́ Zddτ Zejbcd7eWddф ZdddԄZejbcd7e[ddք Zdd؄ Zejbcd7e[ddڄ Zdd܄ Zejbcd7e[ddބ Zdd Zejbcd7e[dd Zdd Zejbcd7e`dd ZdddZdd Zejbcd7e`dd Zdd Zdd ZG dd deVZede e"dd Zdd Zdd Zejbcd7e`dd Zejbcd7e`dd Zejbcd7eXdd  Zejbcd7eWdd Zdd Zejbcde-e.gdd Zejbcde+e,e-e.gd	d
 ZejbcdeXdd Zdd Zejbcd7e[dd Zdd ZdS (  z:
Testing for the forest module (sklearn.ensemble.forest).
    N)defaultdict)partial)combinations)product)DictAny)
csr_matrix)
csc_matrix)
coo_matrix)comb)DummyRegressor)mean_poisson_deviance)assert_almost_equal)assert_array_almost_equal)assert_array_equal)_convert_container)ignore_warnings)skip_if_no_parallel)NotFittedError)datasets)TruncatedSVD)make_classification)ExtraTreesClassifier)ExtraTreesRegressor)RandomForestClassifier)RandomForestRegressor)RandomTreesEmbedding)train_test_splitcross_val_score)GridSearchCV)	LinearSVC)Parallel)check_random_state)mean_squared_error)SPARSE_SPLITTERS           
   F)	n_samples
n_featuresn_informativeZn_redundantZ
n_repeatedshufflerandom_stater,   r-   r0      r,   r0   )r   r   )r   r   r   FOREST_ESTIMATORSFOREST_CLASSIFIERS_REGRESSORSc                 C   s   t |  }|ddd}|tt t|tt dt|ks@t	|dddd}|tt t|tt dt|kszt	|
t}|jtt|jfkst	dS )z&Check classification on a toy dataset.r+   r'   n_estimatorsr0   )r7   max_featuresr0   N)FOREST_CLASSIFIERSfitXyr   predictTtrue_resultlenAssertionErrorapplyshaper7   )nameForestClassifierclfZleaf_indices rG   F/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/ensemble/tests/test_forest.pycheck_classification_toyx   s    
rI   rD   c                 C   s   t |  d S N)rI   rD   rG   rG   rH   test_classification_toy   s    rL   c                 C   s   t |  }|d|dd}|tjtj |tjtj}|dksNtd||f |d|ddd}|tjtj |tjtj}|dkstd||f d S )	Nr+   r'   r7   	criterionr0   ?z'Failed with criterion %s and score = %fr(   r7   rN   r8   r0         ?)r9   r:   irisdatatargetscorerA   )rD   rN   rE   rF   rU   rG   rG   rH   check_iris_criterion   s       rV   rN   )ginilog_lossc                 C   s   t | | d S rJ   )rV   rD   rN   rG   rG   rH   	test_iris   s    rZ   c                 C   s   t |  }|d|dd}|tt |tt}|dksFtd||f |d|ddd}|tt |tt}|dkstd	||f d S )
N   r'   rM   g(\?z:Failed with max_features=None, criterion %s and score = %f   rP   gq=
ףp?z7Failed with max_features=6, criterion %s and score = %f)FOREST_REGRESSORSr:   X_regy_regrU   rA   )rD   rN   ForestRegressorregrU   rG   rG   rH   check_regression_criterion   s.       rb   )squared_errorabsolute_errorfriedman_msec                 C   s   t | | d S rJ   )rb   rY   rG   rG   rH   test_regression   s    rf   c                  C   sF  t jd} d\}}}tj|| || d}| jdd|dt j|dd }| jt || d	}t	|||| d
\}}}	}
t
ddd| d}t
ddd| d}|||	 |||	 tdd||	}||	df||
dffD ]l\}}}t|||}t|t ||dd}t|||}|dkr0||k s0t|d| k stqdS )zTest that random forest with poisson criterion performs better than
    mse for a poisson target.

    There is a similar test for DecisionTreeRegressor.
    *   r*   r*   r+   r1   r%   r(   lowhighsizer   ZaxisZlam	test_sizer0   poissonr+   sqrt)rN   min_samples_leafr8   r0   rc   mean)ZstrategyZtraintestgư>Ng?)nprandomRandomStater   make_low_rank_matrixuniformmaxrq   expr   r   r:   r   r   r=   ZcliprA   )rngn_trainn_testr-   r;   coefr<   X_trainX_testy_trainy_testZ
forest_poiZ
forest_msedummyZ	data_nameZ
metric_poiZ
metric_mseZmetric_dummyrG   rG   rH   test_poisson_vs_mse   sP    
         
r   )rq   rc   c           	      C   s   t jd}d\}}}tj|| ||d}|jdd|dt j|dd }|jt || d	}t	| d
d|d}|
|| t ||tt |kstdS )z9 "Test that sum(y_pred)==sum(y_true) on the training set.rg   rh   r1   r%   r(   ri   r   rm   rn   r+   F)rN   r7   	bootstrapr0   N)rv   rw   rx   r   ry   rz   r{   rq   r|   r   r:   sumr=   pytestapproxrA   )	rN   r}   r~   r   r-   r;   r   r<   ra   rG   rG   rH   #test_balance_property_random_forest   s"    
     r   c                 C   sj   t |  dd}t|drtt|dr*t|dddgdd	d
ggddg t|drXtt|drftd S )Nr   r0   classes_
n_classes_r'   r(   r)      r[   r\   )r]   hasattrrA   r:   )rD   rrG   rG   rH   check_regressor_attributes  s     r   c                 C   s   t |  d S rJ   )r   rK   rG   rG   rH   test_regressor_attributes  s    r   c              	   C   s   t |  }tjddp |ddddd}|tjtj ttj|	tjddt
tjjd  t|	tjt|tj W 5 Q R X d S )Nignoredivider+   r'   )r7   r0   r8   	max_depthrm   r   )r9   rv   errstater:   rR   rS   rT   r   r   predict_probaonesrC   r|   predict_log_proba)rD   rE   rF   rG   rG   rH   check_probability!  s"        
 r   c                 C   s   t |  d S rJ   )r   rK   rG   rG   rH   test_probability1  s    r   c                 C   sH  t j|dd}tj|dd}t|  }|d|dd}||| |j}t|dk}	|jd dksdt	|	dkspt	t
|d d dkst	|j}|jdd	 |j}
t||
 tdd
dt|}|dd|d}|j|||d |j}t
|dkst	dD ]F}|dd|d}|j|||| d |j}t||  |k st	qd S )NFcopyr+   r   rM   皙?r)   r(   n_jobsr'   )r7   r0   rN   sample_weight        )rQ   d   )X_largeastypey_larger4   r:   feature_importances_rv   r   rC   rA   all
set_paramsr   r"   randintr@   absrt   )rD   rN   dtype	tolerancer;   r<   ForestEstimatorestimportancesZn_importantZimportances_parallelr   ZscaleZimportances_bisrG   rG   rH   check_importances6  s0    
r   r   zname, criterionrW   rX   rc   re   rd   c                 C   s*   d}|t kr|dkrd}t||| | d S )N{Gz?rd   g?)r]   r   )r   rD   rN   r   rG   rG   rH   test_importances\  s    	r   c            	         s  dd  dd  fdd} t ddddddddgdddddddd	gdddddddd
gddddddddgddddddddgddddddddgddddddddgddddddddgddddddddgddddddddgg
}t j|d d d df td|d d df  }}|jd }t |}t|D ]}| |||||< q(tddddd||}tdd |j	D |j
 }t|t| t ||  dk std S )Nc                 S   s*   | dk s| |krdS t t|t| ddS )Nr   T)exact)r   int)knrG   rG   rH   binomialp  s    z-test_importances_asymptotic.<locals>.binomialc                 S   sF   t | }d}t| D ]*}d| | }|dkr||t| 8 }q|S )Nr         ?r   )r@   rv   bincountlog2)Zsamplesr,   entropycountprG   rG   rH   r   s  s    z,test_importances_asymptotic.<locals>.entropyc              
      sf  j \}}tt|}||  fddt|D d}t|D ]}d||||   }t||D ] t fddt|D  D ]}	tj|td}
t|D ]$}|
d d  | f |	| kM }
q|
d d f ||
  }}t	|dkrg }|  D ](}|d d | f |k}|
||   q||d |  |tfdd|D   7 }qqhqB|S )	Nc                    s"   g | ]}t  d d |f qS rJ   )rv   unique).0i)r;   rG   rH   
<listcomp>  s     zGtest_importances_asymptotic.<locals>.mdi_importance.<locals>.<listcomp>r   r   c                    s   g | ]} |  qS rG   rG   )r   j)BvaluesrG   rH   r     s     r   r   c                    s    g | ]} |t |  qS rG   )r@   )r   c)r   n_samples_brG   rH   r     s   )rC   listrangepopr   r   rv   r   boolr@   appendr   )ZX_mr;   r<   r,   r-   featuresimpr   r   bZmask_br   ZX_Zy_childrenxiZmask_xir   r   )r   r;   r   r   rH   mdi_importance~  sB    

 "

z3test_importances_asymptotic.<locals>.mdi_importancer   r'   r(   r)   r   r[   r\         	   r   r*   rX   )r7   r8   rN   r0   c                 s   s   | ]}|j jd dV  qdS )F)	normalizeN)tree_Zcompute_feature_importancesr   treerG   rG   rH   	<genexpr>  s   z.test_importances_asymptotic.<locals>.<genexpr>r   )rv   arrayr   rC   zerosr   r   r:   r   estimators_r7   r   r   rt   rA   )	r   rS   r;   r<   r-   Ztrue_importancesr   rF   r   rG   r   rH   test_importances_asymptotick  sL    00

    	r   c              	   C   s8   d | }tjt|d tt|   d W 5 Q R X d S )NzfThis {} instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.matchr   )formatr   raisesr   getattrr4   )rD   err_msgrG   rG   rH   !test_unfitted_feature_importances  s    r   rE   X_typer   Z
sparse_csrZ
sparse_csczX, y, lower_bound_accuracyi,  )r,   	n_classesr0   rO     r\   )r,   r   r.   r0   ?
ףp=
?c                 C   s  t ||d}t||ddd\}}}}| ddddd}	t|	drBtt|	d	rPt|	|| |	||}
t|
|	j d
ks~t|	j|kstt|	dstt|	drtt|	d	st|jdkr|j	d t
t|f}n*|j	d t
t|dddf |j	d f}|	jj	|kstdS )z5Check that OOB score is close to score on a test set.Zconstructor_namerQ   r   ro   (   Tr7   r   	oob_scorer0   
oob_score_oob_decision_function_r   oob_prediction_r'   N)r   r   r   rA   r:   rU   r   r   ndimrC   r@   setr   )rE   r;   r<   r   Zlower_bound_accuracyr   r   r   r   
classifier
test_scoreexpected_shaperG   rG   rH   test_forest_classifier_oob  s4    
*r   r`   zX, y, lower_bound_r2)r,   r-   	n_targetsr0   ffffff?皙?c                 C   s   t ||d}t||ddd\}}}}| ddddd}	t|	drBtt|	d	rPt|	|| |	||}
t|
|	j d
ks~t|	j|kstt|	dstt|	d	stt|	drt|jdkr|j	d f}n|j	d |jf}|	j
j	|kstdS )z\Check that forest-based regressor provide an OOB score close to the
    score on a test set.r   rQ   r   ro   2   Tr   r   r   r   r   r'   N)r   r   r   rA   r:   rU   r   r   r   rC   r   )r`   r;   r<   r   Zlower_bound_r2r   r   r   r   Z	regressorr   r   rG   rG   rH   test_forest_regressor_oob   s4    
r  r   c              	   C   s>   | ddddd}t jtdd |tjtj W 5 Q R X dS )zfCheck that a warning is raised when not enough estimator and the OOB
    estimates will be inaccurate.r'   Tr   )r7   r   r   r0   z"Some inputs do not have OOB scoresr   N)r   warnsUserWarningr:   rR   rS   rT   )r   	estimatorrG   rG   rH   test_forest_oob_warningX  s    r  zX, y, params, err_msgT)r   r   z6Out of bag estimation only available if bootstrap=Truer[   ri   z:The type of target cannot be used to compute OOB estimatesc              	   C   s4   | f |}t jt|d ||| W 5 Q R X d S )Nr   )r   r   
ValueErrorr:   )r   r;   r<   paramsr   r  rG   rG   rH   test_forest_oob_errorf  s    
r  r   c              	   C   sP   t jtdd t| d W 5 Q R X t jtdd t tt W 5 Q R X d S )Nz"got an unexpected keyword argumentr   r   zOOB score not supported)r   r   	TypeErrorr   NotImplementedErrorZ_set_oob_score_and_attributesr;   r<   r	  rG   rG   rH   +test_random_trees_embedding_raise_error_oob~  s    r  c                 C   s.   t |   }t|ddd}|tjtj d S )Nr'   r(   )r7   r   )r9   r   r:   rR   rS   rT   )rD   forestrF   rG   rG   rH   check_gridsearch  s    
r  c                 C   s   t |  d S rJ   )r  rK   rG   rG   rH   test_gridsearch  s    r  c                 C   sn   t |  }|dddd}||| t|dks2t|jdd ||}|jdd ||}t||d dS )	z-Check parallel computations in classificationr+   r)   r   r7   r   r0   r'   r   r(   N)r4   r:   r@   rA   r   r=   r   )rD   r;   r<   r   r  y1y2rG   rG   rH   check_parallel  s    

r  c                 C   s6   | t krtj}tj}n| tkr&t}t}t| || d S rJ   )r9   rR   rS   rT   r]   r^   r_   r  rD   r;   r<   rG   rG   rH   test_parallel  s    r  c           	      C   sl   t |  }|dd}||| |||}t|}t|}t||jksPt|||}||kshtd S )Nr   r   )	r4   r:   rU   pickledumpsloadstype	__class__rA   )	rD   r;   r<   r   objrU   Zpickle_objectobj2Zscore2rG   rG   rH   check_pickle  s    


r  c                 C   sJ   | t krtj}tj}n| tkr&t}t}t| |d d d |d d d  d S )Nr(   )r9   rR   rS   rT   r]   r^   r_   r  r  rG   rG   rH   test_pickle  s    r  c           	      C   s  ddgddgddgddgddgddgddgddgddgddgddgddgg}ddgddgddgddgddgddgddgddgddgddgddgddgg}ddgddgddgddgg}ddgddgddgddgg}t |  ddd}||||}t|| | tkrtjd	d
 ||}t|dks0t	|d j
dksDt	|d j
dksXt	||}t|dkstt	|d j
dkst	|d j
dkst	W 5 Q R X d S )Nr%   r&   r'   r(   r   r)   Fr0   r   r   r   r   r(   r   r   )r4   r:   r=   r   r9   rv   r   r   r@   rA   rC   r   	rD   r   r   r   r   r   Zy_predZprobaZ	log_probarG   rG   rH   check_multioutput  sR    



r$  c                 C   s   t |  d S rJ   )r$  rK   rG   rG   rH   test_multioutput  s    r%  c           	      C   s  ddgddgddgddgddgddgddgddgddgddgddgddgg}ddgddgddgddgddgddgddgddgddgdd	gdd	gdd	gg}ddgddgddgddgg}ddgddgddgdd	gg}t |  d
dd}||||}t|| tjdd ||}t|dks&t|d
 j	dks:t|d j	dksNt|
|}t|dksjt|d
 j	dks~t|d j	dkstW 5 Q R X d S )Nr%   r&   r'   r(   ZredZblueZgreenZpurpleZyellowr   Fr   r   r   r!  r"  )r4   r:   r=   r   rv   r   r   r@   rA   rC   r   r#  rG   rG   rH   test_multioutput_string   sX    


r&  c                 C   s   t |  }|ddtt}|jdks(tt|jddg t	tt
td fj}|ddt|}t|jddg t|jddgddgg d S )Nr   r   r(   r&   r'   r%   )r9   r:   r;   r<   r   rA   r   r   rv   vstackr   r>   )rD   rE   rF   _yrG   rG   rH   check_classes_shape8  s    r)  c                 C   s   t |  d S rJ   )r)  rK   rG   rG   rH   test_classes_shapeJ  s    r*  c                  C   s<   t ddd} tjdd\}}| |}t|tjks8td S )Nr+   F)r7   sparse_outputrQ   Zfactor)r   r   make_circlesfit_transformr  rv   ZndarrayrA   )hasherr;   r<   X_transformedrG   rG   rH   test_random_trees_dense_typeO  s    
r1  c                  C   sR   t dddd} t dddd}tjdd\}}| |}||}t| | d S )Nr+   Fr   )r7   r+  r0   TrQ   r,  )r   r   r-  r.  r   toarray)Zhasher_denseZhasher_sparser;   r<   ZX_transformed_denseX_transformed_sparserG   rG   rH   test_random_trees_dense_equal\  s        

r4  c                  C   s   t ddd} tjdd\}}| |}t ddd} t| || |  |jd |jd ksht	t|j
dd| j tdd	}||}t }||| |||d
kst	d S )N   r'   r6   rQ   r,  r   rm   r(   )Zn_componentsr   )r   r   r-  r.  r   r:   	transformr2  rC   rA   r   r7   r   r    rU   )r/  r;   r<   r0  ZsvdZ	X_reducedZ
linear_clfrG   rG   rH   test_random_hasherp  s    


r7  c                  C   sJ   t jdd\} }tddd}|| }|t| }t| |  d S )Nr   r   r5  r'   r6   )r   make_multilabel_classificationr   r.  r	   r   r2  )r;   r<   r/  r0  r3  rG   rG   rH   test_random_hasher_sparse_data  s
    
r9  c                     s   t d} d\}}| ||| dd|fdddD }| ||  fdd|D }t||d	d  D ]\}}t|| qnd S )
N!0  )P   r5  r   r(   c                    s"   g | ]}t d |dd qS )r2   i90  r  )r   r:   )r   r   )r   r   rG   rH   r     s
    z'test_parallel_train.<locals>.<listcomp>)r'   r(   r)   r          c                    s   g | ]}|  qS rG   )r   )r   rF   )r   rG   rH   r     s     r'   )r"   randnr   zipr   )r}   r,   r-   ZclfsZprobasZproba1Zproba2rG   )r   r   r   rH   test_parallel_train  s    r@  c                     s  t d} | jdddd}| d}d t dd	||}tt}|jD ]6}d
dd t	|j
j|j
jD }||  d7  < qHt fdd| D }t|dkstd|d d kstd|d d kstd|d d kstd|d d kst|d d dkst|d d dks&ttd}tjddd|d d df< tjddd|d d df< | d}tddd||}tt}|jD ]8}d
dd t	|j
j|j
jD }||  d7  < qdd | D }t|dkstd S )Nr:  r   r   )r   r'   )rl   r   r*   rg   r6    c                 s   s.   | ]&\}}|d kr"d|t |f ndV  qdS r   z%d,%d/-Nr   r   ftrG   rG   rH   r     s   z$test_distribution.<locals>.<genexpr>r'   c                    s    g | ]\}}d |   |fqS )r   rG   r   r   r   Zn_treesrG   rH   r     s     z%test_distribution.<locals>.<listcomp>r[   g?r(   r)   333333?z0,1/0,0/--0,2/--)r   r(   )r8   r0   c                 s   s.   | ]&\}}|d kr"d|t |f ndV  qdS rB  rD  rE  rG   rG   rH   r     s   c                 S   s   g | ]\}}||fqS rG   rG   rH  rG   rG   rH   r     s     r   )r"   r   randr   r:   r   r   r   joinr?  r   Zfeature	thresholdsorteditemsr@   rA   rv   emptyrw   )r}   r;   r<   ra   Zuniquesr   rG   rI  rH   test_distribution  s@    






rQ  c                 C   sp   t t }}t|  }|ddddd||}|jd  dks@t|dddd||}|jd  dksltd S )Nr'   r   r   )r   Zmax_leaf_nodesr7   r0   )r   r7   r0   )hastie_Xhastie_yr4   r:   r   Z	get_depthrA   rD   r;   r<   r   r   rG   rG   rH   check_max_leaf_nodes_max_depth  s    
    rU  c                 C   s   t |  d S rJ   )rU  rK   rG   rG   rH   test_max_leaf_nodes_max_depth  s    rV  c                 C   s   t t }}t|  }|dddd}||| |jd jjdk}|jd jj| }t	|t
|d d ksxtd| |dddd}||| |jd jjdk}|jd jj| }t	|t
|d d kstd| d S )Nr+   r'   r   )Zmin_samples_splitr7   r0   r&   rQ   Failed with {0})rR  rS  r4   r:   r   r   Zchildren_leftZn_node_samplesrv   minr@   rA   r   )rD   r;   r<   r   r   Znode_idxZnode_samplesrG   rG   rH   check_min_samples_split  s    
(rY  c                 C   s   t |  d S rJ   )rY  rK   rG   rG   rH   test_min_samples_split  s    rZ  c                 C   s   t t }}t|  }|dddd}||| |jd j|}t|}||dk }t	|dkspt
d| |dddd}||| |jd j|}t|}||dk }t	|t|d d kst
d| d S )Nr[   r'   r   )rs   r7   r0   r   rW  g      ?)rR  rS  r4   r:   r   r   rB   rv   r   rX  rA   r   r@   )rD   r;   r<   r   r   outZnode_countsZ
leaf_countrG   rG   rH   check_min_samples_leaf  s    


r\  c                 C   s   t |  d S rJ   )r\  rK   rG   rG   rH   test_min_samples_leaf  s    r]  c                 C   s   t t }}t|  }tjd}||jd }t|}t	dddD ]}||ddd}d| krfd|_
|j|||d |jd j|}	tj|	|d	}
|
|
dk }t|||j ksFtd
| |jqFd S )Nr   rQ   r\   r'   )min_weight_fraction_leafr7   r0   ZRandomForestFr   )weightsz,Failed with {0} min_weight_fraction_leaf={1})rR  rS  r4   rv   rw   rx   rK  rC   r   Zlinspacer   r:   r   r   rB   r   rX  r^  rA   r   )rD   r;   r<   r   r}   r_  Ztotal_weightfracr   r[  Znode_weightsZleaf_weightsrG   rG   rH   check_min_weight_fraction_leaf   s0    

   ra  c                 C   s   t |  d S rJ   )ra  rK   rG   rG   rH   test_min_weight_fraction_leaf?  s    rb  c                 C   s   t |  }|ddd||}|ddd||}t|||| | tksV| tkrzt|||| t|j|j | tkrt|||| t|	||	| | t
krt|| ||  t|| ||  d S )Nr   r(   )r0   r   )r4   r:   r   rB   r9   r]   r=   r   r   r   FOREST_TRANSFORMERSr6  r2  r.  )rD   r;   ZX_sparser<   r   ZdensesparserG   rG   rH   check_sparse_inputD  s2        re  sparse_matrixc                 C   s(   t jddd\}}t| |||| d S )Nr   r   )r0   r,   )r   r8  re  )rD   rf  r;   r<   rG   rG   rH   test_sparse_inputa  s    rg  c                 C   s  t |  ddd}tjtj|d}tj}t||||| tjtjd|d}tj}t||||| tjtjd|d}tj}t||||| tj	tj|d}tj}t||||| |j
jtkr^ttj|d}tj}t||||| ttj|d}tj}t||||| ttj|d}tj}t||||| tjtjd d d |d}tjd d d }t||||| d S )	Nr   Fr   r   C)orderr   Fr)   )r4   rv   ZasarrayrR   rS   rT   r   r:   r=   Zascontiguousarrayr  Zsplitterr$   r   r	   r
   )rD   r   r   r;   r<   rG   rG   rH   check_memory_layouti  s4    rk  c                 C   s   t | | d S rJ   )rk  )rD   r   rG   rG   rH   test_memory_layout  s    rl  c              	   C   s|   t |  }tt |ddd|| W 5 Q R X |dd}||| | tksX| tkrxtt || W 5 Q R X d S )Nr'   r   r6   r   )r4   r   r   r  r:   r9   r]   r=   )rD   r;   X_2dr<   r   r   rG   rG   rH   check_1d_input  s    
rn  c              	   C   sT   t jd d df }t jd d df d}t j}t  t| ||| W 5 Q R X d S )Nr   r&   r'   )rR   rS   reshaperT   r   rn  )rD   r;   rm  r<   rG   rG   rH   test_1d_input  s
    rq  c           	      C   s  t |  }|dd}|tjtj |ddd}|tjtj t|j|j ttjtjtjfj	}|ddddddddddddgdd}|tj| t|j|j |ddd}|tj| t|j|j t
tjj}|tjdk  d	9  < dd
dd}|dd}|tjtj| ||dd}|tjtj t|j|j |dd}|tjtj|d  ||dd}|tjtj| t|j|j d S )Nr   r   balancedclass_weightr0   g       @r   )r   r'   r(   r'   r   g      Y@r(   )r9   r:   rR   rS   rT   r   r   rv   r'  r>   r   rC   )	rD   rE   Zclf1Zclf2Z
iris_multiZclf3Zclf4r   rt  rG   rG   rH   check_class_weights  s@    





ru  c                 C   s   t |  d S rJ   )ru  rK   rG   rG   rH   test_class_weights  s    rv  c                 C   s~   t |  }ttttd fj}|ddd}|t| |ddddddgdd}|t| |d	dd}|t| d S )
Nr(   rr  r   rs  rQ   r   ro  )r%   r(   Zbalanced_subsample)r9   rv   r'  r<   r   r>   r:   r;   )rD   rE   r(  rF   rG   rG   rH   6check_class_weight_balanced_and_bootstrap_multi_output  s     rw  c                 C   s   t |  d S rJ   )rw  rK   rG   rG   rH   5test_class_weight_balanced_and_bootstrap_multi_output  s    rx  c              	   C   s   t |  }ttttd fj}|dddd}|tt d}tj	t
|d |t| W 5 Q R X |dd	d
gdd}tt |t| W 5 Q R X d S )Nr(   rr  Tr   )rt  
warm_startr0   JWarm-start fitting without increasing n_estimators does not fit new trees.r   rQ   r   ro  rs  )r9   rv   r'  r<   r   r>   r:   r;   r   r  r  r   r  )rD   rE   r(  rF   warn_msgrG   rG   rH   check_class_weight_errors  s    r|  c                 C   s   t |  d S rJ   )r|  rK   rG   rG   rH   test_class_weight_errors  s    r}  rg   c                 C   s   t t }}t|  }d }dD ]D}|d kr6|||dd}n|j|d ||| t||kstq|d|dd}||| tdd |D td	d |D kstt|	||	|d

| d d S )N)r[   r+   T)r7   r0   ry  r7   r+   Fc                 S   s   g | ]
}|j qS rG   r   r   rG   rG   rH   r   -  s     z$check_warm_start.<locals>.<listcomp>c                 S   s   g | ]
}|j qS rG   r   r   rG   rG   rH   r   .  s     rW  )r   )rR  rS  r4   r   r:   r@   rA   r   r   rB   r   )rD   r0   r;   r<   r   Zest_wsr7   Z	est_no_wsrG   rG   rH   check_warm_start  s6    
    
  r  c                 C   s   t |  d S rJ   )r  rK   rG   rG   rH   test_warm_start6  s    r  c                 C   s~   t t }}t|  }|ddddd}||| |ddddd}||| |jddd ||| t|||| d S )Nr[   r'   Fr7   r   ry  r0   Tr(   )ry  r0   )rR  rS  r4   r:   r   r   rB   )rD   r;   r<   r   r   est_2rG   rG   rH   check_warm_start_clear;  s    
   r  c                 C   s   t |  d S rJ   )r  rK   rG   rG   rH   test_warm_start_clearL  s    r  c              	   C   s^   t t }}t|  }|dddd}||| |jdd tt ||| W 5 Q R X d S )Nr[   r'   T)r7   r   ry  r   r~  )rR  rS  r4   r:   r   r   r   r  rT  rG   rG   rH   %check_warm_start_smaller_n_estimatorsQ  s    
r  c                 C   s   t |  d S rJ   )r  rK   rG   rG   rH   $test_warm_start_smaller_n_estimators\  s    r  c              	   C   s   t t }}t|  }|ddddd}||| |ddddd}||| |jdd d}tjt|d	 ||| W 5 Q R X t|	||	| d S )
Nr[   r)   Tr'   r  r(   r   rz  r   )
rR  rS  r4   r:   r   r   r  r  r   rB   )rD   r;   r<   r   r   r  r{  rG   rG   rH   #check_warm_start_equal_n_estimatorsa  s"    
   r  c                 C   s   t |  d S rJ   )r  rK   rG   rG   rH   "test_warm_start_equal_n_estimatorsz  s    r  c                 C   s   t t }}t|  }|ddddddd}||| |ddddddd}||| |jdddd ||| t|d	s|t|j|jkst|ddddddd}||| t|d	rt|jdd
 t|j|| |j|jkstd S )N   r)   Fr'   T)r7   r   ry  r0   r   r   r[   )ry  r   r7   r   r	  )	rR  rS  r4   r:   r   r   rA   r   r   )rD   r;   r<   r   r   r  Zest_3rG   rG   rH   check_warm_start_oob  sJ    
r  c                 C   s   t |  d S rJ   )r  rK   rG   rG   rH   test_warm_start_oob  s    r  r  c                 C   sX   t ddd}t| }dd dd |  D }||||}t|j| t|| d S )Nr   Fr   c                 S   s   g | ]}|qS rG   rG   )r   chrG   rG   rH   r     s     z&test_dtype_convert.<locals>.<listcomp>ZABCDEFGHIJKLMNOPQRSTU)r   rv   Zeyer:   r=   r   r   )r   r   r;   r<   resultrG   rG   rH   test_dtype_convert  s    
r  c                    s   t t }}|jd }t|  }|ddddd}||| ||\jd d ks\tjd |ksnttt	dd |j
D  ||}t|jd D ]<  fd	dt|d d  f D }t|tj|d
 qd S )Nr   r[   r'   Fr  r&   c                 S   s   g | ]}|j jqS rG   )r   
node_count)r   erG   rG   rH   r     s     z'check_decision_path.<locals>.<listcomp>c                    s$   g | ]\}}|  | f qS rG   rG   )r   r   r   Zest_idZ	indicatorZn_nodes_ptrrG   rH   r     s   )rC   )rR  rS  rC   r4   r:   Zdecision_pathrA   r   rv   Zdiffr   rB   r   	enumerater   r   )rD   r;   r<   r,   r   r   ZleavesZleave_indicatorrG   r  rH   check_decision_path  s$    

 
r  c                 C   s   t |  d S rJ   )r  rK   rG   rG   rH   test_decision_path  s    r  c                  C   s\   t jddd\} }ttttg}|D ]4}|dd}|| | |jD ]}|jdksBt	qBq"d S )Nr   r'   r3   r   )min_impurity_decrease)
r   make_hastie_10_2r   r   r   r   r:   r   r  rA   )r;   r<   Zall_estimators	Estimatorr   r   rG   rG   rH   test_min_impurity_decrease  s    

r  c               	   C   s   t dd} td}dddg}d}tjt|d | || W 5 Q R X d	d	d	g}d
}tjt|d | || W 5 Q R X d S )Nrq   )rN   )r)   r)   r&   r'   r)   zNSome value\(s\) of y are negative which is not allowed for Poisson regression.r   r   zLSum of y is not strictly positive which is necessary for Poisson regression.)r   rv   r   r   r   r  r:   )r   r;   r<   r   rG   rG   rH   test_poisson_y_positive_check  s    



r  c                       s(   e Zd Z fddZ fddZ  ZS )	MyBackendc                    s   d| _ t j|| d S )Nr   )r   super__init__)selfargskwargsr  rG   rH   r    s    zMyBackend.__init__c                    s   |  j d7  _ t  S )Nr'   )r   r  
start_call)r  r  rG   rH   r    s    zMyBackend.start_call)__name__
__module____qualname__r  r  __classcell__rG   rG   r  rH   r    s   r  testingc               	   C   sv   t ddd} td\}}| tt W 5 Q R X |jdks@ttd\}}| t W 5 Q R X |jdksrtd S )Nr+   r(   )r7   r   r  r   )	r   joblibZparallel_backendr:   r;   r<   r   rA   r   )rF   bar   _rG   rG   rH   test_backend_respected  s    r  c                  C   sH   t ddddd\} }tdddd| |}tjd|j d	d
sDtd S )Nr  r)   r'   )r,   r.   r0   r   r[   rg      )rs   r0   r7   gHz>)Zabs_tol)r   r   r:   mathiscloser   r   rA   )r;   r<   rF   rG   rG   rH   #test_forest_feature_importances_sum#  s       
   r  c                  C   sB   t d} t d}tdd| |}t|jt jdt jd d S )N)r+   r+   )r+   r+   r~  r   )rv   r   r   r   r:   r   r   float64)r;   r<   ZgbrrG   rG   rH   *test_forest_degenerate_feature_importances-  s    

r  c              	   C   s>   t |  ddd}d}tjt|d |tt W 5 Q R X d S )NFrQ   r   max_sampleszl`max_sample` cannot be set if `bootstrap=False`. Either switch to `bootstrap=True` or set `max_sample=None`.r   )r5   r   r   r  r:   r;   r<   )rD   r   r   rG   rG   rH   test_max_samples_bootstrap5  s
    r  c              	   C   sB   t |  dtdd}d}tjt|d |tt W 5 Q R X d S )NTg    eAr  z=`max_samples` must be <= n_samples=6 but got value 1000000000r   )r5   r   r   r   r  r:   r;   r<   )rD   r   r   rG   rG   rH    test_large_max_samples_exceptionB  s    r  c                 C   s   t ttdddd\}}}}t|  dddd}||||}t|  dd dd}||||}t||}	t||}
|	t|
kst	d S )Nr   rJ  r   )Z
train_sizerp   r0   Tr   r   r  r0   )
r   r^   r_   r]   r:   r=   r#   r   r   rA   )rD   r   r   r   r   
ms_1_modelZms_1_predictms_None_modelZms_None_predictZms_1_msZ
ms_None_msrG   rG   rH   $test_max_samples_boundary_regressorsK  s,            

r  c           	      C   sr   t ttdtd\}}}}t|  dddd}||||}t|  dd dd}||||}tj|| d S )Nr   )r0   ZstratifyTr   r  )	r   r   r   r9   r:   r   rv   r  Zassert_allclose)	rD   r   r   r   r  r  Z
ms_1_probar  Zms_None_probarG   rG   rH   %test_max_samples_boundary_classifiersa  s&           r  c               	   C   sN   dddgg} t dddg}t }d}tjt|d || | W 5 Q R X d S )	Nr'   r(   r)   r   r[   r\   z3sparse multilabel-indicator for y is not supported.r   )r   r   r   r   r  r:   )r;   r<   r   msgrG   rG   rH   test_forest_y_sparset  s    r  ForestClassc           	      C   s   t jd}|dd}|ddk}| d|d d}| d|dd}||| ||| |jd j}|jd j}d}|j|jkst|d S )Nr'   i'  r(   r   )r7   r0   r  z=Tree without `max_samples` restriction should have more nodes)	rv   rw   rx   r>  r:   r   r   r  rA   )	r  r}   r;   r<   Zest1Zest2Ztree1Ztree2r  rG   rG   rH   'test_little_tree_with_small_max_samples}  s&    r  r  c              	   C   s\   t ddgddgg}t ddg}| dd}d}tjt|d	 ||| W 5 Q R X d
S )z9Check warning raised for max_features="auto" deprecation.r'   r(   r)   r   r   auto)r8   a  `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features=(1.0|'sqrt')` or remove this parameter as it is also the default value for RandomForest(Regressors|Classifiers) and ExtraTrees(Regressors|Classifiers)\.r   N)rv   r   r   r  FutureWarningr:   )r  r;   r<   r   r   rG   rG   rH   test_max_features_deprecation  s    
	r  Forestc                 C   sN   ddl m} tdd}|j\}}|||}t|  dd|d}|t| d S )Nr   )MSEr&   r'   r(   )r7   r   rN   )Zsklearn.tree._criterionr  r_   rp  rC   r]   r:   r^   )r  r  r<   r,   Z	n_outputsZmse_criterionr   rG   rG   rH   -test_mse_criterion_object_segfault_smoke_test  s    

r  c                  C   sX   t jd} t | dd}tddddd|}| }dd d	D }t|| d
S )z3Check feature names out for Random Trees Embedding.r   r   r   r(   F)r7   r   r+  r0   c                 S   s    g | ]\}}d | d| qS )Zrandomtreesembedding_r  rG   )r   r   ZleafrG   rG   rH   r     s   zAtest_random_trees_embedding_feature_names_out.<locals>.<listcomp>))r   r(   )r   r)   )r   r[   )r   r\   r  )r'   r)   )r'   r[   )r'   r\   N)	rv   rw   rx   r   r>  r   r:   Zget_feature_names_outr   )r0   r;   r/  namesZexpected_namesrG   rG   rH   -test_random_trees_embedding_feature_names_out  s       r  c              	   C   sb   t ddgddgg}t ddg}t|   }||| d}tjt|d |j W 5 Q R X d S )Nr'   r(   r)   r   r   zoAttribute `base_estimator_` was deprecated in version 1.2 and will be removed in 1.4. Use `estimator_` instead.r   )rv   r   r4   r:   r   r  r  Zbase_estimator_)rD   r;   r<   modelr{  rG   rG   rH   'test_base_estimator_property_deprecated  s    
r  c                 C   sf   |  tjjdttdd tjjdd}t	dd|d\}}t
|dd	}td
|d}t|||d
d dS )zRandomForestClassifier must work on readonly sparse data.

    Non-regression test for: https://github.com/scikit-learn/scikit-learn/issues/25333
    r!   r   )Z
max_nbytesr   )seedr  r1   Tr   r(   )r   r0   )ZcvN)setattrsklearnZensembleZ_forestr   r!   rv   rw   rx   r   r   r   r   )Zmonkeypatchr}   r;   r<   rF   rG   rG   rH   test_read_only_buffer  s    
r  )rO   )r   )r   )r   )r   )rg   )r  )__doc__r  r  collectionsr   	itertools	functoolsr   r   r   typingr   r   Znumpyrv   Zscipy.sparser   r	   r
   Zscipy.specialr   r  r   r  Zsklearn.dummyr   Zsklearn.metricsr   Zsklearn.utils._testingr   r   r   r   r   r   Zsklearn.exceptionsr   r   Zsklearn.decompositionr   Zsklearn.datasetsr   Zsklearn.ensembler   r   r   r   r   Zsklearn.model_selectionr   r   r   Zsklearn.svmr    Zsklearn.utils.parallelr!   Zsklearn.utils.validationr"   r#   Zsklearn.tree._classesr$   r;   r<   r>   r?   r   r   Z	load_irisrR   r}   ZpermutationrT   rl   permrS   Zmake_regressionr^   r_   r  rR  rS  r   Zfloat32parallelZget_active_backendr  ZDEFAULT_JOBLIB_BACKENDr9   r]   rc  dictr4   str__annotations__updater   r5   rI   markZparametrizerL   rV   rZ   rb   rf   r   r   r   r   r   r   r   r  chainr   r   r   r   r8  r   r  r  r   rC   r  r  r  r  r  r  r  r  r$  r%  r&  r)  r*  r1  r4  r7  r9  r@  rQ  rU  rV  rY  rZ  r\  r]  ra  rb  re  rg  rk  rl  rn  rq  ru  rv  rw  rx  r|  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Zregister_parallel_backendr  r  r  r  r  r  r  r  r  r  r  r  r  r  rG   rG   rG   rH   <module>   sL  
(

 




 5


&m
	   $      %




3

7

6



/

	/






2






		