U
    3d"W                     @   sn  d Z ddlZddlZddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ej+,dZ-ddgddgddgddgddgddggZ.ddddddgZ/ddddddgZ0ddgddgddggZ1dddgZ2dddgZ3e*4 Z5e-6e5j7j8Z9e&e5j:e5j7e-d\e5_:e5_7e*; Z<e&e<j:e<j7e-d\e<_:e<_7dd  Z=d!d" Z>ej?@d#d$d%gd&d' ZAd(d) ZBd*d+ ZCej?@d,d-d.d/gd0d1 ZDej?@d#d$d%gd2d3 ZEd4d5 ZFd6d7 ZGd8d9 ZHd:d; ZId<d= ZJd>d? ZKd@dA ZLdBdC ZMdDdE ZNdFdG ZOej?@d#d$d%gdHdI ZPdJdK ZQej?@d#d$d%gdLdM ZRej?@dNe e5j:e5j7fe e<j:e<j7fgdOdP ZSdQdR ZTej?@dSee#fee$fgdTdU ZUej?@dVeegdWdX ZVdS )Yz6Testing for the boost module (sklearn.ensemble.boost).    N)
csc_matrix)
csr_matrix)
coo_matrix)
dok_matrix)
lil_matrix)assert_array_equalassert_array_less)assert_array_almost_equal)BaseEstimator)clone)DummyClassifierDummyRegressor)LinearRegression)train_test_split)GridSearchCV)AdaBoostClassifier)AdaBoostRegressor)_samme_proba)SVCSVR)DecisionTreeClassifierDecisionTreeRegressor)shuffle)NoSampleWeightWrapper)datasets      foo   random_statec                     s   t dddgdddgddd	gddd
gg  t  jddd d t jf   G  fddd} |  }t|dt  }t|j j t 	|
 sttt j|ddddddg tt j|ddddddg d S )Nr   gư>r   gRQ?g333333?皙?igRQ?g      ?g&.>Zaxisc                       s   e Zd Z fddZdS )z'test_samme_proba.<locals>.MockEstimatorc                    s   t |j j  S N)r   shapeselfXZprobs O/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/ensemble/tests/test_weight_boosting.pypredict_probaC   s    z5test_samme_proba.<locals>.MockEstimator.predict_probaN)__name__
__module____qualname__r-   r+   r*   r+   r,   MockEstimatorB   s   r1   r    r   )nparrayabssumnewaxisr   	ones_liker   r&   isfiniteallAssertionErrorZargminargmax)r1   ZmockZsamme_probar+   r*   r,   test_samme_proba7   s    "$r<   c                  C   s>   t tt} t t| }t|tt ttdf d S )Nr   )r2   Zoneslenr)   r   fitr	   r-   )Zy_tclfr+   r+   r,   test_oneclass_adaboost_probaT   s    r@   	algorithmSAMMESAMME.Rc                 C   sz   t | dd}|tt t|tt tt	t
t|j |tjttdfks\t|tjttfksvtd S )Nr   rA   r"   r   )r   r>   r)   y_classr   predictT	y_t_classr2   uniqueasarrayclasses_r-   r&   r=   r:   decision_function)rA   r?   r+   r+   r,   test_classification_toy]   s    rM   c                  C   s*   t dd} | tt t| tt d S )Nr   r!   )r   r>   r)   y_regrr   rF   rG   y_t_regr)r?   r+   r+   r,   test_regression_toyh   s    
rP   c                  C   s  t tj} d  }}dD ]}t|d}|tjtj t| |j |	tj}|dkr^|}|}|j
d t| kstt|tjj
d t| kst|tjtj}|dkstd||f t|jdkstttdd |jD t|jkstqd	|_td
t |	tj|  d S )NrB   rC   rA   rB   r   g?z'Failed with algorithm %s and score = %fc                 s   s   | ]}|j V  qd S r%   r!   .0Zestr+   r+   r,   	<genexpr>   s     ztest_iris.<locals>.<genexpr>rC   r   )r2   rI   iristargetr   r>   datar   rK   r-   r&   r=   r:   rL   scoreestimators_setrA   r   r4   )classesZ	clf_sammeZ
prob_sammealgr?   probarY   r+   r+   r,   	test_iriso   s(    
r_   lossZlinearZsquareZexponentialc                 C   st   t | dd}|tjtj |tjtj}|dks8tt|jdksJttt	dd |jD t|jksptd S )Nr   )r`   r"   g?r   c                 s   s   | ]}|j V  qd S r%   r!   rS   r+   r+   r,   rU      s     z test_diabetes.<locals>.<genexpr>)
r   r>   diabetesrX   rW   rY   r:   r=   rZ   r[   )r`   regrY   r+   r+   r,   test_diabetes   s    rc   c                 C   s  t jd}|jdtjjd}|jdtjjd}t| dd}|j	tj
tj|d |tj
}dd |tj
D }|tj
}dd |tj
D }|jtj
tj|d}	d	d |jtj
tj|dD }
t|dkstt||d
  t|dkstt||d
  t|
dkstt|	|
d
  tddd}|j	tj
tj|d |tj
}dd |tj
D }|jtj
tj|d}	dd |jtj
tj|dD }
t|dkstt||d
  t|
dkstt|	|
d
  d S )Nr   
   size)rA   n_estimatorssample_weightc                 S   s   g | ]}|qS r+   r+   rT   pr+   r+   r,   
<listcomp>   s     z'test_staged_predict.<locals>.<listcomp>c                 S   s   g | ]}|qS r+   r+   rj   r+   r+   r,   rl      s     c                 S   s   g | ]}|qS r+   r+   rT   sr+   r+   r,   rl      s    r   )rg   r"   c                 S   s   g | ]}|qS r+   r+   rj   r+   r+   r,   rl      s     c                 S   s   g | ]}|qS r+   r+   rm   r+   r+   r,   rl      s   )r2   randomRandomStaterandintrV   rW   r&   ra   r   r>   rX   rF   staged_predictr-   staged_predict_probarY   staged_scorer=   r:   r	   r   )rA   rngZiris_weightsZdiabetes_weightsr?   ZpredictionsZstaged_predictionsr^   Zstaged_probasrY   Zstaged_scoresr+   r+   r,   test_staged_predict   sF      rv   c                  C   sh   t t d} dddd}t| |}|tjtj tt dd} ddd}t| |}|t	jt	j d S )N)	estimator)r   r   rQ   )rg   estimator__max_depthrA   r   rw   r"   )rg   rx   )
r   r   r   r>   rV   rX   rW   r   r   ra   )boost
parametersr?   r+   r+   r,   test_gridsearch   s    


r|   c                  C   s   dd l } dD ]p}t|d}|tjtj |tjtj}| |}| |}t	||j
ks`t|tjtj}||kstqtdd}|tjtj |tjtj}| |}| |}t	||j
kst|tjtj}||kstd S )Nr   rQ   rR   r!   )pickler   r>   rV   rX   rW   rY   dumpsloadstype	__class__r:   r   ra   )r}   r]   objrY   rn   obj2Zscore2r+   r+   r,   test_pickle   s$    





r   c               	   C   s~   t jdddddddd\} }dD ]X}t|d	}|| | |j}|jd dksRt|d dtjf |dd  k	 s tq d S )
Ni  rd   r    r   Fr   )	n_samples
n_featuresZn_informativeZn_redundantZ
n_repeatedr   r"   rQ   rR   )
r   Zmake_classificationr   r>   feature_importances_r&   r:   r2   r6   r9   )r)   yr]   r?   Zimportancesr+   r+   r,   test_importances   s    


r   c               	   C   sF   t  } td}tjt|d | jttt	
dgd W 5 Q R X d S )Nz*sample_weight.shape == (1,), expected (6,)matchr   rh   )r   reescapepytestraises
ValueErrorr>   r)   rE   r2   rJ   )r?   msgr+   r+   r,   ,test_adaboost_classifier_sample_weight_error  s    
r   c               	   C   s   ddl m}  t|  }|tt tt dd}|tt ddl m} t	| dd}|tt t	t
 dd}|tt ddgddgddgddgg}dd	dd
g}tt dd}tjtdd ||| W 5 Q R X d S )Nr   )RandomForestClassifierrB   rR   )RandomForestRegressorr!   r   r   barr   zworse than randomr   )sklearn.ensembler   r   r>   r)   rN   r   rE   r   r   r   r   r   r   )r   r?   r   ZX_failZy_failr+   r+   r,   test_estimator  s    
r   c               	   C   s@   d} t dddd}tjt| d |tjtj W 5 Q R X d S )Nz+Sample weights have reached infinite values   g      7@rB   )rg   Zlearning_raterA   r   )r   r   warnsUserWarningr>   rV   rX   rW   )r   r?   r+   r+   r,   test_sample_weights_infinite6  s    r   c                  C   s<  G dd dt } tjddddd\}}t|}t||dd	\}}}}tttt	t
fD ]}||}||}	t| d
dddd||}
t| d
dddd||}|
|	}||}t|| |
|	}||}t|| |
|	}||}t|| |
|	}||}t|| |
|	|}|||}t|| |
|	}||}t||D ]\}}t|| qZ|
|	}||}t||D ]\}}t|| q|
|	}||}t||D ]\}}t|| q|
|	|}|||}t||D ]\}}t|| qdd |
jD }tdd |D sTtqTd S )Nc                       s"   e Zd ZdZd fdd	Z  ZS )z-test_sparse_classification.<locals>.CustomSVCz8SVC variant that records the nature of the training set.Nc                    s    t  j|||d t|| _| S z<Modification on fit caries data type for later verification.rh   superr>   r   
data_type_r(   r)   r   ri   r   r+   r,   r>   C  s    
z1test_sparse_classification.<locals>.CustomSVC.fit)Nr.   r/   r0   __doc__r>   __classcell__r+   r+   r   r,   	CustomSVC@  s   r   r         *   )Z	n_classesr   r   r"   r   r!   T)ZprobabilityrB   )rw   r"   rA   c                 S   s   g | ]
}|j qS r+   r   rT   ir+   r+   r,   rl     s     z.test_sparse_classification.<locals>.<listcomp>c                 S   s   g | ]}|t kp|tkqS r+   r   r   rT   tr+   r+   r,   rl     s     )r   r   Zmake_multilabel_classificationr2   Zravelr   r   r   r   r   r   r   r>   rF   r   rL   r	   Zpredict_log_probar-   rY   Zstaged_decision_functionziprr   rs   rt   rZ   r9   r:   )r   r)   r   X_trainX_testy_trainy_testsparse_formatX_train_sparseX_test_sparsesparse_classifierdense_classifiersparse_resultsdense_results
sprase_res	dense_restypesr+   r+   r,   test_sparse_classification=  sz    	   

  


















r   c                  C   s
  G dd dt } tjddddd\}}t||dd	\}}}}tttttfD ]}||}||}	t	|  dd

||}
t	|  dd

|| }}|
|	}||}t|| |
|	}||}t||D ]\}}t|| qdd |
jD }tdd |D sJtqJd S )Nc                       s"   e Zd ZdZd fdd	Z  ZS )z)test_sparse_regression.<locals>.CustomSVRz8SVR variant that records the nature of the training set.Nc                    s    t  j|||d t|| _| S r   r   r   r   r+   r,   r>     s    
z-test_sparse_regression.<locals>.CustomSVR.fit)Nr   r+   r+   r   r,   	CustomSVR  s   r   r   2   r   r   )r   r   	n_targetsr"   r   r!   ry   c                 S   s   g | ]
}|j qS r+   r   r   r+   r+   r,   rl     s     z*test_sparse_regression.<locals>.<listcomp>c                 S   s   g | ]}|t kp|tkqS r+   r   r   r+   r+   r,   rl     s     )r   r   Zmake_regressionr   r   r   r   r   r   r   r>   rF   r	   rr   r   rZ   r9   r:   )r   r)   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r+   r+   r,   test_sparse_regression  sD    	   
    




r   c                  C   sF   G dd dt } t|  dd}|tt t|jt|jksBtdS )z
    AdaBoostRegressor should work without sample_weights in the base estimator
    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    c                   @   s   e Zd Zdd Zdd ZdS )z=test_sample_weight_adaboost_regressor.<locals>.DummyEstimatorc                 S   s   d S r%   r+   )r(   r)   r   r+   r+   r,   r>     s    zAtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.fitc                 S   s   t |jd S )Nr   )r2   zerosr&   r'   r+   r+   r,   rF     s    zEtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.predictN)r.   r/   r0   r>   rF   r+   r+   r+   r,   DummyEstimator  s   r   r    )rg   N)	r
   r   r>   r)   rN   r=   Zestimator_weights_Zestimator_errors_r:   )r   rz   r+   r+   r,   %test_sample_weight_adaboost_regressor  s    r   c                  C   s   t jd} | ddd}| ddgd}| d}ttdd}||| || |	| t
t }||| || dS )zX
    Check that the AdaBoost estimators can work with n-dimensional
    data matrix
    r   r   r    r   Zmost_frequent)ZstrategyN)r2   ro   rp   Zrandnchoicer   r   r>   rF   r-   r   r   )ru   r)   Zycyrrz   r+   r+   r,   test_multidimensional_X  s    



r   c              	   C   s\   t jt j }}tt }t|| d}d|jj}t	j
t|d ||| W 5 Q R X d S )N)rw   rA   z {} doesn't support sample_weightr   )rV   rX   rW   r   r   r   formatr   r.   r   r   r   r>   )rA   r)   r   rw   r?   err_msgr+   r+   r,   -test_adaboostclassifier_without_sample_weight  s    
r   c            
      C   sR  t jd} t jdddd}d| d | |jd d  }|d	d
}|d	  d9  < d|d	< tt d
dd}t	|}t	|}|
|| |
|d d	 |d d	  t |}d|d	< |j
|||d ||d d	 |d d	 }||d d	 |d d	 }||d d	 |d d	 }	||k s,t||	k s:t|t|	ksNtd S )Nr   r   d     )numg?r#   g-C6?r   r   rd   i'  rw   rg   r"   rh   )r2   ro   rp   ZlinspaceZrandr&   Zreshaper   r   r   r>   r7   rY   r:   r   Zapprox)
ru   r)   r   Zregr_no_outlierZregr_with_weightZregr_with_outlierri   Zscore_with_outlierZscore_no_outlierZscore_with_weightr+   r+   r,   $test_adaboostregressor_sample_weight  s0       
r   c                 C   sZ   t tjddddi\}}}}t| dd}||| ttj||dd|	| d S )NT)Z
return_X_yr"   r   rD   r   r$   )
r   r   Zload_digitsr   r>   r   r2   r;   r-   rF   )rA   r   r   r   r   modelr+   r+   r,    test_adaboost_consistent_predict"  s    
 r   zmodel, X, yc              	   C   sD   t |}d|d< d}tjt|d | j|||d W 5 Q R X d S )Nir   z1Negative values in data passed to `sample_weight`r   rh   )r2   r7   r   r   r   r>   )r   r)   r   ri   r   r+   r+   r,   #test_adaboost_negative_weight_error2  s
    
r   c                  C   s~   t jd} | jdd}| jddgdd}t |d }tdd	d
}t|dd	d}|j|||d t 	|j
 dksztdS )zCheck that we don't create NaN feature importance with numerically
    instable inputs.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20320
    r   )r   rd   re   r   r   r   gtDS 'T	rd      )Z	max_depthr"      r   rh   N)r2   ro   rp   normalr   r7   r   r   r>   isnanr   r5   r:   )ru   r)   r   ri   treeZ	ada_modelr+   r+   r,   Ftest_adaboost_numerically_stable_feature_importance_with_small_weightsB  s    r   zAdaBoost, Estimatorc              	   C   s^   t ddgddgg}t ddg}| | d}d}tjt|d ||| W 5 Q R X d S )	Nr   r   r       r   )Zbase_estimatorzV`base_estimator` was renamed to `estimator` in version 1.2 and will be removed in 1.4.r   )r2   r3   r   r   FutureWarningr>   )AdaBoostZ	Estimatorr)   r   r   warn_msgr+   r+   r,   'test_base_estimator_argument_deprecatedT  s    r   r   c              	   C   s^   t ddgddgg}t ddg}|  }||| d}tjt|d |j W 5 Q R X d S )Nr   r   r    r   r   zoAttribute `base_estimator_` was deprecated in version 1.2 and will be removed in 1.4. Use `estimator_` instead.r   )r2   r3   r>   r   r   r   Zbase_estimator_)r   r)   r   r   r   r+   r+   r,   'test_base_estimator_property_deprecatedi  s    r   )Wr   Znumpyr2   r   r   Zscipy.sparser   r   r   r   r   Zsklearn.utils._testingr   r   r	   Zsklearn.baser
   r   Zsklearn.dummyr   r   Zsklearn.linear_modelr   Zsklearn.model_selectionr   r   r   r   r   Z!sklearn.ensemble._weight_boostingr   Zsklearn.svmr   r   Zsklearn.treer   r   Zsklearn.utilsr   Zsklearn.utils._mockingr   Zsklearnr   ro   rp   ru   r)   rE   rN   rG   rH   rO   Z	load_irisrV   ZpermutationrW   rf   permrX   Zload_diabetesra   r<   r@   markZparametrizerM   rP   r_   rc   rv   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r+   r+   r+   r,   <module>   s   (

  	

"

-]0
	&

	
