U
    3d-                     @   s  d Z ddlZddlZddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
l
mZ ddlmZmZmZ ddlmZ ddlmZ ddlmZmZ ddlmZmZ e Ze Z dd Z!dd Z"dd Z#dd Z$dd Z%dd Z&dd Z'ej()ddd gd!d" Z*d#d$ Z+d%d& Z,d'd( Z-d)d* Z.d+d, Z/ed-ef d.d/id0ej()d1d2d3gd4d5 Z0ed-ef d.d6id0ej()d1d2d3gd7d8 Z1d9d: Z2d;d< Z3d=d> Z4dS )?zD
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
    N)assert_array_equal)assert_array_almost_equal)ignore_warnings)assert_allclose)ParameterGrid)IsolationForest)_average_path_length)train_test_split)load_diabetes	load_irismake_classification)check_random_state)roc_auc_score)
csc_matrix
csr_matrix)Mockpatchc              	   C   s   t ddgddgg}t ddgddgg}tdgdddgddgd	}t . |D ]"}tf d
| i||| qTW 5 Q R X dS )z6Check Isolation Forest for various parameter settings.r                  ?      ?TF)n_estimatorsmax_samples	bootstraprandom_stateN)nparrayr   r   r   fitpredict)global_random_seedX_trainX_testgridparams r%   G/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/ensemble/tests/test_iforest.pytest_iforest$   s    r'   c                 C   s   t | }ttjdd |d\}}tddgddgd}ttfD ]p}||}||}|D ]V}tf d	| d
||}	|		|}
tf d	| d
||}|	|}t
|
| qXq@dS )z=Check IForest for various parameter settings on sparse input.N2   r   r   r   TF)r   r   
   )r   r   )r   r	   diabetesdatar   r   r   r   r   r   r   )r    rngr!   r"   r#   Zsparse_formatZX_train_sparseZX_test_sparser$   Zsparse_classifierZsparse_resultsZdense_classifierZdense_resultsr%   r%   r&   test_iforest_sparse4   s4     
 
r.   c               	   C   s   t j} d}tjt|d tdd|  W 5 Q R X t " t	dt tdd|  W 5 Q R X t ( t	dt tt
dd|  W 5 Q R X tt( t | | ddd	df  W 5 Q R X dS )
z7Test that it gives proper exception on deficient input.3max_samples will be set to n_samples for estimationmatch  r   errorautor   Nr   )irisr,   pytestwarnsUserWarningr   r   warningscatch_warningssimplefilterr   Zint64Zraises
ValueErrorr   )Xwarn_msgr%   r%   r&   test_iforest_errorN   s    

 r@   c               	   C   sF   t j} t | }|jD ](}|jttt	| j
d kstqdS )zDCheck max_depth recalculation when max_samples is reset to n_samplesr   N)r6   r,   r   r   estimators_Z	max_depthintr   ceillog2shapeAssertionError)r>   clfZestr%   r%   r&   test_recalculate_max_depthd   s    
rH   c               	   C   s   t j} t | }|j| jd ks&ttdd}d}tjt	|d ||  W 5 Q R X |j| jd kslttdd| }|jd| jd  kstd S )Nr   i  r3   r/   r0   g?)
r6   r,   r   r   max_samples_rE   rF   r7   r8   r9   )r>   rG   r?   r%   r%   r&   test_max_samples_attributel   s    
rJ   c                 C   s   t | }ttj|d\}}td| d|}|jdd ||}|jdd ||}t|| td| d|}||}t|| dS )zCheck parallel regression.r)   r   )n_jobsr   r   )rK   r   N)	r   r	   r+   r,   r   r   
set_paramsr   r   )r    r-   r!   r"   Zensembley1y2Zy3r%   r%   r&    test_iforest_parallel_regression{   s    



rO   c           	      C   s   t | }d|dd }|t|d |d f}|dd }|jdddd	}t|dd |f}td
gd dgd  }td|d|}|	| }t
||dkstdS )z#Test Isolation Forest performs wellg333333?iX  r   Nr2   r   )   r   )lowhighsizer   rQ   d   )r   r   g\(\?)r   randnZpermutationr   Zvstackuniformr   r   r   decision_functionr   rF   )	r    r-   r>   r!   Z
X_outliersr"   y_testrG   Zy_predr%   r%   r&   test_iforest_performance   s    rZ   contamination      ?r5   c              	   C   s   ddgddgddgddgddgddgddgddgg}t || d	}|| || }||}t|dd  t|d d kstt|d
dg ddg   d S )NrP   r   r         	   )r   r[      )	r   r   rX   r   r   minmaxrF   r   )r[   r    r>   rG   Zdecision_funcpredr%   r%   r&   test_iforest_works   s    4

(rf   c                  C   s&   t j} t | }|j|jks"td S N)r6   r,   r   r   rI   Z_max_samplesrF   )r>   rG   r%   r%   r&   test_max_samples_consistency   s    rh   c                  C   sV   t d} ttjd d tjd d | d\}}}}tdd}||| || d S )Nr   r(   r)   g?)Zmax_features)r   r	   r+   r,   targetr   r   r   )r-   r!   r"   Zy_trainrY   rG   r%   r%   r&    test_iforest_subsampled_features   s      
rj   c                  C   s   dt dt j  d } dt dt j  d }ttdgdg ttdgdg ttd	gd
g ttdg| g ttdg|g ttt dd	ddgdd
| |g tt d}t|t | d S )N       @g      @g?g     0@g}?r   g        r   r   r      i  )	r   logZeuler_gammar   r   r   Zaranger   sort)Z
result_oneZ
result_twoZavg_path_lengthr%   r%   r&    test_iforest_average_path_length   s    
ro   c                  C   s   ddgddgddgg} t dd| }t  | }t|ddgg|ddgg|j  t|ddgg|ddgg|j  t|ddgg|ddgg d S )Nr   r   g?)r[   rk   )r   r   r   Zscore_samplesrX   Zoffset_)r!   Zclf1Zclf2r%   r%   r&   test_score_samples   s     rp   c                  C   sv   t d} | dd}tdd| dd}|| |jd }|jdd || t|jdks`t|jd |ksrtdS )	z/Test iterative addition of iTrees to an iForestr      r   r*   T)r   r   r   Z
warm_start)r   N)r   rV   r   r   rA   rL   lenrF   )r-   r>   rG   Ztree_1r%   r%   r&   test_iforest_warm_start   s       


rs   z*sklearn.ensemble._iforest.get_chunk_n_rowsZreturn_valuer   )Zside_effectzcontamination, n_predict_calls)r\   r   )r5   r   c                 C   s   t || | j|kstd S rg   rf   Z
call_countrF   Zmocked_get_chunkr[   Zn_predict_callsr    r%   r%   r&   test_iforest_chunks_works1  s    
rv   r*   c                 C   s   t || | j|kstd S rg   rt   ru   r%   r%   r&   test_iforest_chunks_works2  s    
rw   c                  C   s|  t d} t }||  t jd}t|| dks<tt||	dddksZtt|| d dksttt|| d dkstt 
|	dddd} t }||  t|| dkstt||	dddkstt|t ddkst|	dd} t }||  t|| dks:tt||	dddksZtt|t ddksxtdS )z=Test whether iforest predicts inliers when using uniform data)rU   r*   r   r   rU   r*   N)r   Zonesr   r   randomZRandomStateallr   rF   rV   repeat)r>   Ziforestr-   r%   r%   r&   test_iforest_with_uniform_data  s(    



 r{   c                  C   s2   t dddd\} }t| } tdddd|  d	S )
zdCheck that Isolation Forest does not segfault with n_jobs=2

    Non-regression test for #23252
    iL rU   r   )Z	n_samplesZ
n_featuresr   r*      r   )r   r   rK   N)r   r   r   r   )r>   _r%   r%   r&   *test_iforest_with_n_jobs_does_not_segfault?  s    r~   c               	   C   s^   t ddgddgg} t ddg}t }|| | d}tjt|d |j W 5 Q R X d S )Nr   r   r   r_   r   zoAttribute `base_estimator_` was deprecated in version 1.2 and will be removed in 1.4. Use `estimator_` instead.r0   )r   r   r   r   r7   r8   FutureWarningZbase_estimator_)r>   ymodelr?   r%   r%   r&   'test_base_estimator_property_deprecatedJ  s    r   )5__doc__r7   r:   Znumpyr   Zsklearn.utils._testingr   r   r   r   Zsklearn.model_selectionr   Zsklearn.ensembler   Zsklearn.ensemble._iforestr   r	   Zsklearn.datasetsr
   r   r   Zsklearn.utilsr   Zsklearn.metricsr   Zscipy.sparser   r   Zunittest.mockr   r   r6   r+   r'   r.   r@   rH   rJ   rO   rZ   markZparametrizerf   rh   rj   ro   rp   rs   rv   rw   r{   r~   r   r%   r%   r%   r&   <module>   s^   
"