U
    2d9                 %   @   sV	  d Z ddlZddlZddlZddlZddlmZmZm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddlm!Z! ddlm"Z" ddlm#Z# ddlm$Z$ ddlm%Z% ddlm&Z& ddlm'Z' ddlm(Z( ddlm)Z) ddlm*Z* ddlm+Z+ ddlm,Z, dd lm-Z- dd!lm.Z. dd"l/m0Z0 dd#l1m2Z2 dd$l1m3Z3 dd%l1m4Z4 dd&l5m6Z6 dd'l5m7Z7 dd(l8m9Z9 e:d)Z;e<d)d* Z=ee>d+Z?e@d,d,d,d,d*d*d*d-d-d-d-d-ge@dddd,d,d,d*d*d*d-d-d-ge@dd,d*d-dd,d*d-dd,d*d-dd,d*ge@d,d,d*d*d*d-d-d-d.d.d.d.d.d.d.d.gd,d,d,d,d*d*d*d-d-d-d-d-gd/d/d/d/d0d0d0d1d1d1d1d1gfZAe6 ZBed2d3 ZCd4d5 ZDd d6d7ZEd8d9 ZFd:d; ZGd<d= ZHd>d? ZId@dA ZJejKLdBdCdDgejKLdEd.d+dFdGdHdId)gejKLdJee.gdKdL ZMejKLdBdCdDgejKLdEd.dFdGgejKLdJee.gdMdN ZNdOdP ZOejKLdJee.gdQdR ZPdSdT ZQejKLdJeee.gdUdV ZRdWdX ZSdYdZ ZTd[d\ ZUd]d^ ZVejKLd_e@dgdF d,gdF  e@d,d,d*d*d-d-d.d.d+d+dFdFgeWd`d`gd`d`gd`d`ggfe@dgdI d,gd-  e@d,d,d,d*d*d*d-d-d-d.d+dFgeWdadbgdadbgdadbggfgdcdd ZXejKLdedfdgdhdigejKLdjd+dkdlgdmdn ZYdodp ZZejKLdqe%e'gejKLdrdsdtdugdvdw Z[ejKLdrdxdydzgd{d| Z\ed}d~ Z]dd Z^dd Z_dd Z`dd Zadd Zbdd Zcdd Zddd Zedd Zfdd Zgdd Zhdd Ziedd ZjejKLde,e-gdd Zkdd Zldd Zmdd Zndd Zodd ZpejKLdddddddddgdd ZqejKLdddddddgdd ZrejKLdrddtdugdd Zsdd Ztdd Zuedd ZvddÄ Zwddń ZxddǄ ZyejKLddddddddgddф Zzddӄ Z{ddՄ Z|ddׄ Z}ddل Z~ddۄ ZejKLdJee.gdd݄ Zdd߄ Zdd Zdd Zdd Zdd Zdd Zdd ZejKLde%e&e'fdd Zdd Zdd Zdd ZejKLdeee.fdd ZejKLde dDfedDdddDfe dDfedDdddDfe.dDdddDfe. dDfe,dddDfe-dddDfe%dddDfe&dddDfe'dddDfe dDfe  dDfe! dDfe" dDfe$d*ddDfe#d*ddDfedDdddCfedDdddCfedDejdddCfedDejdddCfe,dddCfe,ejdddCfe-dddCfe-ejdddCfe%dddCfe%ejdddCfe&dddCfe&ejdddCfe'dddCfe'ejdddCfgdd ZdS (  zTest the split module    N)
coo_matrix
csc_matrix
csr_matrix)stats)comb)combinations)combinations_with_replacement)permutations)assert_allclose)assert_array_almost_equal)assert_array_equal)ignore_warnings)_num_samples)MockDataFrame)cross_val_score)KFold)StratifiedKFold)
GroupKFold)TimeSeriesSplit)LeaveOneOut)LeaveOneGroupOut)	LeavePOut)LeavePGroupsOut)ShuffleSplit)GroupShuffleSplit)StratifiedShuffleSplit)PredefinedSplit)check_cv)train_test_split)GridSearchCV)RepeatedKFold)RepeatedStratifiedKFold)StratifiedGroupKFold)DummyClassifier)_validate_shuffle_split_build_repr)_yields_constant_splits)load_digits)make_classification)SVC
                  123c            "      C   s<  d} d}d}d}d}t ddgddgddgdd	gg}t ddddg}t ddddg}t ddddg}t }	t|}
t|}t|}t }t|}td
d}t	ddddg}t
|}d}d}d}d}d}d}d}d}d}| t| ||||t|||d|g	}tt|	|
|||||||g	|||||||||g	D ]\}\}}|| ||||ksZtt jt||||t|||| ||||D ]:\}} t |jjdkstt | jjdkstq|t|ks2tq2d}!tjt|!d |	d || W 5 Q R X tjt|!d |
d || W 5 Q R X d S )Nr0   r,   r+   r.   r/   r-            r   random_statezLeaveOneOut()zLeavePOut(p=2)z3KFold(n_splits=2, random_state=None, shuffle=False)z=StratifiedKFold(n_splits=2, random_state=None, shuffle=False)LeaveOneGroupOut()LeavePGroupsOut(n_groups=2)zJShuffleSplit(n_splits=10, random_state=0, test_size=None, train_size=None)z.PredefinedSplit(test_fold=array([1, 1, 2, 2]))zBStratifiedGroupKFold(n_splits=2, random_state=None, shuffle=False)iz%The 'X' parameter should not be None.match)nparrayr   r   r   r   r   r   r   r   r"   r   	enumeratezipget_n_splitsAssertionErrortestingassert_equallistsplitasarraydtypekindreprpytestraises
ValueError)"	n_samplesZn_unique_groupsn_splitspZn_shuffle_splitsXZX_1dygroupsZlooZlpokfskfloloZloposspssgkfZloo_reprZlpo_reprZkf_reprZskf_reprZ	lolo_reprZ	lopo_reprZss_reprZps_reprZ	sgkf_reprZn_splits_expectedr;   cvZcv_reprtraintestmsg r_   L/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/model_selection/tests/test_split.py(test_cross_validator_with_default_paramsA   s    "
 ra   c                  C   sJ  d} t jd}|jdd| dfd}|jdd| fd}|dd}|jdd| dfd}|jdd| fd}t tddt t t	 t
 t t td	d
t t tddtddt t|dg}|D ]}t|||| t|||| zt|||| W q tk
rB }	 z$d}
d|
}|t|	ks2tW 5 d }	~	X Y qX qd S )N   r.   r   r/   r,   )sizerQ         ?	test_sizen_groupsrP   )Z	test_fold)binaryZ
multiclassz/Supported target types are: {}. Got 'multilabel)r>   randomRandomStaterandintreshaper   r   r   r   r    r!   r"   r   r   r   r   r   r   r   r   rF   rG   rN   formatstrrC   )rO   rngrR   rS   Zy_2dy_multilabelrT   Z	splittersZsplittereZallowed_target_typesr^   r_   r_   r`   	test_2d_y   sD    rv   c                 C   sL   t | t | } }| |t  ks&t|d k	rH| |t t|ksHtd S N)setintersectionrC   unionrange)r\   r]   rO   r_   r_   r`   check_valid_split   s    r|   c           
      C   s   t |}| ||||kstt }d}| |||D ](\}}	t||	|d |d7 }||	 q6||kslt|d k	r|tt|kstd S )Nr   )rO   r.   )r   rB   rC   rx   rG   r|   updater{   )
r[   rR   rS   rT   expected_n_splitsrO   Zcollected_test_samplesZ
iterationsr\   r]   r_   r_   r`   check_cv_coverage   s    r   c               	   C   sB  t ddgddgddgg} t ddgddgddgddgd	d
gg}tttd| f t dddddg}td}tjt	dd t||| W 5 Q R X t
d}t t|}tjt	dd t|||| W 5 Q R X t " td t|||d dd W 5 Q R X t " td t||||dd W 5 Q R X t dddddg}tt t||| W 5 Q R X tt t||| W 5 Q R X tt td W 5 Q R X tt td W 5 Q R X d}tjt|d td W 5 Q R X tjt|d td W 5 Q R X tjt|d t
d W 5 Q R X tjt|d t
d W 5 Q R X tt td W 5 Q R X tt td W 5 Q R X tt td W 5 Q R X tt td W 5 Q R X tt t
d W 5 Q R X tt t
d W 5 Q R X tt tdd d W 5 Q R X d S )Nr.   r,   r/   r0   r-   r4   r5   r6   	   r+   rd   zThe least populated classr<   ignorerT   r~   r   z>k-fold cross-validation requires at least one train/test split      ?       @rP   shuffle)r>   r?   rN   nextr   rG   r   rL   ZwarnsWarningr"   arangelenwarningscatch_warningssimplefilterr   rM   	TypeError)X1X2rS   Zskf_3Zsgkf_3Znaive_groupsZerror_stringr_   r_   r`   test_kfold_valueerrors   sb    (



r   c                  C   sb   t d} td}t|| d d dd t d}td}t||d d dd dtd|ks^td S )N   r/   )rS   rT   r~      r-   )r>   onesr   r   rB   rC   )r   rU   r   r_   r_   r`   test_kfold_indices%  s    

r   c                  C   s   ddgddgddgddgd	d
gg} t d| d d }t|\}}t|ddg t|ddg t|\}}t|ddg t|ddg t d| }t|\}}t|dddg t|ddg t|\}}t|ddg t|dddg d S )Nr.   r,   r/   r0   r-   r4   r5   r6   r   r+   rd   r   )r   rG   r   r   )r   splitsr\   r]   r_   r_   r`   test_kfold_no_shuffle5  s    "r   c                  C   s  t dddddg } }td| |}t|\}}t|ddg t|ddg t|\}}t|ddg t|ddg t ddddddddg } }td| |}t|\}}t|ddddg t|dddg t|\}}t|dddg t|ddddg dtd| |kstt d} d	d	d	d
d
d
d
g}dddddddg}t j	t
td| |t
td| | ddddddddg}t |} t j	t
td| |t
td| | d S )Nr0   r.   r   r,   r/   r5   r-   r4   r1   0)r>   r   r   rG   r   r   rB   rC   rD   rE   rF   	ones_liker   )rR   rS   r   r\   r]   y1y2r_   r_   r`    test_stratified_kfold_no_shuffleL  s<    
 
 r   r   FTkr4   r5   r6   r   kfoldc                 C   s  d}t |}t dgtd|  dgtd|   dgtd|   }t t|}t |t| }g }|svd nd}	|| |	|d}
|
j|||d	D ]V\}}tt || t| |d
d tt || t| |d
d |	t| qt 
|dkstd S )N  r0   皙?r   {Gz?r.   {Gz?r8   r   rT   {Gz?Zatol)r>   r   r?   intr   r   bincountrG   r
   appendptprC   )r   r   r   rO   rR   rS   rT   distr
test_sizesr8   rV   r\   r]   r_   r_   r`   test_stratified_kfold_ratiosw  s&    
  r   c           
         s   d}t dgtd|  dgtd|   dgtd|   }t t| t t| fdd	}||}tdddgD ]$}t ||}||}	|	|kstqd S )
Nd   r,   r   r   r   r.   r   c                    s0   sd nd}dd |dj  | dD S )Nr   c                 S   s    g | ]\}}t |t |fqS r_   )rF   ).0r\   r]   r_   r_   r`   
<listcomp>  s   zNtest_stratified_kfold_label_invariance.<locals>.get_splits.<locals>.<listcomp>r   r   )rG   )rS   r8   rR   rT   r   r   r   r_   r`   
get_splits  s        z:test_stratified_kfold_label_invariance.<locals>.get_splits)	r>   r?   r   r   r   r   r	   ZtakerC   )
r   r   r   rO   rS   r   Zsplits_basepermZy_permZsplits_permr_   r   r`   &test_stratified_kfold_label_invariance  s     	r   c                  C   sf   t ddD ]V} tdjt| d}dd |D }t|t| dksNtt|| ks
tq
d S )N   r   r-   rR   c                 S   s   g | ]\}}t |qS r_   r   r   _r]   r_   r_   r`   r     s     z&test_kfold_balance.<locals>.<listcomp>r.   )	r{   r   rG   r>   r   maxminrC   sum)r;   rU   sizesr_   r_   r`   test_kfold_balance  s
    r   c           	   	   C   s   t d}dgd dgd  }t t|}dD ]}| d|d}tddD ]f}||d | |d | |d | }d	d
 |D }t |t | dkstt 	||ksJtqJq0d S )Nr   r   r/   r.      TFr   r   c                 S   s   g | ]\}}t |qS r_   r   r   r_   r_   r`   r     s     z0test_stratifiedkfold_balance.<locals>.<listcomp>)
r>   r   r   r   r{   rG   r   r   rC   r   )	r   rR   rS   rT   r   r[   r;   rV   r   r_   r_   r`   test_stratifiedkfold_balance  s    
&r   c                  C   s   t d} t dddd}t dddd}td}td}t| |||||D ]T\\}}\}}\}	}
t|||	fdD ]$\}}tt||t|ks|t	q|d||< qVt
|dkst	d S )Nr/   Tr   r   r8   r.   ,  r,   )r   r>   r   zerosrA   rG   r   r   intersect1drC   r   )rU   kf2Zkf3rR   Z	all_foldsZtr1Zte1Ztr2Zte2Ztr3Zte3Ztr_aZtr_br_   r_   r`   test_shuffle_kfold  s    

  
r   c                 C   s  t d}dgd dgd  }t t|}t d}dgd dgd  }t t|}| dddd	}t jt||||t|||| | ddt j	dd	}t
||f||f||fD ]P}t
|j| |j| D ]4\\}	}
\}	}tt t j|
| W 5 Q R X qqd S )
N   r   r5   r.   r6      r/   Tr   )r>   r   r   r   rD   rE   rF   rG   rm   rn   rA   rL   rM   rC   r   )r   rR   rS   Zgroups_1r   r   Zgroups_2rU   datar   Ztest_aZtest_br_   r_   r`   2test_shuffle_kfold_stratifiedkfold_reproducibility  s     

 &r   c                  C   s  t d} dgd dgd  }tdddd}tdddd}t|| ||| |D ]$\\}}\}}t|t|ksTtqTt|| |d dd t d	}dgd dgd  }tdddd}tdddd}t	d
d |||D }	t	dd |||D }
|	|
kstd S )N(   r      r.   r-   Tr   r   r+   c                 S   s   g | ]}t |d  qS r.   tupler   sr_   r_   r`   r     s     z0test_shuffle_stratifiedkfold.<locals>.<listcomp>c                 S   s   g | ]}t |d  qS r   r   r   r_   r_   r`   r     s     )
r>   r   r   rA   rG   rx   rC   r   r   sorted)ZX_40rS   Zkf0Zkf1r   Ztest0test1rR   r   Z	test_set1Z	test_set2r_   r_   r`   test_shuffle_stratifiedkfold  s    
*
r   c                  C   s   t jd d t jd d  } }tddd}d}t|dd}t|| ||d }d	|ksZt|d
ksftt|ddd}t|| ||d }|d	kstt|ddd}t|| ||d }|d	kstt|}t|| ||d }d|kst|d
kstd S )NiX  r+   g{Gzt?)Cgammar/   Fr   r[   gq=
ףp?皙?Tr   r   r.   gGz?)	digitsr   targetr*   r   r   meanrC   r   )rR   rS   modelrP   r[   Z
mean_scorer_   r_   r`   1test_kfold_can_detect_dependent_samples_on_digits  s"    		r   c                  C   s   t dd} tdgd dgd  }t|dd}td}t|t| }g }| |||D ]t\}}t	|| || j
dksttt|| t| |d	d
 tt|| t| |d	d
 |t| qdt|dkstd S )Nr/   rk   r.   r4   r      rd   )r.   r,   r/   r0   r-   r4   r.   r.   r,   r,   r/   r/   r0   r0   r-   r-   r4   r4   r   r   )r"   r>   r?   r   rp   rH   r   r   rG   r   rc   rC   r
   r   r   )rZ   rS   rR   rT   r   r   r\   r]   r_   r_   r`   #test_stratified_group_kfold_trivialI  s    

  r   c            
      C   s  t dd} tdgd dgd  }t|dd}tddddd	d	dddddd	d
d
d
dddg}tddgddgddgg}g }t| ||||D ]\\\}}}t|| || j	dkst
t|| t| }	t|	|dd |t| qt|dks
t
d S )Nr/   rk   r.   r4   r   r   rd   r,   r0   r-   g-?gsh|??gZd;O?gZd;O?rf   MbP?r   )r"   r>   r?   r   rp   rH   rA   rG   r   rc   rC   r   r   r
   r   r   )
rZ   rS   rR   rT   expectedr   r\   r]   expect_dist
split_distr_   r_   r`   'test_stratified_group_kfold_approximate[  s    
. r   zy, groups, expectedrf         ?      ?c           	      C   s   t dd}t| dd}t||| ||D ]N\\}}}t|| || jdksZtt	| | t
| }t||dd q0d S )Nr/   rk   rd   r.   r   r   r   )r"   r>   r   rp   rA   rG   r   rc   rC   r   r   r
   )	rS   rT   r   rZ   rR   r\   r]   r   r   r_   r_   r`   .test_stratified_group_kfold_homogeneous_groupsm  s    
 r   	cls_distr)g?333333?)g333333?ffffff?)皙?r   )r   r   rj   rb   F   c                 C   s*  d}t |d}t|d}tjd}d}|jd|| d}t|dd}|||}	|j|||	d	}
|j|||	d	}d}d}t	|
|D ]~\\}}\}}t
|	| |	| jdkstt|| t| }t|| t| }|tj|| d
7 }|tj|| d
7 }q|| }|| }||ks&td S )Nr-   rk   r   r   r,   )rc   rQ   rd   r.   r   )Zqk)r"   r   r>   rm   rn   choicer   rp   rG   rA   r   rc   rC   r   r   r   Zentropy)r   rj   rP   rZ   Zgkfrs   Zn_pointsrS   rR   gZ
sgkf_foldsZ	gkf_foldsZ	sgkf_entrZgkf_entrZ
sgkf_trainZ	sgkf_testr   Zgkf_testZ
sgkf_distrZ	gkf_distrr_   r_   r`   /test_stratified_group_kfold_against_group_kfold  s*    

r   c                  C   s   t dddt} t dddt}t tdddt}t tdddt}t| |||D ]x\}}}}t|d |d  t|d |d  t|d |d  t|d |d  t|d |d  t|d |d  q`d S )Nr   r   rh   r8   r,   r.   )r   rG   rR   r>   Zint32r   rA   r   )Zss1Zss2Zss3Zss4t1t2t3Zt4r_   r_   r`   test_shuffle_split  s    r   split_classztrain_size, exp_train, exp_test)Nr   r.   )r6   r6   r,   )r   r6   r,   c                 C   sR   t d}t d}t| |d||\}}t||ks>tt||ksNtd S Nr+   
train_size)r>   r   r   rG   r   rC   )r   r   	exp_trainexp_testrR   rS   X_trainX_testr_   r_   r`   $test_shuffle_split_default_test_size  s
    

r   )Nr6   r,   )r5   r5   r/   )r   r5   r/   c                 C   s\   t d}t d}td}tt| d|||\}}t||ksHtt||ksXtd S r   )r>   r   r{   r   r   rG   r   rC   )r   r   r   rR   rS   rT   r   r   r_   r_   r`   *test_group_shuffle_split_default_test_size  s    

r   c                  C   s0  t d} t dddddddg}tt ttddd| | W 5 Q R X tt ttddd| | W 5 Q R X tt  ttdddd| | W 5 Q R X t d	} t dddddddddg	}tt ttdd
| | W 5 Q R X tt ttdd| | W 5 Q R X d S )Nr5   r   r.   r,   r/   r   rg   rh   r   r   r   )	r>   r   rH   rL   rM   rN   r   r   rG   rR   rS   r_   r_   r`   "test_stratified_shuffle_split_init  s    
""$
 r  c                  C   s   t dddddddddddddddg} d}d}td||ddt t| | }|D ](\}}t||ksntt||ksVtqVd S )	Nr   r.   r,   r/   r-   r+   r4   )rh   r   r8   )r>   r?   r   rG   r   r   rC   )rS   rh   r   sssr\   r]   r_   r_   r`   0test_stratified_shuffle_split_respects_test_size  s    (    r  c            	      C   s8  t ddddddddddddgt ddddddddddddgt dddddddddddddddgd t ddddddddddddddddgt dgd dgd  t d	d
 tdD ddddddddddddgddddddddddddgg} | D ]4}tddddt t||}t |}t 	dt| }t|| }|D ]\}}t
t || t ||  t t j|| ddd tt||  }t t j|| ddd tt||  }t||d t|t| |jkstt||kstt||kstt
t jj||g  qNqd S )Nr.   r,   r/   r   r0   rd   i   2   c                 S   s   g | ]}|gd |  qS )r   r_   )r   r;   r_   r_   r`   r     s     z6test_stratified_shuffle_split_iter.<locals>.<listcomp>r   r1   r2   r3   r4   gQ?r   T)Zreturn_inverse)r>   r?   Zconcatenater{   r   rG   r   r   Z
asanyarrayceilr   uniquer   floatr   rc   rC   libZarraysetopsr   )	ZysrS   r  rh   r   r\   r]   Zp_trainZp_testr_   r_   r`   "test_stratified_shuffle_split_iter  s<      *(
 


r  c                     s  d} d  fdd}dD ]x}t |d ddg }t d	|  dd
}dg| }dg| }d}|jt ||dD ]D\}}	|d7 }||f||	ffD ]"\}
}|D ]}|
|  d7  < qqqr| kstt|d	|  d	d	|   d\}}t||kstt|	|ksttt|	|	dks tt 
|}|jd	|  ks>t|| t|ksTtt|dksftt|| }t|| }||| ||| qd S )Nr-   r   c                    s<   d  }t  |}| D ]}||}||kstdqd S )N皙?z=An index is not drawn with chance corresponding to even draws)r   ZbinomZpmfrC   )Z
idx_countsrQ   	thresholdZbfcountZprobrk   r_   r`   assert_counts_are_ok"  s    
z@test_stratified_shuffle_split_even.<locals>.assert_counts_are_ok)r4      r,   r   r.         ?rP   rh   r8   r  r  )r>   r?   r   rG   r   rC   r$   r   rx   ry   r  rh   r	  )Zn_foldsr  rO   rT   r   Ztrain_countsZtest_countsn_splits_actualr\   r]   counteridsidZn_trainZn_testZgroup_countsZ	ex_test_pZ
ex_train_pr_   rk   r`   "test_stratified_shuffle_split_even  sF    
  

  



r  c                  C   s|   ddddgd ddgd  } t | }tdddd}t|j|| d	\}}tt ||g  tt ||t t	|  d S )
Nr   r.   r,   r/   r0   r-   rf   r  r  )
r>   r   r   r   rG   r   r   union1dr   r   )rS   rR   r  r\   r]   r_   r_   r`   4test_stratified_shuffle_split_overlap_train_test_bugP  s    
r  c                  C   s  t ddgddgddgddggt ddgddgddgddggfD ]} t | }tdddd}t|j|| d\}}| | }| | }tt ||g  tt ||t 	t
|  t | d d df }|t |d d df kst|t |d d df ksDtqDd S )Nr   r.   rf   r  r  )r>   r?   r   r   r   rG   r   r   r  r   r   r   rC   )rS   rR   r  r\   r]   y_trainy_testexpected_ratior_   r_   r`   (test_stratified_shuffle_split_multilabela  s      
r  c            
      C   s   dddgdgd  dddg } dddgdgd  dddg }t | gd |gd  }t |}tdddd}t|j||d\}}|| }|| }t |d d d	f }	|	t |d d d	f kst|	t |d d d	f kstd S )
Nr.   r   r   r+   r   rf   r  r  r0   )r>   r?   r   r   r   rG   r   rC   )
Zrow_with_many_zerosZrow_with_many_onesrS   rR   r  r\   r]   r  r  r  r_   r_   r`   4test_stratified_shuffle_split_multilabel_many_labelsz  s    
r  c            	      C   s   t dd} g }g }ttdddtD ](\}\}}|| || || |< q*t| }tt 	| |
 ksvtt|  \}}t|| t|| d S )Nr+   g      r-   Tr   )r>   fullr@   r   rG   rR   r   r   r   r  rB   rC   rA   r   )	foldsZkf_trainZkf_testr;   Z	train_indZtest_indrY   Zps_trainZps_testr_   r_   r`   %test_predefinedsplit_with_kfold_split  s    "



r!  c                  C   sT  t D ]H} tt|  }}d}d}t||dd}t| |j||| d|ksRtt| }t	| }|j
||| dD ]\}}	t|| }
t||	 }tt|| |rttt||	 |
rt|| j||	 j |jksttt||	g  tt|t|t|  dks$ttt|
td| t|  dksvtqvqd S )Nr4   gUUUUUU?r   r   r   r.   r  )test_groupsr>   r   r   r   rK   rB   rC   r  rH   rG   anyZin1drc   r   r   absround)groups_irR   rS   rP   rh   ZsloZl_uniquelr\   r]   Zl_train_uniqueZl_test_uniquer_   r_   r`   test_group_shuffle_split  s&    


&"r(  c               	   C   s  t  } tdd}tdd}t| dks*tt|dks:tt|dksJtttdddks`tt| df|df|dffD ]\}\}}ttD ]\}}tt|}|dkr|n||d  d }	t	t| }
}|j
|
||d	|	kstt|}|j|
||d	D ]d\}}tt|| ||  g  t|t| t|ksHtt|| jd
 st|qqqz| 
d d dddddgdkst| j
ddddgd	dkst|
d d tddkst|j
tdd	dksttt | 
d d dtjdg W 5 Q R X tt |
d d dtjdg W 5 Q R X d}tjt|d | 
d d d  W 5 Q R X tjt|d |
d d d  W 5 Q R X d S )Nr.   ri   r,   r9   zLeavePGroupsOut(n_groups=1)r:   r/   zLeavePGroupsOut(n_groups=3)r   r   abcr  皙?333333?r0   r4           z*The 'groups' parameter should not be None.r<   )r   r   rK   rC   r@   r"  r   r>   r  r   rB   rH   rG   r   r   tolistshaper   rL   rM   rN   naninf)ZlogoZlpgo_1Zlpgo_2jr[   Zp_groups_outr;   r&  rj   rP   rR   rS   Z
groups_arrr\   r]   r^   r_   r_   r`   test_leave_one_p_group_out  sD    

&
 &"  r4  c               
   C   s  t ddddddddg} t t| }t j| dd}t j|| d}t j|| d}tddj|| d}tddj|| d}d|d d < ||f||ffD ]8\}}t||D ]$\\}	}
\}}t|	| t|
| qqdtddj	||| d	kst
dt j	||| d	kst
d S )
Nr   r.   r,   T)copyr   ri   r/   )rS   rT   )r>   r?   r   r   r   rG   r   rA   r   rB   rC   )rT   rR   Zgroups_changingrW   Zlolo_changingZlploZlplo_changingZlloZllo_changingr\   r]   Z
train_chanZ	test_chanr_   r_   r`   $test_leave_group_out_changing_groups  s    
r6  c                  C   s   t ddddddg} t t| }tt j|| d}ddddgddgfddddgddgfddddgddgfg}|D ](\}}t|\}}t|| t|| qvd S )Nr,   r   r.   r   r0   r-   r/   )	r>   r?   r   r   iterr   rG   r   r   )rT   rR   r   Zexpected_indicesZexpected_trainZexpected_testr\   r]   r_   r_   r`   %test_leave_group_out_order_dependence  s    
r8  c               	   C   sL  t d }  }}td}tjt|d tt 	| || W 5 Q R X t d }  }}td| d}tjt|d tt 	| || W 5 Q R X t d }  }}td| d}tjt|d tt
d	d
	| || W 5 Q R X t d	 }  }}td| d}tjt|d tt
d	d
	| || W 5 Q R X d S )Nr   zFound array with 0 sample(s)r<   r.   z:The groups parameter contains fewer than 2 unique groups (z'). LeaveOneGroupOut expects at least 2.z^The groups parameter contains fewer than (or equal to) n_groups (3) numbers of unique groups (zR). LeavePGroupsOut expects that at least n_groups + 1 (4) unique groups be presentr/   ri   )r>   r   reescaperL   rM   rN   r   r   rG   r   r   )rR   rS   rT   r^   r_   r_   r`   :test_leave_one_p_group_out_error_on_fewer_number_of_groups(  s,    


"
r;  c               
   C   sR   t tfD ]D} tt | dd W 5 Q R X tt | dd W 5 Q R X qd S )Nr   )	n_repeatsr   )r    r!   rL   rM   rN   r   r_   r_   r`   test_repeated_cv_value_errorsK  s
    r=  
RepeatedCVc                 C   s6   d\}}| ||d}d |jj}|t|ks2td S )N)r,   r4   rP   r<  z.{}(n_repeats=6, n_splits=2, random_state=None))rq   	__class____name__rK   rC   )r>  rP   r<  Zrepeated_cvZrepeated_cv_reprr_   r_   r`   test_repeated_cv_reprU  s    rB  c               
   C   s  ddgddgddgddgd	d
gg} d}t dd|d}tdD ]}|| }t|\}}t|ddg t|dddg t|\}}t|dddg t|ddg t|\}}t|ddg t|dddg t|\}}t|dddg t|ddg tt t| W 5 Q R X q<d S )Nr.   r,   r/   r0   r-   r4   r5   r6   r   r+   i{icrP   r<  r8   r   )r    r{   rG   r   r   rL   rM   StopIteration)rR   r8   rkfr   r   r\   r]   r_   r_   r`   &test_repeated_kfold_determinstic_split_  s&    "
rF  c                  C   s0   d} d}t | |d}| | }|| ks,td S Nr/   r0   r?  )r    rB   rC   )rP   r<  rE  r~   r_   r_   r`   $test_get_n_splits_for_repeated_kfold|  s
    rH  c                  C   s0   d} d}t | |d}| | }|| ks,td S rG  )r!   rB   rC   )rP   r<  rskfr~   r_   r_   r`   /test_get_n_splits_for_repeated_stratified_kfold  s
    rJ  c               
   C   s&  ddgddgddgddgd	d
gg} dddddg}d}t dd|d}tdD ]}|| |}t|\}}t|ddg t|dddg t|\}}t|dddg t|ddg t|\}}t|ddg t|dddg t|\}}t|dddg t|ddg tt t| W 5 Q R X qJd S )Nr.   r,   r/   r0   r-   r4   r5   r6   r   r+   r   iqsrC  )r!   r{   rG   r   r   rL   rM   rD  )rR   rS   r8   rI  r   r   r\   r]   r_   r_   r`   1test_repeated_stratified_kfold_determinstic_split  s(    "rK  c                	   C   s  t tt t jtttddd t jtttdddd t jtttdtdtdd t jtttddd t jtttddd	d t jtttddd
 t tttdtd t jtttdddd t jtdd ttdddd W 5 Q R X d S )Nr/   r,  r   r   r  Z
wrong_typerg   r,   r0   )Zsome_argument*   r+   FT)r   stratifyzrtrain_size=11 should be either positive and smaller than the number of samples 10 or a float in the \(0, 1\) ranger<   r   r.   r   rh   )rL   rM   rN   r   r{   r>   Zfloat32r   r_   r_   r_   r`   test_train_test_split_errors  s(    rO  ztrain_size,test_size)r-  r   )r  r   )r.  r   )皙ɿr   )r   r-  )r   r  )r   r.  )r   rP  c              	   C   s0   t jtdd ttd| |d W 5 Q R X d S )Nz"should be .* in the \(0, 1\) ranger<   r+   rN  rL   rM   rN   r   r{   rN  r_   r_   r`   $test_train_test_split_invalid_sizes1  s    rR  )r   )r   r   )r   r   )r   rS  )r   r   )r   r   c              	   C   s0   t jtdd ttd| |d W 5 Q R X d S )Nz%should be either positive and smallerr<   r+   rN  rQ  rN  r_   r_   r`   $test_train_test_split_invalid_sizes2  s    rT  )Nr5   r/   c                 C   s4   t t| d\}}t||ks tt||ks0td S )Nr   )r   rR   r   rC   )r   r   r   r   r   r_   r_   r`   'test_train_test_split_default_test_size  s    rU  c                  C   sD  t dd} t| }t d}t| |d dd}|\}}}}t|t|ksRtt|d d df |d  t|d d df |d  t| || }|\}}}}	}}t	|t
stt	|t
stt dddd	d
}
t dddd}t|
|}|d jdks
t|d jdkst|d
 jdks2t|d	 jdksFtt ddddd
d
d
d
g}td
ddddgd
dd
ddgD ]p\}}t|||dd\}}t||kstt|t| t|kstt |dkt |d
kks~tq~t d}dD ]@}t|d|d\}}t|ddg t|ddd
d	ddddg qd S )Nr   r+   r+   r+   rf   r  r   r   r-   r/   r,   i  r5   r   )r5   r-   r/   r,   r.   )r/   r-   r/   r,   )r5   r5   r   )r/   r5   r   r0   r   r   r4   )rh   rM  r8   )r,   r   F)r   rh   r6   r   )r>   r   rp   r   r   r   rC   r   r/  
isinstancerF   r0  r?   rA   r   )rR   X_srS   rG   r   r   r  r  Z	X_s_trainZX_s_testZX_4dZy_3drh   Zexp_test_sizer\   r]   r_   r_   r`   test_train_test_split  sF    

&   
&
rY  c                  C   sb   d} t | }|d|  k}t|||dd}|\}}}}|j|j | ksJt|j|j | ks^tdS )zCheck for integer overflow on 32-bit platforms.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20774
    i Gz?r   )rM  r   N)r>   r   r   rc   rC   )Z
big_numberrR   rS   rG   r   r   r  r  r_   r_   r`   $test_train_test_split_32bit_overflow  s    	
r[  c                  C   st   t g} zddlm} | | W n tk
r4   Y nX | D ]4}|t}t|\}}t||s`tt||s:tq:d S )Nr   )	DataFrame)	r   Zpandasr\  r   ImportErrorrR   r   rW  rC   )typesr\  InputFeatureTypeX_dfr   r   r_   r_   r`   test_train_test_split_pandas3  s    ra  c                  C   sX   t dd} tttg}|D ]4}|| }t|\}}t|tsDtt|tstqd S )Nr   rV  )	r>   r   rp   r   r   r   r   rW  rC   )rR   Zsparse_typesr_  rX  r   r   r_   r_   r`   test_train_test_split_sparseE  s    
rb  c                  C   s@   t t} t| \}}t|t s"tt|t s0tt| \}}d S rw   )r   rR   r   rW  rC   )r`  r   r   ZX_train_arrZ
X_test_arrr_   r_   r`   !test_train_test_split_mock_pandasQ  s
    rc  c                  C   s   t d} dgd dgd  }t t dt df}| }dD ]}t| ||rV|nd dd\}}}}t| ||rv|nd dd\}	}
}}t| ||r|nd dd\}}}}t j||	 t j|| t j|| t j|| qDd S )	Nr5   r1   r0   r   r/   r   r   )rM  r8   )r>   r   hstackr   r/  r   rD   rE   )rR   r   r   y3rM  ZX_train1ZX_test1Zy_train1Zy_test1ZX_train2ZX_test2Zy_train2Zy_test2ZX_train3ZX_test3Zy_train3Zy_test3r_   r_   r`    test_train_test_split_list_inputZ  s6    
  
   
   
 rf  ztest_size, train_size)r   N)r  N)r   gffffff?)Ny              ?)r   N)r+   N)r6   r/   c              	   C   s0   t t tt| |dt W 5 Q R X d S )Nr  )rL   rM   rN   r   r   rG   rR   r  r_   r_   r`   test_shufflesplit_errorsr  s    rg  c                  C   s8   t dd} tdd | tD dd | tD  d S )N   r7   c                 S   s   g | ]\}}|qS r_   r_   )r   r)  r*  r_   r_   r`   r     s     z2test_shufflesplit_reproducible.<locals>.<listcomp>)r   r   rG   rR   )rX   r_   r_   r`   test_shufflesplit_reproducible{  s    
ri  c                  C   s   t ddd} td}dgd dgd  }ttdtdf}| }tjt| 	||t| 	|| tjt| 	||t| 	|| d S )	Nr,   rL  r   r5   r1   r0   r   r/   )
r   r>   r   rd  r   r/  rD   rE   rF   rG   )r  rR   r   r   re  r_   r_   r`   &test_stratifiedshufflesplit_list_input  s    
&rj  c                  C   sX   t jdt jddd} t j| dd d f< t ddg| jd d }t| |dd	d
 d S )N   rI   r+   rd   r,   r   r.   r   rL  r   )r>   r   Zfloat64rp   r1  repeatr0  r   r  r_   r_   r`    test_train_test_split_allow_nans  s    rn  c                  C   s^  t d} tddd}t jttd| t||  t dddddddddg	}td|dd}t jtt	d| |t|| | t dddddddddg	}td|dd}t jtt	d| |t|| | |
d	d}td|dd}t jtt	d| |t|| | t tt	d| |d ttd| |d krdtt d
} t ddddgddddgddddgddddgddddgg}td|dd}t jttd| t||  t ddgddgddgddgddgg}td|dd}t jttd| t||  tt tdd W 5 Q R X d S )Nr   r/   F)
classifierr   r.   Tr,   rd   r-   rW   r   )r>   r   r   rD   rE   rF   r   rG   r?   r   rp   allr   rC   rL   rM   rN   )rR   r[   Zy_binaryZy_multiclassZy_multiclass_2drt   Zy_multioutputr_   r_   r`   test_check_cv  sJ    
&  

4&(&rq  c                  C   s   t  tt} t| }tjt|ttt|tt t dddtt}t|}tjt|ttt|tt z.d}tjt|ttt|tt W n t	k
r   d}Y nX |rt	dd S )NTr   r   FzVIf the splits are randomized, successive calls to split should yield different results)
r   rG   rR   rS   r   r>   rD   rE   rF   rC   )Zkf_iterZkf_iter_wrappedZkf_randomized_iterZkf_randomized_iter_wrappedZsplits_are_equalr_   r_   r`   test_cv_iterable_wrapper  s.     
rr  c              (   C   s  t jd}d}d}d}t | }}d| }|d||}|| }	tt | t |}
| |d}t|	|||D ]\}\}}||
|< qxt|
t|kst
t |
D ] }|tt|
|k|	 kst
qt |D ]"}tt |
||k dkst
qt j|td}|	|||D ],\}}tt || || dkst
qt d	d
ddd
dddddddddd
ddddddddddddddd d!ddd"d#d$d%d&g&}tt |}t|}d}d| }|| }	t | }}t |}
t|	|||D ]\}\}}||
|< qt|
t|kst
t |
D ]$}|tt|
|k|	 ks(t
q(t D td't t |D ]&}tt |
||k dksnt
qnW 5 Q R X t j|td}|	|||D ],\}}tt || || dkst
qt|	||| }t|	||||D ]&\\}}\}}t|| t|| qt dddd(d(g}t t| }}tjtd)d* ttd+d	||| W 5 Q R X d S ),Nr   r   r   r-   r  rk   r.   rl  ZAlbertZJeanZBertrandZMichelZFrancisZRobertZRachelZLoisZMichelleZBernardZMarionZLauraZFranckZJohnZGaelZAnnaZAlixZDavidZTonyZAbelZBeckyZMadmoodZCaryZMaryZ	AlexandreZBarackZAbdoulZRashaXiZSilviar   r,   z%Cannot have number of splits.*greaterr<   r/   )r>   rm   rn   r   ro   r   r  r   r@   rG   rC   r$  r   rH   objectr   r?   r   r   r   FutureWarningrF   r/  rA   r   rL   rM   rN   r   r   )r   rs   rj   rO   rP   rR   rS   Z	tolerancerT   Zideal_n_groups_per_foldr   Zlkfr;   r   r]   groupr\   Zcv_iterZtrain1r   Ztrain2Ztest2r_   r_   r`   test_group_kfold  s    


 &+
"
.&$
rw  c               	   C   sn  ddgddgddgddgd	d
gddgddgg} t jtdd ttdd|  W 5 Q R X td}|| d d }t|\}}t|ddg t|ddg t|\}}t|ddddg t|ddg td| }t|\}}t|dddg t|ddg t|\}}t|dddddg t|ddg td| }tt|}||	 ks\t
|dksjt
d S )Nr.   r,   r/   r0   r-   r4   r5   r6   r   r+   r   r      r   z$Cannot have number of folds.*greaterr<   rk   rd   r   )rL   rM   rN   r   r   rG   r   r   rF   rB   rC   )rR   Ztscvr   r\   r]   r  r_   r_   r`   test_time_series_cva  s,    .ry  c                 C   s^   t | |D ]N\\}}\}}t|| t||ks4ttt|| d}t|||d   q
d S )Nr   )rA   r   r   rC   r   )r   check_splitsmax_train_sizer\   r]   Zcheck_trainZ
check_testZsuffix_startr_   r_   r`   !_check_time_series_max_train_size  s
    
r|  c                  C   s~   t d} tdd| }tddd| }t||dd tddd| }t||dd tddd| }t||dd d S )N)r4   r.   r/   rk   )rP   r{  )r{  r,   r-   )r>   r   r   rG   r|  )rR   r   rz  r_   r_   r`   test_time_series_max_train_size  s    
r}  c               	   C   sJ  t d} tddd| }t|\}}t|dg t|dddg t|\}}t|ddddg t|ddd	g t|\}}t|ddddddd	g t|d
ddg tdddd| }t|\}}t|ddddg t|d	d
g t|\}}t|ddd	d
g t|ddg tjtdd  tddd| }t| W 5 Q R X d S )Nr+   r.   r/   )rP   rh   r   r.   r,   r0   r-   r4   r5   r6   r   )rP   rh   r{  zToo many splits.*with test_sizer<   	r>   r   r   rG   r   r   rL   rM   rN   rR   r   r\   r]   r_   r_   r`   test_time_series_test_size  s*    
r  c               	   C   s  t d} tddd| }t|\}}t|ddg t|dddg t|\}}t|dddd	dg t|d
ddg td	ddd| }t|\}}t|ddg t|ddg t|\}}t|dd	g t|dd
g t|\}}t|ddg t|ddg tddddd| }t|\}}t|dddd	g t|dd
g t|\}}t|dd	ddg t|ddg tddd	d| }t|\}}t|ddg t|dddg t|\}}t|dddd	dg t|d
ddg tjtdd  tddd| }t| W 5 Q R X d S )Nr~  r,   )rP   gapr   r.   r0   r-   r4   r/   r5   r6   r   )rP   r  r{  )rP   r  r{  rh   )rP   r  rh   zToo many splits.*and gapr<   r  r  r_   r_   r`   test_time_series_gap  sF    
r  c               	   C   s   t jd} tdddd\}}| ddd}t tddtddg}t|dD ]:\}}t	t
 dd	d
gi|dd}t|||||d|id qNd S )Nr   r   r,   )rO   Z	n_classesr8   r-   rk   r/   ZstrategyZ
stratifiedZmost_frequentraise)Z
param_gridr[   Zerror_scorerT   )rR   rS   rT   r[   Z
fit_params)r>   rm   rn   r)   ro   r   r   r   r   r   r#   r   )rs   rR   rS   rT   ZcvsZinner_cvZouter_cvZgsr_   r_   r`   test_nested_cv  s,    
     r  c                  C   s(   G dd d} t | dddks$td S )Nc                   @   s   e Zd ZdddZdd ZdS )z%test_build_repr.<locals>.MockSplitterr   Nc                 S   s   || _ || _|| _d S rw   )r)  r*  r+  )selfr)  r*  r+  r_   r_   r`   __init__  s    z.test_build_repr.<locals>.MockSplitter.__init__c                 S   s   t | S rw   r%   )r  r_   r_   r`   __repr__  s    z.test_build_repr.<locals>.MockSplitter.__repr__)r   N)rA  
__module____qualname__r  r  r_   r_   r_   r`   MockSplitter  s   
r  r-   r4   zMockSplitter(a=5, b=6, c=None))rK   rC   )r  r_   r_   r`   test_build_repr  s    	r  
CVSplitterc              	   C   sN   | dd}dggdg }}t jtdd t|j||dgd W 5 Q R X d S )NrZ  rg   r.   r   [With n_samples=1, test_size=0.99 and train_size=None, the resulting train set will be emptyr<   r   )rL   rM   rN   r   rG   )r  r[   rR   rS   r_   r_   r`   !test_shuffle_split_empty_trainset  s    
r  c               	   C   sj   dgg\} t jtdd t| dd W 5 Q R X dgdgdgg} t jtdd t| dd W 5 Q R X d S )Nr.   r  r<   rZ  rg   z[With n_samples=3, test_size=0.67 and train_size=None, the resulting train set will be emptygq=
ףp?)rL   rM   rN   r   r   r_   r_   r`   $test_train_test_split_empty_trainset/  s    
r  c               	   C   sD   t  } dggdg }}tjtdd t| || W 5 Q R X d S )Nr.   r   z+Cannot perform LeaveOneOut with n_samples=1r<   )r   rL   rM   rN   r   rG   r[   rR   rS   r_   r_   r`   !test_leave_one_out_empty_trainsetE  s    r  c               	   C   sV   t dd} dgdggddg }}tjtdd t| j||ddgd W 5 Q R X d S )	Nr,   re   r.   r   r/   z6p=2 must be strictly less than the number of samples=2r<   r   )r   rL   rM   rN   r   rG   r  r_   r_   r`   test_leave_p_out_empty_trainsetM  s    
 r  Klassc              	   C   s,   t jtdd | dddd W 5 Q R X d S )Nz$has no effect since shuffle is Falser<   r/   Fr   r   )rL   rM   rN   )r  r_   r_   r`   test_random_state_shuffle_falseW  s    r  zcv, expected{   r   r7   ri   re   c                 C   s   t | |kstd S rw   )r'   rC   )r[   r   r_   r_   r`   test_yields_constant_splits^  s    %r  )N)__doc__r   rL   r9  Znumpyr>   Zscipy.sparser   r   r   Zscipyr   Zscipy.specialr   	itertoolsr   r   r	   Zsklearn.utils._testingr
   r   r   r   Zsklearn.utils.validationr   Zsklearn.utils._mockingr   Zsklearn.model_selectionr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   Zsklearn.dummyr#   Zsklearn.model_selection._splitr$   r&   r'   Zsklearn.datasetsr(   r)   Zsklearn.svmr*   r   rR   r   rS   ZeyeZP_sparser?   r"  r   ra   rv   r|   r   r   r   r   r   markZparametrizer   r   r   r   r   r   r   r   r   r   rH   r   r   r   r   r   r  r  r  r  r  r  r  r!  r(  r4  r6  r8  r;  r=  rB  rF  rH  rJ  rK  rO  rR  rT  rU  rY  r[  ra  rb  rc  rf  rg  ri  rj  rn  rq  rr  rw  ry  r|  r}  r  r  r  r  r  r  r  r  r  rm   rn   r  r_   r_   r_   r`   <module>   s  
  &(
U'I+


,  

  

%4%7#
	
	

 
	/
	
.!
{%#9 


$