U
    3di                     @   s  d dl m Z  d dlmZ d dlZd dlZd dlZd dlZd dlZd dl	m
Z d dlmZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dlm Z  d dlm!Z! d dlm"Z" d dlm#Z# d dl$m%Z% d dl&m'Z' e(d)dZ*dd Z+dd Z,dd Z-d d! Z.d"d# Z/d$d% Z0d&d' Z1d(d) Z2d*d+ Z3d,d- Z4ej56d.d/d0d1e7d2d3fd d4d5gd6fd7d8d9gd:fd;d<e8dddfe8d d5d6fej9d d4d5gej:d=d6fej9d d4d5gej;d=d6fej9d d4d5gej<d=d6fd2d>gd3fd?e9d2d>gd3fd@dAdBdCgd:fdDe8dEdFd:fe9dAdBdCgd:fej9dAdBdCge=d=d:fgdGdH Z>dIdJ Z?dKdL Z@ej56dMdNdOdPdQgej56dRdNdSdOdTdUgdVdW ZAej56dMdNdOdTgej56dRdNdSdOdTdUgdXdY ZBej56dMdOdPdQgej56dRdNdSdOdTdUgej56dZd4d5gdBdCggd[d\ ZCej56d]d2d>gej56d^d2d>gej56dMdOdPdQgej56dRdOdTgej56d_d d`dadbgdcdddggfd4d5degdadbgdddggfgdfdg ZDej56dMdNdOdTgej56dRdNdSdOdTgdhdi ZEej56dMdOdPdQgej56dRdNdSdOdTgej56djd d`dadbgdcdddggfd4d5degdadbgdddggfgdkdl ZFej56dmdndodpdqgdrds ZGej56dMdNdOdTgdtdu ZHej56dmdodpdqgej56dZd5dCgdvdw ZIej56dMdNdOdPgdxdy ZJdzd{ ZKej56d|ddegd}d~ ZLej56ddOdTgdd ZMdd ZNdd ZOej56dddgdd ZPej56ddgdgddgddgddggdd ZQdd ZRdd ZSdd ZTej56ddddddddgdd ZUdd ZVej56dddgdejWd>fdejWd>fdejWde d2fdejWd2fdejWd d>fgej56ddgddddgdd ZXej56ddgdedddd fdedddd fdgdd ZYej56deZdd2fej[d2feZej[d2fe\ej[d2fe]ej[d2fdddddej[gd>fdgddÄ Z^ddń Z_ddǄ Z`ddɄ Zaej56de9d4e9d5gd4d5gded`gggdd̄ Zbej56dMdOdPdQgdd΄ ZcdS )    )copy)chainN)assert_array_equalassert_allclose_dense_sparseassert_no_warnings_convert_container)check_random_state)_determine_key_type)
deprecated)gen_batches)_get_column_indices)resample)	safe_mask)column_or_1d)_safe_indexing)_safe_assign)shuffle)gen_even_slices)_message_with_time_print_elapsed_time)get_chunk_n_rows)is_scalar_nan)_to_object_array)_approximate_mode)MockDataFrame)config_context	   )   r   c               	   C   s   t d tjjjkstt tjtjjjks.ttjd} t dd| dksVttjd} t | | ksrttjd} t dd| dkstt	t
 t d W 5 Q R X d S )N*   d   +   zsome invalid seed)r   nprandomZmtrandZ_randAssertionErrorRandomStaterandintpytestraises
ValueError)Zrng_42 r)   B/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/utils/tests/test_utils.pytest_make_rng)   s    r+   c               	   C   s   t ttddtddd tddd g d} tjt| d ttdd W 5 Q R X d}tjt|d ttdd W 5 Q R X d S )N      r   z.gen_batches got batch_size=0, must be positivematchz2gen_batches got batch_size=0.5, must be an integerg      ?)	r   listr   slicer&   r'   r(   next	TypeError)Zmsg_zeroZ	msg_floatr)   r)   r*   test_gen_batches;   s    (r4   c               	   C   s  t jddn} t d t dd }| }|dks8tt| dksHtt| d jts\td	t	| d j
 ksvtW 5 Q R X t jddz} t d td
G dd d}| }t|dstt| dkstt| d jtstd	t	| d j
 kstW 5 Q R X d S )NT)recordalwaysc                   S   s   dS )Nspamr)   r)   r)   r)   r*   hamP   s    ztest_deprecated.<locals>.hamr7      r   r
   zdon't use thisc                   @   s   e Zd ZdZdS )ztest_deprecated.<locals>.Hamr9   N)__name__
__module____qualname__SPAMr)   r)   r)   r*   Ham`   s   r>   r=   )warningscatch_warningssimplefilterr
   r#   len
issubclasscategoryFutureWarningstrmessagelowerhasattr)wr8   r7   r>   r)   r)   r*   test_deprecatedH   s$    

$
rK   c                	   C   s   t  d ksttt t dgddg W 5 Q R X tt t ddgddgddd W 5 Q R X tt ddgdddks~td S )	Nr   r9   Fr   )replace	n_samplesr-      )rM   )r   r#   r&   r'   r(   rB   r)   r)   r)   r*   test_resamplem   s    "rO   c                  C   s   t jd} d}d}| j|dfd}| jd||d}t||ddd d\}}t |dks\tt||dd|d\}}t |dkrtt |dkstd S )	Nr   r   g?r9   size
   rM   random_statestratifyr   )	r!   r"   r$   normalZbinomialr   allr#   sum)rngrM   pXy_Zy_not_stratifiedZy_stratifiedr)   r)   r*   test_resample_stratified{   s    r^   c                  C   s   t jd} d}| j|dfd}| jdd|d}t||dd| |d\}}t||d	d| |d\}}t |jd dk sxtt |jd dkstt||dd
| |d\}}|jd d
kstt |jd dkstd S )Nr   r   r9   rP   r-   T2   )rL   rM   rT   rU   F  )	r!   r"   r$   rV   r%   r   uniqueshaper#   )rY   rM   r[   r\   Z	X_replacer]   ZX_no_replacer)   r)   r*    test_resample_stratified_replace   s@         
     
     
rc   c                  C   s\   t jd} d}| j|dfd}| jdd|dfd}t||d| |d\}}|jdksXtd S )Nr   r   r9   rP   r-   r_   rS   )r!   r"   r$   rV   r%   r   ndimr#   )rY   rM   r[   r\   r)   r)   r*   test_resample_stratify_2dy   s    re   c               	   C   sn   t jd} d}| j|dfd}| jdd|d}t|}tjt	dd t
||d| |d\}}W 5 Q R X d S )	Nr   r   r-   rP   zA sparse matrix was passedr.   r_   rS   )r!   r"   r$   rV   r%   sp
csr_matrixr&   r'   r3   r   )rY   rM   r[   r\   rU   r)   r)   r*   #test_resample_stratify_sparse_error   s    
rh   c                  C   sp   t d} | dd}t|}dddddg}t||}|| jd dksLtt||}|| jd dksltd S )Nr   rN   r,   FTr   )r   Zrandrf   rg   r   rb   r#   )rT   r[   ZX_csrmaskr)   r)   r*   test_safe_mask   s    


rj   c                  C   s  ddddgfdddddgfdt dd fd	dd
dgfd	ddd
d
dgfd	dgd
gdggfddddgdddggfddd
dggfdddgd
d
gddggfdddgdd
gddggfddd
dggfdt ddfg} | D ]D\}}|dkrtt|t | qtt t| W 5 Q R X qd S )Nbinaryr7   Zeggr   r9   
continuousrR   g      4@
multiclassr-   r   zmultilabel-indicatorzmulticlass-multioutputrN   r,   zcontinuous-multioutput   )r   )rk   rm   rl   )	r!   arangereshaper   r   Zravelr&   r'   r(   )ZEXAMPLESZy_typer\   r)   r)   r*   test_column_or_1d   s$    rr   z
key, dtype)r   int)0rF   )TboolTru   r9   r-   rs   rt   12rF   ))r   r9   r-   rs   ))rt   rv   rw   rF   dtypeF))TFru   )col_0rF   rz   col_1col_2))rz   r{   r|   rF   beginendc                 C   s   t | |kstd S N)r	   r#   )keyry   r)   r)   r*   test_determine_key_type   s    r   c                	   C   s&   t jtdd td W 5 Q R X d S )NzNo valid specification of ther.         ?)r&   r'   r(   r	   r)   r)   r)   r*   test_determine_key_type_error   s    r   c                	   C   s2   t jtdd ttddddd W 5 Q R X d S )NzOnly array-like or scalar arer.   r   r-   r9   F)Zaccept_slice)r&   r'   r3   r	   r1   r)   r)   r)   r*   #test_determine_key_type_slice_error  s    r   
array_typer0   arraysparse	dataframeindices_typetupleseriesr1   c                 C   s   ddg}|dkr.t |d tr.|d  d7  < tdddgdddgdd	d
gg| }t||}t||dd}t|tdddgdd	d
gg|  d S Nr9   r-   r1   r   r,   rN            r   r   axis
isinstancers   r   r   r   r   r   indicesr   subsetr)   r)   r*   &test_safe_indexing_2d_container_axis_0  s    "
 r   c              
   C   sz   ddg}|dkr.t |d tr.|d  d7  < tdddddddd	d
g	| }t||}t||dd}t|tddg|  d S r   r   r   r)   r)   r*   test_safe_indexing_1d_container  s    
r   r   c              	   C   s   t |}|dkr.t|d tr.|d  d7  < dddg}tdddgdd	d
gdddgg| |}t||}t|d tr| dkrd}tjt|d t||dd W 5 Q R X n0t||dd}t	|tddgd	d
gddgg|  d S )Nr1   r9   rz   r{   r|   r-   r   r,   rN   r   r   r   r   r   r   LSpecifying the columns using strings is only supported for pandas DataFramesr.   r   )
r   r   rs   r   rF   r&   r'   r(   r   r   )r   r   r   Zindices_convertedcolumns_namer   err_msgr   r)   r)   r*   &test_safe_indexing_2d_container_axis_1!  s(    
  
 r   array_read_onlyindices_read_onlyzaxis, expected_arrayr,   rN   r   r   r   r   c           	      C   s   t dddgdddgddd	gg}| r2|jd
d t||}t ddg}|rZ|jd
d t||}t|||d}t|t|| d S )Nr9   r-   r   r,   rN   r   r   r   r   F)writer   )r!   r   Zsetflagsr   r   r   )	r   r   r   r   r   expected_arrayr   r   r   r)   r)   r*   &test_safe_indexing_2d_read_only_axis_1?  s    
"

r   c              
   C   sf   dgdgd  dgd  }t ddddddd	d
dg	| }t ||}t||dd}t|t ddg|  d S )NFTr-   r   r9   r   r,   rN   r   r   r   r   r   r   r   r   r   r)   r)   r*   $test_safe_indexing_1d_container_maskU  s
    
r   zaxis, expected_subsetc                 C   sd   dddg}t dddgddd	gd
ddgg| |}dddg}t ||}t|||d}t|t ||  d S )Nrz   r{   r|   r9   r-   r   r,   rN   r   r   r   r   FTr   r   )r   r   r   Zexpected_subsetr   r   r   r   r)   r)   r*   test_safe_indexing_2d_mask_  s    
  

 r   z array_type, expected_output_type)r0   r0   )r   r   )r   r   )r   r   c                 C   sR   t dddgdddgddd	gg| }d}t||d
d}t ddd	g|}t|| d S Nr9   r-   r   r,   rN   r   r   r   r   r   r   r   )r   expected_output_typer   r   r   r   r)   r)   r*   #test_safe_indexing_2d_scalar_axis_0s  s
    
"r   c              
   C   s>   t ddddddddd	g	| }d}t||d
d}|dks:td S r   )r   r   r#   )r   r   r   r   r)   r)   r*   test_safe_indexing_1d_scalar  s    r   c           	   	   C   s   dddg}t dddgddd	gd
ddgg| |}t|trn| dkrnd}tjt|d t||dd W 5 Q R X nDt||dd}dd	dg}|dkrdgd	gdgg}t ||}t|| d S )Nrz   r{   r|   r9   r-   r   r,   rN   r   r   r   r   r   r   r.   r   r   )r   r   rF   r&   r'   r(   r   r   )	r   r   r   r   r   r   r   Zexpected_outputr   r)   r)   r*   #test_safe_indexing_2d_scalar_axis_1  s"    
  

r   c                 C   s>   t dddgdddgddd	gg| }t|d d
d}t|| d S r   r   )r   r[   ZX_subsetr)   r)   r*   test_safe_indexing_None_axis_0  s    "r   c               	   C   sF   t d} d}| t}t jt|d t|dgdd W 5 Q R X d S )Npandasz&No valid specification of the columns.r.   r   r9   r   )r&   importorskip	DataFrameX_toyr'   r(   r   )pdr   r[   r)   r)   r*   0test_safe_indexing_pandas_no_matching_cols_error  s
    

r   r   c              	   C   s0   t jtdd ttddg| d W 5 Q R X d S )Nz'axis' should be either 0r.   r   r9   r   )r&   r'   r(   r   r   r   r)   r)   r*   test_safe_indexing_error_axis  s    r   X_constructorc              	   C   sp   t td}| dkr t|} n| dkr<td}||} d}tjt|d t	| ddgdd	 W 5 Q R X d S )
NrN   r   r   r   z:'X' should be a 2D NumPy array, 2D sparse matrix or pandasr.   r   r9   r   )
r0   ranger!   Zasarrayr&   r   ZSeriesr'   r(   r   )r   r[   r   r   r)   r)   r*   !test_safe_indexing_1d_array_error  s    

r   c               	   C   sT   ddg} dddgdddgd	d
dgg}d}t jt|d t|| dd W 5 Q R X d S )Nr{   r|   r9   r-   r   r,   rN   r   r   r   r   z.String indexing is not supported with 'axis=0'r.   r   r   )r&   r'   r(   r   )r   r   r   r)   r)   r*   4test_safe_indexing_container_axis_0_unsupported_type  s
    r   c               	   C   s   t d} | dddgdddgd}t|ddgdd	}t| jd
rN| jj}n
| jjj}t	
  t	d| d|jd< W 5 Q R X |jd dkstd S )Nr   r9   r-   r   r,   rN   )abr   r   SettingWithCopyWarningerrorrR   )r   r   )r&   r   r   r   rI   errorsr   corecommonr?   r@   rA   Zilocr#   )r   r[   r   r   r)   r)   r*   4test_safe_indexing_pandas_no_settingwithcopy_warning  s    



r   zkey, err_msg)rR   z all features must be in \[0, 2\])Zwhateverz/A given column is not a column of the dataframec              	   C   sF   t d}|jtdddgd}t jt|d t||  W 5 Q R X d S )Nr   rz   r{   r|   columnsr.   )r&   r   r   r   r'   r(   r   )r   r   r   ZX_dfr)   r)   r*   test_get_column_indices_error  s    
r   r   col1col2col3c              	   C   st   t d}tjdtd}dddddg}|j||d}d| }t t}t	||  W 5 Q R X t
|j|ksptd S )	Nr   )r9   rN   rx   r   r   r   r   z1Selected columns, {}, are not unique in dataframe)r&   r   r!   zerosrs   r   formatr'   r(   r   rF   valuer#   )r   r   Ztoyr   r[   r   exc_infor)   r)   r*   6test_get_column_indices_pandas_nonunique_columns_error  s    

r   c                  C   sZ   dd } t ddgddggddgd	d
ggg}t| |}t| t| ||ksVtd S )Nc                 S   s   t dd | D S )Nc                 s   s    | ]}t d d |D V  qdS )c                 s   s   | ]}t |V  qd S r   r   ).0Cr)   r)   r*   	<genexpr>  s     zPtest_shuffle_on_ndim_equals_three.<locals>.to_tuple.<locals>.<genexpr>.<genexpr>Nr   )r   Br)   r)   r*   r     s     zFtest_shuffle_on_ndim_equals_three.<locals>.to_tuple.<locals>.<genexpr>r   )Ar)   r)   r*   to_tuple  s    z3test_shuffle_on_ndim_equals_three.<locals>.to_tupler9   r-   r   r,   rN   r   r   r   )r!   r   setr   r#   )r   r   Sr)   r)   r*   !test_shuffle_on_ndim_equals_three  s
    &r   c            
      C   sF  dddg} t jdddgtd}dddg}tt jddgddgddggtd}tt d	dd}t| ||||dd
\}}}}}	|dddgkst	t
|tkst	t|dddg |jtkst	|dddgkst	t
|tkst	t|t jddgddgddggtd t
|tkst	t|	 t ddgddgddgg d S )Nr   r   crx   r9   r-   r   r   r   )rT   r,   rN   )r!   r   objectr   rf   Z
csc_matrixrp   rq   r   r#   typer0   r   ry   Ztoarray)
r   r   r   deZa_sZb_sZc_sZd_sZe_sr)   r)   r*   "test_shuffle_dont_convert_to_array  s    

$&r   c               	      sb   t d tt fddtddD  } t |  tdd}tjtdd t| W 5 Q R X d S )NrR   c                    s   g | ]} | qS r)   r)   )r   r1   Z
some_ranger)   r*   
<listcomp>,  s     z(test_gen_even_slices.<locals>.<listcomp>r   ro   z+gen_even_slices got n_packs=-1, must be >=1r.   )	r   r0   r   r   r   r&   r'   r(   r2   )Zjoined_rangeZslicesr)   r   r*   test_gen_even_slices)  s     

r   )	row_bytes
max_n_rowsworking_memoryexpected)   Nr9   r   )r   NgG?  )r   Nr9     )r   Nr9   r   )r   Nr-   i   )r   r   r9   r   )i   Nr9   r9   c              
   C   s   t    t dt t| ||d}W 5 Q R X ||ks:tt|t|ksNtt|dR t   t dt t| |d}W 5 Q R X ||kstt|t|kstW 5 Q R X d S )Nr   r   r   r   r   r   r   )r?   r@   rA   UserWarningr   r#   r   r   )r   r   r   r   actualr)   r)   r*   test_get_chunk_n_rows5  s    

r   c               
   C   s   d} d}d}d}d}t jt|d t| ||d}W 5 Q R X ||ksHtt|t|ks\tt|dL t jt|d t| |d}W 5 Q R X ||kstt|t|kstW 5 Q R X dS )	z<Check that warning is raised when working_memory is too low.i  Nr9   zICould not adhere to working_memory config. Currently 1MiB, 2MiB required.r.   r   r   r   )r&   Zwarnsr   r   r#   r   r   )r   r   r   r   Zwarn_msgr   r)   r)   r*   test_get_chunk_n_rows_warnsT  s&    r   sourcerG   is_longABCZABCDEFZABCABCABCABCABCABCABCABCABCABCu   ၈timetime_str)g?z   0.2s)   z  20.0s)i  z33.3min)i N  z333.3minc                 C   s   t | ||}|r"t|dks2tnt|dks2t|d|  d sHt|t| d d  }||sjt|d t|  }|dst|d td  }||st|d t|  }|dst|d d }|r|rtntt|dgkstd S )	NF   [z] r   z, total= ro   .)r   rB   r#   
startswithendswithr0   r   )r   rG   r   r   r   outr)   r)   r*   test_message_with_timeo  s"    
r   r   Zhello皙?
 )Nr   c              	   C   sP   | tddd  td|  | tddd  W 5 Q R X | j|ksLtd S )NZdefault_timerc                   S   s   dS )Nr   r)   r)   r)   r)   r*   <lambda>      z)test_print_elapsed_time.<locals>.<lambda>r   c                   S   s   dS )Nr   r)   r)   r)   r)   r*   r     r   )setattrtimeitr   Z
readouterrr   r#   )rG   r   ZcapsysZmonkeypatchr)   r)   r*   test_print_elapsed_time  s    	r   zvalue, resultnan)r   F)g        F)NF)r   F)r   F)l   	H]vieFc                 C   s&   t | |ksttt | ts"td S r   )r   r#   r   ru   )r   resultr)   r)   r*   test_is_scalar_nan  s    r  c                  C   s4   t jddgt jd} t| ddd}t|ddg d	S )
zMake sure sklearn.utils._approximate_mode returns valid
    results for cases where "class_counts * n_draws" is enough
    to overflow 32-bit signed integer.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20774
    i r`   rx   ia  r   )Zclass_countsZn_drawsrY   i`     N)r!   r   int32r   r   )r[   retr)   r)   r*   test_approximate_mode  s    r  c                   C   s   d S r   r)   r)   r)   r)   r*   
dummy_func  s    r  c                 C   sB   ddl m}m} t|dd  t|dd  ddlm} |jjd= d S )Nr   )parallel_backendregister_parallel_backendZlokyZfailing)joblib)sklearn.utilsr  r	  r   Zsklearn.utils._joblibr
  parallelZBACKENDS)Ztmpdirr  r	  r
  r)   r)   r*   test_deprecation_joblib_api  s
    r  sequencec                 C   s:   t | }t|tjst|jjdks(t|jdks6td S )NOr9   )r   r   r!   Zndarrayr#   ry   kindrd   )r  r   r)   r)   r*   test_to_object_array  s    r  c                 C   s   t jd}|dd}ddg}|t||jd }t|| }t|||d t||dd}t	|t||  ddg}||jd t|}t|| }t|||d t||dd}t	|t||  d	\}}|j|j }t|| }t|||d t	|t||  d
S )z,Check that `_safe_assign` works as expected.r   rR   rN   r9   r-   )row_indexerr   )column_indexer)NNN)
r!   r"   r$   ZrandnrB   rb   r   r   r   r   )r   rY   ZX_arrayr  valuesr[   Zassigned_portionr  r)   r)   r*   test_safe_assign  s2    
 
 
r  )dr   	itertoolsr   r?   stringr   r&   Znumpyr!   Zscipy.sparser   rf   Zsklearn.utils._testingr   r   r   r   r  r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zsklearn.utils._mockingr   Zsklearnr   rp   rq   r   r+   r4   rK   rO   r^   rc   re   rh   rj   rr   markZparametrizeZbool_r1   r   r  Zint64Zuint8r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ascii_lowercaser   r   floatr   Zfloat32Zfloat64r  r  r  r  r  r  r)   r)   r)   r*   <module>   s  %



 00	





 






	



,
