U
    2d0                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dl	m
Z
 d dl	mZ d dl	mZ d dl	mZ d dl	mZ d d	lmZ d d
lmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZmZmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* dd Z+dd Z,dd Z-e j./ddddd gd!d" Z0e j./ddddgd#d$ Z1e j./ddddgd%d& Z2e j./ddddd gd'd( Z3d)d* Z4d+d, Z5d-d. Z6d/d0 Z7e j./dddge j./d1de8e9gd2d3 Z:e j./dddge j./d4d5d6gd7d8 Z;e j./dd dge j./d1e9e<d9e<d:gd;d< Z=d=d> Z>e j./d?dej?d@dAd gdBdC Z@e j./d1e8dDgdEdF ZAe j./dGdHdIej?fgdJdK ZBdLdM ZCe j./dNejDejEgdOdP ZFe j./d?dej?d@dAd gdQdR ZGe j./d1e8dDgdSdT ZHe j./dUdVgdWggdVgej?gggdXdY ZIdZd[ ZJd\d] ZKd^d_ ZLd`da ZMdbdc ZNe j./dddedfdgdhdigdjdk ZOe j./dlde e e e gdmdn ZPdodp ZQdqdr ZRdsdt ZSe j./ddddgdudv ZTdwdx ZUdydz ZVd{d| ZWe j./d}d~dgdd ZXdd ZYdd ZZdd Z[e j.j/dd de\d gd~ dgd~ gfdde\ej] gd~ ej]gd~ gfej] ej]e\ej] gd~ ej]gd~ gfdddgdddge\dddgdddggfdej] dgddej]ge\dej] dgddej]ggfgdddddgddd Z^e j./ddej]ej] dfddgddd gdfgdd Z_e j.j/dddgej] ej]gfddgdgd dgd gfgddgddd Z`e j./dddgdd Zae j./dddVejbjcdVdge j./dddVejbjcdVdgdd Zde j./de\ddVgdVdWgge\ddVgdVdggddddfej\ddgddgge9dej\ddgddgge9di dfgdd Zee j./dej?ejfej\fd ejgej\fdejgej\fej?ejfejhfdejgejhfej?ejfejDfdejgejDfej?ejfejifdejgejifej?ejfejjfdejgejjfej?ejfejkfdejgejkfge j./ddd~e\d dVdWgfdd~e\d dVdWgfgdd Zle j./dejhejDejiejjejkgdd Zme j./ddddge j./dej?ej\fd ej\fej?ejhfej?ejDfej?ejifej?ejjfgddń ZnddǄ Zoe j./dej\ddgddgge8ddej\ddddgddddgge8dfe\ej?dIgdIej?ggej?e\dIdIddgdIdIddggfej\ej?dgdej?gge8dej?ej\ddddgddddgge8dfej\ddgddgge8ddej\ddddgddddgge8dfgddʄ Zpe j./deege j./ddej?dfdgddф Zqddӄ ZrddՄ Zse j./deegddׄ Zte j./dejhejDejiejjejkgddل Zue j./dddgddބ Zve j./ddgd~ddWd dVgfdhdVd dWdd~gfgdd Zwe j./ddej?gdd Zxe j./ddej?gdd Zye j./dddddge8ddWfddddge8ddVfdddge8ddWfddddge8ddWfddVdWd~gezddWfdVdVdVdWgezddVfddddVgezddWfdVdVdVdgezddWfgdd Z{e j./ddddd gdd Z|e j./dddgdd Z}dd Z~dd Zdd Ze j./dejejfgdd Ze j./d ddge j./dddgdd Ze j./d ddge j./ddddge j./dddgdd ZdS (      Nsparse)kstest)_convert_container)assert_allclose)assert_allclose_dense_sparse)assert_array_equal)assert_array_almost_equal)enable_iterative_imputer)load_diabetes)MissingIndicator)SimpleImputerIterativeImputer
KNNImputer)DummyRegressor)BayesianRidgeARDRegressionRidgeCV)Pipeline)
make_union)GridSearchCV)tree)_sparse_random_matrix)ConvergenceWarning)_most_frequentc                 C   s   t | | | j|jkstd S N)r   dtypeAssertionErrorxy r!   D/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/impute/tests/test_impute.py"_assert_array_equal_and_same_dtype!   s    
r#   c                 C   s   t | | | j|jkstd S r   )r   r   r   r   r!   r!   r"   _assert_allclose_and_same_dtype&   s    
r$   c           	      C   s   d||f }t }| jjdks(|jjdkr,t}t||d}|| |  }||j||	dd ||||	dd t||d}|t
|  |t
|  }t
|r| }||j||	dd ||||	dd dS )zUtility function for testing imputation for a given strategy.

    Test with dense and sparse arrays

    Check that:
        - the statistics (mean, median, mode) are correct
        - the missing values are imputed correctlyz<Parameters: strategy = %s, missing_values = %s, sparse = {0}fmissing_valuesstrategyF)err_msgTN)r   r   kindr	   r   fit	transformcopyZstatistics_formatr   
csc_matrixissparsetoarray)	XX_truer(   Z
statisticsr'   r)   Z	assert_aeimputerX_transr!   r!   r"   _check_statistics+   s$    	
r6   r(   meanmedianmost_frequentconstantc                 C   s   t jdd}t j|d d d< t| d}|t|}|jdksFt	||}|jdks^t	t
| d}||}|jdkst	d S )N
      r(   )r;   r<   )initial_strategy)nprandomrandnnanr   fit_transformr   
csr_matrixshaper   r   )r(   r2   r4   	X_imputedZiterative_imputerr!   r!   r"   test_imputation_shapeP   s    



rG   c              	   C   st   t d}t j|d d df< t| dd}tjtdd || W 5 Q R X tjtdd |	| W 5 Q R X d S )N      r      r(   verboseThe 'verbose' parametermatchZSkipping)
r?   onesrB   r   pytestwarnsFutureWarningr+   UserWarningr,   r(   r2   r4   r!   r!   r"    test_imputation_deletion_warninga   s    
rW   c              	   C   s   t d}tj}tjddddgtd}|j||d|gd|d	d
gg|d}t| dd}t jt	dd |
| W 5 Q R X t|j| t jtdd || W 5 Q R X d S )Npandasabcdr   rK      r<   r;   columnsrL   rN   rO   z6Skipping features without any observed values: \['b'\])rR   importorskipr?   rB   arrayobject	DataFramer   rS   rT   r+   r   Zfeature_names_in_rU   r,   )r(   pdr'   feature_namesr2   r4   r!   r!   r"   .test_imputation_deletion_warning_feature_nameso   s$    


 rg   c              	   C   s   t d}d|d< t|}t| dd}tjtdd || W 5 Q R X ||	  tjtdd |
| W 5 Q R X d S )NrH   r   )r(   r'   zProvide a dense arrayrO   )r?   rQ   r   r/   r   rR   raises
ValueErrorr+   r1   r,   rV   r!   r!   r"   test_imputation_error_sparse_0   s    

rj   c                 O   s8   t | dr| jnt| }|dkr&tjS tj| f||S Nsizer   )hasattrrl   lenr?   rB   r8   Zarrargskwargslengthr!   r!   r"   safe_median   s    rs   c                 O   s8   t | dr| jnt| }|dkr&tjS tj| f||S rk   )rm   rl   rn   r?   rB   r7   ro   r!   r!   r"   	safe_mean   s    rt   c               
   C   sv  t jd} d}d}|| || f}t |d }t d|d d }|dd d  |dd d< dt jdd fd	t jd
d fg}|D ]\}}}	t |}
t |}t |d }t|d D ]Z}|| d dk|| d  || d  }t|d ||  || ||   d}|d | | }|d | }t 	||}|| 
t|d |  }|	|||||< t |||f|
d d |f< d|krt |t 	|| || f|d d |f< n(t ||t 	|| |f|d d |f< t j||
d d |f  t j||d d |f  q|d	kr<t |jdd }nt |jdd }|d d |f }t|
|||| qd S )Nr   r;   rK   r^   r<   r7   c                 S   s   t t| |fS r   )rt   r?   hstackzvpr!   r!   r"   <lambda>       z-test_imputation_mean_median.<locals>.<lambda>r8   c                 S   s   t t| |fS r   )rs   r?   ru   rv   r!   r!   r"   rz      r{   )Zaxis)r?   r@   RandomStatezerosarangerB   emptyrangemaxrepeatZpermutationrn   ru   shuffleisnananyallr6   )rngZdimdecrE   r}   valuestestsr(   Ztest_missing_valuesZtrue_value_funr2   r3   Ztrue_statisticsjZnb_zerosZnb_missing_valuesZ	nb_valuesrw   ry   rx   Zcols_to_keepr!   r!   r"   test_imputation_mean_median   sJ    

(&
 
r   c                  C   s   t dt jt jgdt jt jgddt jgddt jgddt jgddt jgddt jgddt jgg } t dddgdddgdddgdddgddd	gddd
gdddgdddgg }ddddd	d
ddg}t| |d|t j d S )Nr   rJ   r^   r<   g      g      @g      @g            ?r8   )r?   rb   rB   Z	transposer6   )r2   ZX_imputed_medianZstatistics_medianr!   r!   r"   $test_imputation_median_special_cases   s0    





r   r   c              	   C   s\   t jdddgdddgddd	gg|d
}d}tjt|d t| d}|| W 5 Q R X d S )NrY   rZ   rI   r^   e   gh	   r]   6non-numeric data:
could not convert string to float: 'rO   r=   )r?   rb   rR   rh   ri   r   rC   )r(   r   r2   msgr4   r!   r!   r"   .test_imputation_mean_median_error_invalid_type  s
    &
r   typelist	dataframec              	   C   sn   dddgdddgddd	gg}|d
kr8t d}||}d}t jt|d t| d}|| W 5 Q R X d S )NrY   rZ   rI   r^   r   r   r   r   r   r   rX   r   rO   r=   )rR   ra   rd   rh   ri   r   rC   )r(   r   r2   re   r   r4   r!   r!   r"   :test_imputation_mean_median_error_invalid_type_list_pandas  s    


r   USc              	   C   s   t jt jt jddgt jdt jdgt jddt jgt jdddgg|d}d}tjt|d	  t| d
}||| W 5 Q R X d S )NrY   r%   r[   r\   rZ   r   r]   z#SimpleImputer does not support datarO   r=   )	r?   rb   rB   rR   rh   ri   r   r+   r,   )r(   r   r2   r)   r4   r!   r!   r"   /test_imputation_const_mostf_error_invalid_types$  s    

r   c               	   C   sz   t ddddgddddgddddgddddgg} t dddgdddgdddgdddgg}t| |dt jdddgd d S )	Nr   r   rJ   r<   rI   rK      r9   )r?   rb   r6   rB   )r2   r3   r!   r!   r"   test_imputation_most_frequent9  s    



	r   markerZNAN c                 C   s   t j| | ddg| d| dg| dd| g| dddggtd}t jdddgdddgdddgdddggtd}t| dd	}|||}t|| d S )
NrY   r%   r[   r\   rZ   r   r]   r9   r&   )r?   rb   rc   r   r+   r,   r   r   r2   r3   r4   r5   r!   r!   r"   %test_imputation_most_frequent_objectsT  s&    





r   categoryc                 C   sr   t d}td}|j|| d}tjdddgdddgdddgd	ddggtd}td
d}|	|}t
|| d S )NrX   ,Cat1,Cat2,Cat3,Cat4
,i,x,
a,,y,
a,j,,
b,j,x,r]   rY   ir   r   r    rZ   r9   r=   rR   ra   ioStringIOZread_csvr?   rb   rc   r   rC   r   r   re   r%   dfr3   r4   r5   r!   r!   r"   $test_imputation_most_frequent_pandasq  s    

"

r   zX_data, missing_value)rK   r         ?c              	   C   sN   t jd| td}||d< tjtdd t|ddd}|| W 5 Q R X d S )	NrH   r]   r   r   zimputing numericalrO   r:   r   r'   r(   
fill_value)r?   fullfloatrR   rh   ri   r   rC   )ZX_datamissing_valuer2   r4   r!   r!   r"   +test_imputation_constant_error_invalid_type  s      r   c               	   C   s   t ddddgddddgddddgdd	d
dgg} t d
ddd
gdd
dd
gddd
d
gdd	d
d
gg}tddd
d}|| }t|| d S )Nr   r<   rI   r^   rJ   r   r      r   r   r:   r   )r?   rb   r   rC   r   )r2   r3   r4   r5   r!   r!   r"    test_imputation_constant_integer  s
    22
r   array_constructorc              	   C   s   t t jddt jgdt jdt jgddt jt jgdddt jgg}t ddddgddddgddddgddddgg}| |}| |}tddd	}||}t|| d S )
Ng?r   333333??gffffff?      ?r   r:   )r(   r   )r?   rb   rB   r   rC   r   )r   r2   r3   r4   r5   r!   r!   r"   test_imputation_constant_float  s    	*
r   c                 C   s   t j| dd| gd| d| gdd| | gddd	| ggtd
}t jddddgddddgddddgddd	dggtd
}t| ddd}||}t|| d S )NrY   rZ   r[   r\   r   r%   r   r   r   r]   missingr:   r   )r?   rb   rc   r   rC   r   r   r!   r!   r"   test_imputation_constant_object  s.    









  
r   c                 C   sz   t d}td}|j|| d}tjddddgddddgdd	ddgd
d	ddggtd}tdd}|	|}t
|| d S )NrX   r   r]   r   r   r   rY   r    r   rZ   r:   r=   r   r   r!   r!   r"   test_imputation_constant_pandas  s    








r   r2   rK   r<   c                 C   sf   t  | }|jdkstt  }|dgdgg |jdks@t|dgtjgg |jdksbtd S )Nr   rK   r<   )r   r+   n_iter_r   r?   rB   r2   r4   r!   r!   r"   "test_iterative_imputer_one_feature  s    r   c                  C   st   t dddd} | jd }tdt|dfdtjddfg}d	d
ddgi}t dddd }t||}|| | d S )Nd   皙?)densityr   r4   r'   r   random_stateZimputer__strategyr7   r8   r9   rK   )	r   datar   r   r   ZDecisionTreeRegressorr1   r   r+   )r2   r'   Zpipeline
parametersYZgsr!   r!   r"   $test_imputation_pipeline_grid_search  s    

r   c                  C   sv  t ddddd} |   }tdddd}|||}d|d	< t||krTt|  }t|j	d ddd}|||}d|j	d< t|j	|j	krt|   }tddd
d}|||}d|d	< t
|| |   }t|j	d dd
d}|||}d|j	d< t
|j	|j	 |  }t|j	d dd
d}|||}d|j	d< t|j	|j	krrtd S )NrJ   g      ?r   r   r   r7   T)r'   r(   r-   r   r   F)r   r-   r1   r   r+   r,   r?   r   r   r   r	   Ztocsc)ZX_origr2   r4   Xtr!   r!   r"   test_imputation_copy  s4    



r   c                  C   s   t jd} d}d}t||d| d }|dk}t j||< tdd}||}t||j	
| tdd|}t |
||j	
|krtd|_t|
||j	
| d S )Nr   r   r;   r   r   )max_iterrJ   )r?   r@   r|   r   r1   rB   r   rC   r   initial_imputer_r,   r+   r   r   r   )r   nr\   r2   Zmissing_flagr4   rF   r!   r!   r"   !test_iterative_imputer_zero_iters:  s    


 r   c                  C   sp   t jd} d}d}t||d| d }tdddd}|| || tdddd}|| || d S )	Nr   r   rI   r   r   rK   )r'   r   rM   r<   )r?   r@   r|   r   r1   r   r+   r,   )r   r   r\   r2   r4   r!   r!   r"   test_iterative_imputer_verboseR  s    


r   c                  C   sB   d} d}t | |f}tddd}||}t||j| d S )Nr   rI   r   rK   )r'   r   )r?   r}   r   rC   r   r   r,   )r   r\   r2   r4   rF   r!   r!   r"   "test_iterative_imputer_all_missing`  s    
r   imputation_orderr@   roman	ascending
descendingarabicc           
      C   sR  t jd}d}d}d}t||d|d }d|d d df< td|dd	d
ddd| |d
}|| dd |jD }t||j	 |j
kst| dkrt |d |d  t d|kstn| dkrt |d |d  t |d ddkstn^| dkr*|d |d  }||d d  }	||	ksNtn$d| krNt|||d  ksNtd S )Nr   r   r;   r<   r   r   rK   rJ   FT)
r'   r   n_nearest_featuressample_posteriorskip_complete	min_value	max_valuerM   r   r   c                 S   s   g | ]
}|j qS r!   Zfeat_idx).0r   r!   r!   r"   
<listcomp>  s     z;test_iterative_imputer_imputation_order.<locals>.<listcomp>r   r   r   r@   Zending)r?   r@   r|   r   r1   r   rC   imputation_sequence_rn   r   Zn_features_with_missing_r   r   r~   )
r   r   r   r\   r   r2   r4   Zordered_idxZordered_idx_round_1Zordered_idx_round_2r!   r!   r"   'test_iterative_imputer_imputation_orderi  s>    
(.

r   	estimatorc           	      C   s   t jd}d}d}t||d|d }tdd| |d}|| g }|jD ]>}| d k	r`t| ntt	 }t
|j|szt|t|j qLtt|t|kstd S )Nr   r   r;   r   r   rK   )r'   r   r   r   )r?   r@   r|   r   r1   r   rC   r   r   r   
isinstancer   r   appendidrn   set)	r   r   r   r\   r2   r4   hashesZtripletZexpected_typer!   r!   r"   !test_iterative_imputer_estimators  s$       

r   c                  C   s   t jd} d}d}t||d| d }tdddd| d}||}tt ||dk d tt 	||dk d t||dk ||dk  d S )	Nr   r   r;   r   r   rK   皙?)r'   r   r   r   r   
r?   r@   r|   r   r1   r   rC   r   minr   r   r   r\   r2   r4   r   r!   r!   r"   test_iterative_imputer_clip  s        
r   c                  C   s   t jd} d}d}t||d| d }d|d d df< tdddd	dd
dd| d	}||}tt ||dk d tt 	||dk d
 t||dk ||dk  d S )Nr   r   r;   r   r   rK   r<   rJ   Tr   r@   )	r'   r   r   r   r   r   rM   r   r   r   r   r!   r!   r"   %test_iterative_imputer_clip_truncnorm  s(    
r   c                     s   t jd} | jdd t j d d< tddd| d  t  fdd	td
D }t	|dksnt
t	|dks~t
| |  }}t|| | d\}}|dkr|d7 }t|| | d\}}|dk s|dkst
dd S )N*   )rJ   rJ   )rl   r   r   T)r   r   r   r   c                    s   g | ]}  d  d  qS )r   )r,   )r   _r   r!   r"   r     s     zEtest_iterative_imputer_truncated_normal_posterior.<locals>.<listcomp>r   Znormg-q=r   r   z&The posterior does appear to be normal)r?   r@   r|   normalrB   r   rC   rb   r   r   r   r7   Zstdr   )r   ZimputationsmusigmaZks_statisticZp_valuer!   r   r"   1test_iterative_imputer_truncated_normal_posterior  s&       
r   c                 C   s   t jd}d}d}|jdd||fd}|jdd||fd}d|d d df< d|d< tdd| |d|}td| d	|}t||d d df ||d d df  d S )
Nr   r   r;   rI   )lowhighrl   rK   r   )r'   r   r>   r   r&   )	r?   r@   r|   randintr   r+   r   r   r,   )r(   r   r   r\   X_trainX_testr4   Zinitial_imputerr!   r!   r"   +test_iterative_imputer_missing_at_transform  s(        r   c                  C   s   t jd} t jd}d}d}t||d| d }tddd| d}|| ||}||}t |t	
t |ksttddd	d d
| d}tddd	d d
|d}	|| |	| ||}
||}|	|}t|
| t|
| d S )Nr   rK   r   r;   r   r   T)r'   r   r   r   Fr   )r'   r   r   r   r   r   )r?   r@   r|   r   r1   r   r+   r,   r7   rR   Zapproxr   r   )Zrng1Zrng2r   r\   r2   r4   Z
X_fitted_1Z
X_fitted_2imputer1imputer2ZX_fitted_1aZX_fitted_1br!   r!   r"   .test_iterative_imputer_transform_stochasticity  sL       


	





r  c                  C   s   t jd} | dd}t j|d d df< td| d}td| d}|||}||}t	|d d dd f | t	|| d S )Nr   r   r;   )r   r   rK   )
r?   r@   r|   randrB   r   r+   r,   rC   r   )r   r2   m1m2Zpred1Zpred2r!   r!   r"   !test_iterative_imputer_no_missing?  s    
r  c            	      C   s   t jd} d}| |d}| d|}t ||}| ||dk }| }t j||< tdd| d}||}t	||dd d S )	Nr   2   rK   r   rJ   r   rM   r   g{Gz?atol)
r?   r@   r|   r  dotr-   rB   r   rC   r   )	r   r\   ABr2   nan_mask	X_missingr4   X_filledr!   r!   r"   test_iterative_imputer_rank_oneM  s    

r  rankrI   rJ   c                 C   s   t jd}d}d}||| }|| |}t ||}|||dk }| }t j||< |d }|d | }	||d  }
||d  }tddd|d|	}|	|}t
|
|d	d
 d S )Nr   F   r   r<   rJ   r   rK   )r   r   rM   r   r   r	  )r?   r@   r|   r  r  r-   rB   r   r+   r,   r   )r  r   r   r\   r  r  r  r  r  r   X_test_filledr   r4   
X_test_estr!   r!   r"   )test_iterative_imputer_transform_recovery\  s.    
   
r  c               	   C   s  t jd} d}d}| ||}| ||}t |j}t|D ]R}t|D ]D}|d d || | f  |d d |f |d d |f  d 7  < qLq@| ||dk }| }	t j	|	|< |d }|	d | }
||d  }|	|d  }t
dd| d|
}||}t||dd	d
 d S )Nr   r   r;   r<   g      ?rK   r  gMbP?{Gz?)rtolr
  )r?   r@   r|   rA   r}   rE   r   r  r-   rB   r   r+   r,   r   )r   r   r\   r  r  r  r   r   r  r  r   r  r   r4   r  r!   r!   r"   &test_iterative_imputer_additive_matrixu  s&    D

r  c                  C   s   t jd} d}d}| |d}| d|}t ||}| ||dk }| }t j||< tdddd| d	}||}	t	|j
||j kstt|jdd| d
}||}
t|	|
dd tdddd| d	}|| |j|jkstd S )Nr   r  rJ   rK   r   r   r  F)r   Ztolr   rM   r   )r   r   rM   r   gHz>r	  )r?   r@   r|   r  r  r-   rB   r   rC   rn   r   r   r   r   r+   r   )r   r   r\   r  r  r2   r  r  r4   ZX_filled_100ZX_filled_earlyr!   r!   r"   %test_iterative_imputer_early_stopping  sF    
    
   
    
r  c            
   	   C   s   t dd\} }| j\}}d| d d df< tjd}d}t|D ]0}|jt|t|| dd}tj	| ||f< q@t
d	dd
}t  tdt || |}	W 5 Q R X tt|	rtd S )NT)Z
return_X_yrK   rI   r   g333333?F)rl   replacerJ   )r   r   error)r   rE   r?   r@   r|   r   choicer~   intrB   r   warningscatch_warningssimplefilterRuntimeWarningrC   r   r   r   )
r2   r    Z	n_samples
n_featuresr   Zmissing_rateZfeatZ
sample_idxr4   ZX_fillr!   r!   r"   $test_iterative_imputer_catch_warning  s"    
 
 
r$  z$min_value, max_value, correct_outputr   r   r;      i,  ZscalarszNone-defaultinflistszlists-with-inf)idsc                 C   s   t jddd}t| |d}|| t|jt jrFt|j	t jsJt
|jjd |jd krv|j	jd |jd kszt
t|dd d f |j t|dd d f |j	 d S )Nr   r;   rI   r   r   rK   )r?   r@   r|   rA   r   r+   r   Z
_min_valuendarrayZ
_max_valuer   rE   r   )r   r   Zcorrect_outputr2   r4   r!   r!   r"   )test_iterative_imputer_min_max_array_like  s    
 r+  zmin_value, max_value, err_msg)r   r   min_value >= max_value.r,  z_value' should be of shapec              	   C   s@   t jd}t| |d}tjt|d || W 5 Q R X d S )Nr;   rI   r)  rO   )r?   r@   r   rR   rh   ri   r+   )r   r   r)   r2   r4   r!   r!   r"   *test_iterative_imputer_catch_min_max_error  s    r.  zmin_max_1, min_max_2ir^   zNone-vs-infzScalar-vs-vectorc              	   C   s   t t jdddgdt jt jdgddt jdgt jddt jgg}t t jdt jdgddt jt jgt jdddgg}t| d | d dd	}t|d |d dd	}|||}|||}t|d d df |d d df  d S )
Nr<   rK   r;   r   rI   r^   rJ   r   )r   r   r   )r?   rb   rB   r   r+   r,   r   )Z	min_max_1Z	min_max_2r   r   r   r  ZX_test_imputed1ZX_test_imputed2r!   r!   r"   4test_iterative_imputer_min_max_array_like_imputation  s.    *    r/  r   TFc              	   C   s   t jd}t ddddgddddgddddgdd	ddgg}t t jdd	dgt jd	ddgt jdddgg}td
| |d}|||}| rt|d d df t 	|d d df  n t|d d df dddgdd d S )Nr   rJ   r<   rK   r;   r   rI   r   r^   r7   )r>   r   r         g-C6?)r  )
r?   r@   r|   rb   rB   r   r+   r,   r   r7   )r   r   r   r   r4   r  r!   r!   r"   'test_iterative_imputer_skip_non_missing  s    2.  *r2  
rs_imputer)seedrs_estimatorc                 C   sH   G dd d}||d}t | d}td}|| |j|ksDtd S )Nc                   @   s$   e Zd Zdd Zdd Zdd ZdS )zCtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimatorc                 S   s
   || _ d S r   r   )selfr   r!   r!   r"   __init__,  s    zLtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.__init__c                 _   s   | S r   r!   )r6  rp   Zkgardsr!   r!   r"   r+   /  s    zGtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.fitc                 S   s   t |jd S )Nr   )r?   r}   rE   )r6  r2   r!   r!   r"   predict2  s    zKtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.predictN)__name__
__module____qualname__r7  r+   r8  r!   r!   r!   r"   ZeroEstimator+  s   r<  r   r-  )r   r?   r}   r+   r   r   )r3  r5  r<  r   r4   r   r!   r!   r"   ,test_iterative_imputer_dont_set_random_state(  s    




r=  zX_fit, X_trans, params, msg_errr   missing-onlyauto)featuresr   zBhave missing values in transform but have no missing values in fitrY   rZ   r[   r]   z1MissingIndicator does not support data with dtypec              	   C   sD   t dd}|jf | tjt|d || | W 5 Q R X d S )Nr   r   rO   )r   
set_paramsrR   rh   ri   r+   r,   )X_fitr5   paramsZmsg_err	indicatorr!   r!   r"   test_missing_indicator_error<  s    
rE  zmissing_values, dtype, arr_typez,param_features, n_features, features_indicesr   c                 C   s  t | | dgdd| gg}t | | dgdddgg}t dddgdddgg}t dddgdddgg}	|||}|||}||}|	|}	t| |dd}
|
|}|
|}|jd |kst|jd |kstt|
j	| t
||d d |f  t
||	d d |f  |jtks&t|jtks6tt|t jsHtt|t jsZt|
jd	d
 |
|}|
|}|jtkst|jtkst|jdkst|jdkstt
| | t
| | d S )NrK   r^   r<   r1  r;   r   F)r'   r@  r   Tr   csc)r?   rb   astyper   rC   r,   rE   r   r   Z	features_r   r   boolr   r*  rA  r.   r1   )r'   arr_typer   Zparam_featuresr#  Zfeatures_indicesrB  r5   ZX_fit_expectedZX_trans_expectedrD  
X_fit_maskX_trans_maskZX_fit_mask_sparseZX_trans_mask_sparser!   r!   r"   test_missing_indicator_newT  sB    

  



rL  rI  c              	   C   s   d}t ||dgd|dgg}t ||dgdddgg}| |}| |}t|d}tjtdd	 || W 5 Q R X || tjtdd	 || W 5 Q R X d S )
Nr   rK   r^   r<   r1  r;   r   z"Sparse input with missing_values=0rO   )r?   rb   r   rR   rh   ri   rC   r,   )rI  r'   rB  r5   ZX_fit_sparseZX_trans_sparserD  r!   r!   r"   5test_missing_indicator_raise_on_sparse_with_missing_0  s    

rM  param_sparsezmissing_values, arr_typec                 C   sL  t ||dgd|dgg}t ||dgdddgg}| |t j}| |t j}t||d}||}||}|dkr|jdkst|jdkstn|d	kr|d
krt	|t j
stt	|t j
stn||dkrt	|t j
stt	|t j
stnRt|r$|jdkst|jdksHtn$t	|t j
s6tt	|t j
sHtd S )NrK   r^   r<   r1  r;   )r'   r   TrF  r?  r   F)r?   rb   rG  float64r   rC   r,   r.   r   r   r*  r   r0   )rI  r'   rN  rB  r5   rD  rJ  rK  r!   r!   r"   #test_missing_indicator_sparse_param  s*    

rP  c                  C   sX   t jdddgdddggtd} tddd}|| }t|t dddgdddgg d S )	NrY   rZ   r[   r]   r   )r'   r@  TF)r?   rb   rc   r   rC   r   )r2   rD  r5   r!   r!   r"   test_missing_indicator_string  s    
rQ  zX, missing_values, X_trans_expc                 C   s0   t t|ddt|d}|| }t|| d S )Nr9   r&   r   )r   r   r   rC   r   )r2   r'   ZX_trans_expZtransr5   r!   r!   r"   #test_missing_indicator_with_imputer  s    

rR  imputer_constructorz.imputer_missing_values, missing_value, err_msgNaNzInput X contains NaN)z-1r   z(types are expected to be both numerical.c              	   C   sR   t jd}|dd}||d< | |d}tjt|d || W 5 Q R X d S )Nr   r;   r   r   rO   )r?   r@   r|   rA   rR   rh   ri   rC   )rS  Zimputer_missing_valuesr   r)   r   r2   r4   r!   r!   r"   (test_inconsistent_dtype_X_missing_values  s    
rU  c                  C   sB   t ddgddgg} tddd}|| }|jd dks>td S )NrK   r>  r   r@  r'   r   )r?   rb   r   rC   rE   r   r2   mir   r!   r!   r"   !test_missing_indicator_no_missing  s    
rY  c                  C   sP   t dddgdddgdddgg} tddd}|| }| | ksLtd S )Nr   rK   r<   r   rV  )r   rD   r   rC   Zgetnnzsumr   rW  r!   r!   r"   /test_missing_indicator_sparse_no_explicit_zeros)  s    "
r[  c                 C   s8   t ddgddgg}|  }|| |jd ks4td S )NrK   )r?   rb   r+   Z
indicator_r   )rS  r2   r4   r!   r!   r"   test_imputer_without_indicator4  s    
r\  c                 C   s   | t jddgdt jdgddt jgdddgg}t ddd	dd
d
gdddd
dd
gddd	d
d
dgdddd
d
d
gg}tt jdd}||}t|st|j|jkstt	|
 | d S )NrK   rJ   r<   r   rI   r         @r   g      @g               @g      @g      "@T)r'   add_indicator)r?   rB   rb   r   rC   r   r0   r   rE   r   r1   )rI  ZX_sparser3   r4   r5   r!   r!   r"   2test_simple_imputation_add_indicator_sparse_matrix=  s    .	
r`  zstrategy, expected)r9   rZ   )r:   r   c                 C   sN   ddgdt jgg}t jddgd|ggtd}t| d}||}t|| d S )NrY   rZ   r[   r]   r=   )r?   rB   rb   rc   r   rC   r   )r(   expectedr2   r3   r4   r5   r!   r!   r"   "test_simple_imputation_string_listZ  s
    

rb  zorder, idx_orderc              	   C   s   t jd}|dd}t j|d ddf< t j|d ddf< t j|d dd	f< t j|d d
df< tt6 td| dd	|}dd |j
D }||kstW 5 Q R X d S )Nr   r   rJ   r  rK      r      r<   r;   r^   )r   r   r   c                 S   s   g | ]
}|j qS r!   r   )r   r   r!   r!   r"   r   y  s     z)test_imputation_order.<locals>.<listcomp>)r?   r@   r|   r  rB   rR   rS   r   r   r+   r   r   )orderZ	idx_orderr   r2   Ztrsidxr!   r!   r"   test_imputation_orderh  s    rg  r   c              	   C   sD  t d| ddgddddgdd| dgddd	| gg}t ddd
dgd
d| dgd| ddgddd
| gg}t d| ddg| d| | gd
| d| g| d| dgg}t ddddg| d
| dgd
dddg| d| d
gg}t| ddd}||}||}||}||}	t|| t|	| ||fD ]$}
||
}||}t||
 qd S )Nr   rI   r   r^   rJ   r   r   r   r   r<   rK   r7   T)r'   r(   r_  )r?   rb   r   rC   inverse_transformr,   r   )r   X_1ZX_2ZX_3ZX_4r4   	X_1_transZX_1_inv_transZ	X_2_transZX_2_inv_transr2   r5   ZX_inv_transr!   r!   r"   (test_simple_imputation_inverse_transform}  sV    



	



	



	



	  







rk  c              	   C   sz   t d| ddgddddgdd| dgddd	| gg}t| d
d}||}tjtd|j dd || W 5 Q R X d S )Nr   rI   r   r^   rJ   r   r   r   r   r7   r&   zGot 'add_indicator='rO   )	r?   rb   r   rC   rR   rh   ri   r_  rh  )r   ri  r4   rj  r!   r!   r"   3test_simple_imputation_inverse_transform_exceptions  s    



	
 rm  z)expected,array,dtype,extra_value,n_repeatextra_valueZmost_frequent_valuevaluer   Zmin_valuevaluerw   rd  c                 C   s"   | t tj||d||kstd S )Nr]   )r   r?   rb   r   )ra  rb   r   rn  Zn_repeatr!   r!   r"   test_most_frequent  s
      rp  r>   c                 C   sp   t dt jdgdt jt jgg}t| dd}||}t|dddf d ||}t|dddf d dS )zCheck the behaviour of the iterative imputer with different initial strategy
    and keeping empty features (i.e. features containing only missing values).
    rK   r<   rI   T)r>   keep_empty_featuresNr   )r?   rb   rB   r   rC   r   r,   )r>   r2   r4   rF   r!   r!   r"   *test_iterative_imputer_keep_empty_features  s      

rr  rq  c                 C   s   t dt jdgdt jt jgg}t| d}dD ]`}t|||}| rl|j|jksTtt|dddf d q.|j|jd |jd d fks.tq.dS )z>Check the behaviour of `keep_empty_features` for `KNNImputer`.rK   r<   rI   )rq  rC   r,   Nr   )r?   rb   rB   r   getattrrE   r   r   )rq  r2   r4   methodrF   r!   r!   r"   $test_knn_imputer_keep_empty_features  s     
rv  c                  C   s  t d} | d| jdd dgddi}t| jddd	}t||tj	dgdgdggt
d | d| jddd
gddi}tddd}t||tj	dgdgd
ggt
d | d| jdd dgddi}t| jddd	}t||tj	dgdgdggdd ttjddd	}t||tj	dgdgdggdd | d| jdd ddgddi}t| jdd}t||tj	dgdgdgdggdd | d| jdd dgddi}t| jdd}t||tj	dgdgdggdd | d| jdd dgddi}t| jddd	}t||tj	dgdgdggdd | d| jdd ddgddi}t| jdd}t||tj	dgdgdgdggdd d S )NrX   Zfeatureabcdestringr]   r:   nar   Zfghok)r   r(   rK   rI   ZInt64r   rO  r<   r8   r&   r7   r   r   r]  g       r^  )rR   ra   rd   ZSeriesr   ZNAr#   rC   r?   rb   rc   r$   rB   )re   r   r4   r!   r!   r"   test_simple_impute_pd_na  s`    
         r|  c                  C   sj   t d} tj}| j||d|gd|ddggdddd	gd
}t|d|}| }dddg}t|| dS )zDCheck that missing indicator return the feature names with a prefix.rX   rK   r^   r<   r;   rY   rZ   r[   r\   r_   r   Zmissingindicator_aZmissingindicator_bZmissingindicator_dN)	rR   ra   r?   rB   rd   r   r+   Zget_feature_names_outr   )re   r'   r2   rD  rf   Zexpected_namesr!   r!   r"   (test_missing_indicator_feature_names_outC  s    




r}  c                  C   s\   ddgddgddgg} t dd| }|tjtjgg}|jtksHtt|ddgg dS )zkCheck transform uses object dtype when fitted on an object dtype.

    Non-regression test for #19572.
    rY   rZ   r[   r9   r=   N)	r   r+   r,   r?   rB   r   rc   r   r   )r2   Zimp_frequentr5   r!   r!   r"    test_imputer_lists_fit_transformV  s
    r~  
dtype_testc                 C   sp   t jddt jgt jddgdddggt jd}t |}t jt jt jt jgg| d}||}|j| ksltdS )	zACheck transform preserves numeric dtype independent of fit dtype.r   g333333@r   g@r<   rK   r]   N)	r?   asarrayrB   rO  r   r+   r,   r   r   )r  r2   impr   r5   r!   r!   r"   .test_imputer_transform_preserves_numeric_dtypec  s     
r  
array_typerb   r   c                 C   s   t t jdgt jdgt jdgg}t|| }d}td||d}dD ]V}t|||}|j|jksdt| dkr~|d	d	d
f jn|d	d	d
f }t	|| qBd	S )zCheck the behaviour of `keep_empty_features` with `strategy='constant'.
    For backward compatibility, a column full of missing values will always be
    fill and never dropped.
    r<   rI   r   r;   r:   )r(   r   rq  rs  r   Nr   
r?   rb   rB   r   r   rt  rE   r   r  r   )r  rq  r2   r   r4   ru  rF   constant_featurer!   r!   r"   0test_simple_imputer_constant_keep_empty_featuresp  s    "
(r  c                 C   s   t t jdgt jdgt jdgg}t||}t| |d}dD ]~}t|||}|r|j|jksbt|dkr||dddf jn|dddf }t	|d q<|j|jd |jd	 d	 fks<tq<dS )
zYCheck the behaviour of `keep_empty_features` with all strategies but
    'constant'.
    r<   rI   r   )r(   rq  rs  r   Nr   rK   r  )r(   r  rq  r2   r4   ru  rF   r  r!   r!   r"   'test_simple_imputer_keep_empty_features  s    "
(r  )rR   r  Znumpyr?   Zscipyr   Zscipy.statsr   r   Zsklearn.utils._testingr   r   r   r   r	   Zsklearn.experimentalr
   Zsklearn.datasetsr   Zsklearn.imputer   r   r   r   Zsklearn.dummyr   Zsklearn.linear_modelr   r   r   Zsklearn.pipeliner   r   Zsklearn.model_selectionr   Zsklearnr   Zsklearn.random_projectionr   Zsklearn.exceptionsr   Zsklearn.impute._baser   r#   r$   r6   markZparametrizerG   rW   rg   rj   rs   rt   r   r   rc   strr   r   r   r   r   rB   r   r   r   r   rD   r  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r$  rb   r&  r+  r.  r/  r2  r@   r|   r=  rE  rO  Zint32r/   Z
coo_matrixZ
lil_matrixZ
bsr_matrixrL  rM  rP  rQ  rR  rU  rY  r[  r\  r`  rb  rg  rk  rm  r  rp  rr  rv  r|  r}  r~  Zfloat32r  r  r  r!   r!   r!   r"   <module>   s,  %



C 





"
+	 
% 
!
2
$**


	0


*,






   
	



 
"

9

 


<
