U
    2d|                    @   s
  d dl Z d dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
 d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlm Z  d dlm!Z! d dlm"Z" d dlm#Z# d dlm$Z$ d dlm%Z% d dlm&Z& d dlm'Z' d dlm(Z( d dlm)Z) d dlm*Z* d dl+m,Z, d dl+m-Z- d d l.m/Z/ d d!l0m1Z1 d d"l2m3Z3 d d#l4m5Z5 d d$l6m7Z7 d d%l8m9Z9 d d&lm:Z: d d'l;m<Z< e<= Z>ej?@d ZAd(ZBd)ZCeAjDd*d+eBd,ZEeAjDd+d-eBd,ZFeAGeCeBeF eE ZHeHd ddf Id+eBZJeHddd f IeCd+ZKeJL ZMeKL ZNd.d/ ZOd0d1 ZPd2d3 ZQd4d5 ZRejSTd6d7d8gd+d9d:gd;d<d=ggd+d9d:gd+d9d:gd;d<d=ggd>d?gfd+d d+gd d d+ggd+d d+gd d d+gd d d+gd d d+ggeUd+d:gfd+ejVd+gejVejVd+ggd+ejVd+gejVejVd+gejVejVd+gejVejVd+ggeUd+d:gfgejSTd@dAdBdCgdDdE ZWdFdG ZXejSTdHde	jYe	jZgejSTdIdJdKgdLdM Z[ejSTdNedJdOe&dJdPgejSTdHej\e	jYe	jZgejSTdIdJdKgejSTdQej]ej^gejSTdRd d?dSgdTdU Z_ejSTdVd-dWdXgejSTdYdZd+d[gejSTdQej]ej^gejSTd@ej\e	jYe	jZgd\d] Z`d^d_ Zaed`da Zbdbdc Zcddde Zddfdg Zedhdi Zfdjdk Zgdldm ZhejSTd8dKdgdndo ZiejSTd8dKdgdpdq Zjdrds Zkdtdu Zldvdw Zmdxdy Zndzd{ ZoejSTd8dKdgd|d} ZpejSTd~dKdJgejSTddKdJgejSTd@ej\e	jYe	jZgdd Zqdd Zrdd Zsdd Ztdd Zudd Zvdd Zwdd ZxejSTddKdJgejSTddKdJgejSTd7ej?Gd-d:e	jyd-d:ddgdd Zzdd Z{dd Z|ejSTdd dddd+gejSTdddddgdd Z}dd Z~dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZejSTddAdgdd Zdd ZddĄ ZddƄ ZddȄ Zddʄ Zdd̄ Zdd΄ ZddЄ Zdd҄ ZddԄ Zddք Zdd؄ Zddڄ Zdd܄ Zddބ Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZejSTdddgdd ZejSTdddgejSTddKdJgejSTd7eKeHgdd Zdd  Zdd Zdd ZejSTd7eHeeHeeH eeHjgdd ZejSTdddgdd Zd	d
 Zdd ZejSTddddddgdd Zdd ZejSTdddgdd ZejSTdddgejSTddKdJgdd ZejSTdddgejSTddKdJgdd ZejSTdddgejSTddKdJgdd ZejSTde	j?d-d+d d d!e	Zejd"ejVd#gd$d% ZejSTd&d'd(gd)d* Zd+d, Zd-d. ZejSTd/e e$e&ee"e)gd0d1 ZejSTd/e e$e&ee"e)eegd2d3 Zd4d5 ZdS (6      N)sparsestats)gen_batches)assert_almost_equal)assert_array_almost_equal)assert_array_equal)assert_array_less)assert_allclose)assert_allclose_dense_sparse)skip_if_32bit)_convert_container)mean_variance_axis)	Binarizer)KernelCenterer)
Normalizer)	normalize)StandardScaler)scale)MinMaxScaler)minmax_scale)QuantileTransformer)quantile_transform)MaxAbsScaler)maxabs_scale)RobustScaler)robust_scale)add_dummy_feature)PowerTransformerpower_transform)_handle_zeros_in_scale)BOUNDS_THRESHOLD)linear_kernel)NotFittedError)clone)Pipeline)cross_val_predict)SVR)shuffle)datasets        size
   c                 C   s   t | dr|  } | S )Ntoarray)hasattrr1   a r5   I/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/preprocessing/tests/test_data.pyr1   L   s    
r1   c                 C   s   t | jd S )Nr   )npasarrayshaper3   r5   r5   r6   _check_dim_1axisR   s    r:   c                 C   s:   ||kr| d | |ks6t n| | ||  |ks6t d S )Nr-   )AssertionError)ibatch_start
batch_stopn
chunk_sizen_samples_seenr5   r5   r6   assert_correct_incrV   s    rB   c               
   C   sz   ddg} ddg}t | |D ]Z\}}t||}t|}t }t|dd }tt |j|||d W 5 Q R X qd S )N      r-   sample_weight)ziprngrandnr   pytestraises
ValueErrorfit)Z
n_samplessZn_featuress	n_samples
n_featuresXyscalerZsample_weight_notOKr5   r5   r6   9test_raises_value_error_if_sample_weights_greater_than_1d]   s    
rS   XwrP   rF   rC   rD                   @      ?array_constructorarrayZ
sparse_csrZ
sparse_cscc           
      C   s   | d }t||}t| |} t| jd }t|d}|j| ||d t|jd }t|d}||| dddgddd	gg}	t|j|j t|j	|j	 t|
|	|
|	 d S )
Nr   r   	with_meanrE         ?g      @      @g      @g      @)
startswithr   r7   onesr9   r   rM   r   mean_var_	transform)
rT   rP   rF   rZ   r]   ZywZscaler_wrQ   rR   X_testr5   r5   r6   "test_standard_scaler_sample_weightp   s    



rf   c                  C   s  t tttfD ]} t }|| j| dd}t| tr@t	| } t
| dkrt|j|   t|jtt t|jddtt t|jddtt n\t|j|   t|j|   t|jddtt t|jddd t|jddd |j| jd kst||}t||  qtd} t }|| j| dd}t|jd t|jd t|jddd t|jddd |j| jd kstd S 	NTcopyr-   r   axis        rY   rV   r-   )X_1rowX_1colX_list_1rowr   rM   rd   
isinstancelistr7   r[   r:   r   rb   ravelscale_ra   rO   r   meanZ
zeros_likestdn_samples_seen_r9   r;   inverse_transform)rP   rR   X_scaledX_scaled_backr5   r5   r6   test_standard_scaler_1d   s4    



r{   sparse_constructoradd_sample_weightFTc                 C   s   t jd}d}d}| r$t |}nd }d}t jt jt jfD ]|}||||}|d k	rf||}d}t	|d}	|	j
||d|}
|j|
jkst|	jjt jkst|	jjt jks<tq<d S )Nr   r0   rD   TFr\   rE   )r7   randomRandomStatera   float16float32float64rI   astyper   rM   rd   dtyper;   rb   rt   )r}   r|   rH   rN   rO   rF   r]   r   rP   rR   ry   r5   r5   r6   test_standard_scaler_dtype   s"    
r   rR   r\   with_centeringr   constant      Y@c                 C   s6  t | tr"|r"t| jj d tjd}d}d}|rRt	|j
|dd d}ni }tj||f||d}	||	}
| j|
f||
}t | trt| jt|
jd d	d
 t| jt|
jd  t|drt| |	 n
t||
 t | tr2|s2t|
| jd}t|dr(t| |  n
t|| d S )Nz# does not yet support sample_weightr   d   r-   r.   rC   rE   )r9   
fill_valuer   gHz>Zatolr1   r\   )rq   r   rJ   skip	__class____name__r7   r~   r   dictuniformfullrM   rd   r   r	   rc   zerosr9   rt   ra   r2   r1   r   r]   )rR   r}   r|   r   r   rH   rN   rO   Z
fit_paramsX_arrayrP   ry   Z
X_scaled_2r5   r5   r6   &test_standard_scaler_constant_features   s,    


r   rN   r   i'  averageg|=g    _Bc                 C   s  d\}}t jdd t||d D |d}|jd }t j| |f|d}|| |d | d d d f< || || d d d d f< ||}	tdd	|	}
t t jj	}| | |d  | d |d  |d   }|d |k}t 
|stt|
j| || kstt|
j| d
 |dd d f |dd d f  dk}t|
jt | d t|
jt | d t |d |k|}t|
j| t |
j|  d S )N)i   c                 S   s   g | ]}d | qS )r0   r5   .0r<   r5   r5   r6   
<listcomp>  s     z?test_standard_scaler_near_constant_features.<locals>.<listcomp>r-   r   r   rC   Fr\   rY   r,   )r7   r[   ranger9   emptyr   rM   finfor   epsanyr;   allrc   r	   rt   Zlogical_notlogical_andsqrt)rN   rZ   r   r   Z	scale_minZ	scale_maxscalesrO   rP   r   rR   r   ZboundsZwithin_boundsZrepresentable_diffZcommon_maskr5   r5   r6   +test_standard_scaler_near_constant_features  s&    "
($r   c                  C   s`   ddddg} t | }| |fD ]<}t|}t| d t| d tt|ddd| qd S )NrY         @      @rl   Fr]   with_std)r7   r[   r   r   ru   rv   r   )ZX_listZX_arrrP   ry   r5   r5   r6   test_scale_1dH  s    
r   c               	   C   s  t jdt dt jd} t  tdt t|  W 5 Q R X t	t| t 
d t jdt dt jd} d}tjt|d t| }W 5 Q R X t	|t 
d t jddt jd} t  tdt t| }W 5 Q R X t	|t 
d t jdd	t jd}d
}tjt|d t|}W 5 Q R X t	|t 
d t	|| tjt|d t|dd}W 5 Q R X t	|t 
d t	|| d S )N   gh㈵>r   errorr0   z:standard deviation of the data is probably very close to 0matchg0.++g}Ô%ITz$Dataset may contain too large valuesFr   )r7   r   logr   warningscatch_warningssimplefilterUserWarningr   r   r   rJ   warns)xwarning_messageZx_scaledZx_small_scaledZx_bigZx_big_scaledZx_big_centeredr5   r5   r6   (test_standard_scaler_numerical_stabilityT  s2    


r   c                  C   sr  t jd} d}d}| ||}d|d d df< t }||j|dd}t t |r^t	|j
|kslt	t|jdd|dg  t|jdddddddg ||k	st	||}||k	st	||k	st	t|| t|d	d
d}t t |rt	t|jd	d|dg  t|d	dd}t t |r6t	t|jd	d|dg  t|jd	d|dg  ||k	stt	||j|d
d}t t |rt	t|jdd|dg  t|jdddddddg ||kst	| dd}d|d d df< t }||j|dd}t t |r,t	t|jdd|dg  t|jdddddddg ||k	snt	d S )Nr   rV   rU   rl   Trh   rj   rY   r-   F)rk   r   )r7   r~   r   rI   r   rM   rd   r   isnanr;   rw   r   ru   rv   rx   r   )rH   rO   rN   rP   rR   ry   rz   r5   r5   r6   test_scaler_2d_arrays{  sJ    

r   c               	   C   s   t jd} | ddddgt j}t jdd t |}|	|}W 5 Q R X t 
|t j}t t |s|tt||dd	 d S )
Nr   rV   r0   i@ r-   raise)ZoverrC   decimal)r7   r~   r   r   r   r   Zerrstater   rM   rd   fit_transformr   r   isfiniter;   r   )rH   rP   rR   ry   ZX_scaled_f64r5   r5   r6   test_scaler_float16_overflow  s    r   c               	   C   sX   t dddddg} t| dd}t| t dddddg t|t dddddg d S )Nr   gؗҜ<r-   rC   rD   Trh   )r7   r[   r    r	   )s1s2r5   r5   r6   test_handle_zeros_in_scale  s    r   c               
   C   s  t } | jd }ddd||d fD ]`}t | }t }tt|D ]}|| | }qBt|j|j t|j	|j	 |j
|j
kstt|j|j t|j|j t|j|j td|}t | | }t | | }t|j|j t|j	|j	 |j
|j
kstt|j|j t|j|j t|j|j t | }t }ttt|D ]2\}}|| | }t||j|j|||j
d qNq d S )Nr   r-   rC   2   *   r=   r>   r?   r@   rA   )X_2dr9   r   rM   r   rN   partial_fitr   Z	data_min_Z	data_max_rw   r;   Zdata_range_rt   min_slice	enumeraterB   startstoprP   r?   r@   scaler_batchscaler_incrbatchbatch0r<   r5   r5   r6   test_minmax_scaler_partial_fit  sD    

r   c               
   C   s  t } | jd }ddd||d fD ]d}tdd| }tdd}tt|D ]}|| | }qJt|j|j |j	|j	ks|t
|j|jkst
td|}t | | }|dkrttjttjd|j	 ttjttjd|j n4ttj| | dd	|j	 ttj| | dd	|j t | }t }ttt|D ]2\}}|| | }t||j|j|||jd
 q4t|j	|j	 |j|jks t
q d S )Nr   r-   rC   r   r   Fr   r   rj   r   )r   r9   r   rM   r   rN   r   r   rb   rc   r;   rw   r   r7   r   rO   r   ra   rt   varrv   r   rB   r   r   r   r5   r5   r6    test_standard_scaler_partial_fit   sP    


   
	r   c                  C   s`  t jd} d}d}| jdd|d}| jdd|d}| ||| | }t |}t }|D ]}||d	|}q^d
}	t	|j
|j
|	d t	|j|j|	d t	|j|j|	d d}
d}| dd|
t j| }t|}t|}||fD ]l}tdd|}tdd}|D ]}||}qd
}	|j
d k	s6tt	|j|j|	d t	|j|j|	d qd S )Nr   rC   r   g  4&kg  4&kCr.   g     @@    .Ar-   gư>)Zrtol)r   rD   g@xDFr\   )r7   r~   r   r   rI   r   rM   r   reshaper	   rb   rc   rt   randintr   r   r   
csr_matrix
csc_matrixr;   )rH   rO   rN   offsetsr   rP   r   r   chunkZtolr/   r   X_csrX_cscrR   r5   r5   r6   4test_standard_scaler_partial_fit_numerical_stability3  s8    


r   c                 C   s   t dgdgdgdgg}t|}t|}| rBt|jd } tdddd}||fD ]X}|j	|| d
|}t| |  ||}t| |  t| |  qXd S )	NrY   rl   r   r   FTr]   r   ri   rE   )r7   r[   r   r   r   rH   randr9   r   r   rd   r   r1   rx   )rF   rP   r   r   null_transformX_nullX_origr5   r5   r6   test_partial_fit_sparse_inputa  s    


r   c                 C   s|  t d dd d f }| r(t|jd } t }tt|jd dD ]2\}}|d |d d d f }| }| d krt |}|	|| }n2t j|| d |d  d}|j	|| | | d}|
|}t|| t|| ||}	t||	 t|jd }
ttj}t|
|j|  t|
|j|  | d krP|d |jksvtqBt| d |d  t|jksBtqBd S )Nr   r   r-   rE   )r   rH   r   r9   r   r   r   ri   r   r   rd   r   rx   r7   r   r   floatr   r   rc   rt   rw   r;   sumrJ   approx)rF   rP   r   r<   r   ZX_sofarZchunks_copyZscaled_batchZscaled_incrZright_inputZzeroepsilonr5   r5   r6   .test_standard_scaler_trasform_with_partial_fitu  s@      





r   c                  C   s   t jddddddgddddddgddddddgddddddgddddddgddddddggt jd} t }||  ||  d S )Nr-   r   r   rU   r   )r7   r[   Zint32r   rM   rx   )r   rR   r5   r5   r6   .test_standard_check_array_of_inverse_transform  s    
r   c               	   C   s  t j} t }|| }t|jddd t|jddd ||}t| | tdd}|| }t|jddd t|jddd ||}t| | tdd}|| }t|jddd t|jddd	 ||}t| | td
d}t	t
 ||  W 5 Q R X d S )Nr   rj   r-   r-   rC   feature_rangerC   )      333333?r   r   )rC   r-   )irisdatar   r   r   minmaxrx   rJ   rK   rL   rM   )rP   rR   X_transX_trans_invr5   r5   r6   test_min_max_scaler_iris  s,    











r   c            	      C   s  dddgdddgdddgg} dddgdddgdddgg}t  }|| }dddgdddgdddgg}t|| ||}t| | ||}dddgddd	gddd
gg}t||dd t dd}|| }dddgdddgdddgg}t|| t| }t|| t| dd}t|| d S )Nrl   rY         ?皙皙?rX         r^   gsh|??gS?rC   r   r   r   )r   r   r   rx   rd   r   )	rP   X_newrR   r   ZX_expected_0_1r   X_trans_newZX_expected_0_1_newZX_expected_1_2r5   r5   r6   *test_min_max_scaler_zero_variance_features  s&    








r   c                  C   s>   t j} t| dd}ttj|ddd ttj|ddd d S )Nr-   rj   r   )r   r   r   r   r7   r   r   )rP   r   r5   r5   r6   test_minmax_scale_axis1  s    r   c                  C   s\  t tttfD ]} tdd}|| | }t| tr>t	| } t
| dkr|t|jddtt t|jddtt n$t|jddd t|jddd |j| jd kst||}t||  qtd} t }|| | }| dkst| dkst|j| jd ks"tt  }| }| }t|| ||  t|dd d S rg   )rn   ro   rp   r   rM   rd   rq   rr   r7   r[   r:   r   r   r   rO   r   rw   r9   r;   rx   ra   rs   r   )rP   rR   ry   rz   X_1dr   Zmax_r5   r5   r6   test_min_max_scaler_1d  s4    




 
r  c              	   C   s   t jd}|dd}d|d d df< t|}t|}| rP||jd } t	
t t | W 5 Q R X t	
t t | W 5 Q R X tdddd}||}t|j|j ||}t|j|j tdd	j|| d
}|j|dd}	t t |	r
ttdd	j|| d
}
|
j|dd}t t |jrDttdd	j|| d
}|j|dd}t t |jr~tt|j|
j t|j|
j t|j|
j t|j|
j t|j|j t|j|j t|j|j t|j|j | d kr2t|	jdddddddgd t|	jdddddddg t|d\}}t||	jdd t||	jdd |	|k	srt||k	st||	}||k	st||	k	stt|| |
|}||k	st||k	stt| | |
|  }||k	s t||k	stt| | d S )Nr   rU   rV   rl   r   FTr   r\   rE   rh   rj   {GzQ@ffffffֿ(\rC   rY   )!r7   r~   r   rI   r   r   r   r   r9   rJ   rK   rL   r   rM   r   r   r   rx   rd   r   r   r;   r   rb   rc   rt   rw   ru   rv   r   r   r1   tocsc)rF   rH   rP   r   r   r   r   r   rR   ry   
scaler_csrX_csr_scaled
scaler_cscX_csc_scaledX_csr_scaled_meanZX_csr_scaled_varrz   X_csr_scaled_backX_csc_scaled_backr5   r5   r6   test_scaler_without_centering%  sr    





  


r  r]   r   c                 C   s   t jdddgt jddgddt jgddt jggt jd	}||}t|rV| rVtd
 t| |d}|	| t
|jt dddg d S )Nr   r-   rD   rW   r0   rV   rU   r   r   z3'with_mean=True' cannot be used with sparse matrix.r   rC   )r7   r[   nanr   r   issparserJ   r   r   rM   r   rw   )r]   r   rZ   rP   transformerr5   r5   r6   #test_scaler_n_samples_seen_with_nano  s    ( 

r  c                 C   sn   | j |j   krd ksn t| j|j  kr6d ks<n t| j|j  krTd ksZn t| j|jksjtd S )N)rb   r;   rc   rt   rw   )Zscaler_1Zscaler_2r5   r5   r6   "_check_identity_scalers_attributes  s    r  c                  C   sF  t jdddgdddgdddggt jd} t| }| }td	d	d
}|| }t|}||}t|}||}t	|| t	|| t
||  t|||gdD ]\}	}
t|	|
 q||  || || t|||gdD ]\}	}
t|	|
 q||  || || t|||gdD ]\}	}
t|	|
 q,d S )Nr   r-   rD   rV   rW   r   r0   r   Fr   rC   )r7   r[   r   r   r   r  r   r   r$   r
   r	   	itertoolscombinationsr  r   rM   )X_denser   r   transformer_denseZX_trans_denseZtransformer_csrX_trans_csrZtransformer_cscX_trans_cscZtrans_1Ztrans_2r5   r5   r6   test_scaler_return_identity  sB    (






 


 


 r  c               	   C   s  t jd} | jddd}d|d d df< t|}t|}tdddd}tj	dd	 |
|}W 5 Q R X t|j|j ||}t|j|j tj	dd	$ tdd
|}|j|dd}W 5 Q R X t t |rttj	dd	$ tdd
|}	|	j|dd}
W 5 Q R X t t |
jr,ttj	dd	$ tdd
|}|j|dd}W 5 Q R X t t |jrztt|j|	j t|j|	j t|j|	j t|j|j t|j|j t|j|j t|jdddddddgd t|jdddddddg t|
td\}}t||jdd t||jdd ||k	sNt|
|k	s\t||}||k	stt||k	stt|| |	|
}||k	st||
k	stt| | |	| }||k	st||k	stt| | d S )Nr      )rU   rV   r.   r   FTr   )recordr\   rh   rj   rl   gX9v?gV-?g      5@gl?rC   rY   )r7   r~   r   r   r   r   r   r   r   r   r   r   r   rx   rM   rd   r   r   r;   r   rb   rc   rt   ru   rv   r   r   r   r1   r  )rH   rP   r   r   r   r   r   rR   ry   r  r  r	  r
  r  X_csr_scaled_stdrz   r  r  r5   r5   r6   test_scaler_int  sn    



   


r  c                  C   s   t jd} | dd}d|d d df< t|}t|}| }tdd	| t
|| | }tddd	| t
| |  | }tddd	| t
| |  d S )	Nr   rU   rV   rl   r   Frh   )r]   ri   )r7   r~   r   rI   r   r   r   ri   r   rM   r   r1   )rH   rP   r   r   ZX_copyZ
X_csr_copyZ
X_csc_copyr5   r5   r6   test_scaler_without_copy  s    


r  c               	   C   sp  t jd} | dd}t|}t|}tt	 t
|dd W 5 Q R X tt	 tdd| W 5 Q R X tt	 t
|dd W 5 Q R X tt	 tdd| W 5 Q R X tdd|}tt	 || W 5 Q R X tt	 || W 5 Q R X t||}tt	 || W 5 Q R X t||}tt	 || W 5 Q R X d S )Nr   rU   rV   Tr\   )r7   r~   r   rI   r   r   r   rJ   rK   rL   r   r   rM   rd   rx   )rH   rP   r   r   rR   ZX_transformed_csrZX_transformed_cscr5   r5   r6   +test_scale_sparse_with_mean_raise_exception  s.    

r   c               	   C   s8   t jddddgg} tjtdd t|  W 5 Q R X d S )NrV   rW      r   z,Input contains infinity or a value too larger   )r7   infrJ   rK   rL   r   rP   r5   r5   r6   &test_scale_input_finiteness_validation2  s     r$  c               	   C   sB   t dd} tdd}d}tjt|d ||  W 5 Q R X d S )Nr+   r0   Tr   zCannot center sparse matricesr   )r   r   r   rJ   rK   rL   rM   )X_sparserR   err_msgr5   r5   r6   test_robust_scaler_error_sparse;  s
    
r'  r   with_scalingr   densityc                 C   s~   |rt | rtd t||d}||  |rFt|jtj	sTt
n|jd ksTt
|rlt|jtj	szt
n|jd kszt
d S )Nz(RobustScaler cannot center sparse matrix)r   r(  )r   r  rJ   r   r   rM   rq   center_r7   ndarrayr;   rt   )rP   r   r(  rR   r5   r5   r6   test_robust_scaler_attributesC  s    

r-  c                  C   s   t jdd} d| d d df< t| } tdd}||  |jd t	dksTt
|| }t| d d df  |d d df   d S )Nr0   rV   r   Fr   r-   )r7   r~   rI   r   r   r   rM   rt   rJ   r   r;   rd   r	   r1   )rP   rR   r   r5   r5   r6   "test_robust_scaler_col_zero_sparseX  s    



r.  c                  C   sr   t jd} | dd}d|d d df< t }|||}tt j|ddddg  t|j	ddd d d S )Nr   rU   rV   rl   rj   )
r7   r~   r   rI   r   rM   rd   r   medianrv   )rH   rP   rR   ry   r5   r5   r6   test_robust_scaler_2d_arraysg  s    r0  r*  g?皙?strictly_signedpositivenegativer   c                 C   s   t jdd| d }|dkr,t|j|_n8|dkrFt|j |_n|dkrdtj|jjtjd|_|	 }t
dd	}t
dd	}|| || t|j|j d S )
Nr+   rV   r)  r3  r4  r   r   Fr   )r   r   r  r7   absr   r   r9   r   r1   r   rM   r	   rt   )r*  r2  r%  r  Zscaler_sparseZscaler_denser5   r5   r6   +test_robust_scaler_equivalence_dense_sparset  s    



r6  c                  C   s   t jd} | dd}t dddddgg}td	d
}||}|t	|}||j
 }t| | ||}t||  d S )Nr   rU   rV   r1  rY   rX   rl   r   Fr   )r7   r~   r   rI   r[   r   rM   rd   r   r   rt   r   r1   rx   )rH   rP   Z
single_rowrR   Z	row_transZrow_expectedZrow_scaled_backr5   r5   r6   (test_robust_scaler_transform_one_row_csr  s    



r7  c                  C   sl   t j} t }|| }ttj|ddd ||}t| | tj|ddd}|d |d  }t|d d S )Nr   rj      K   qrk   r-   	r   r   r   r   r   r7   r/  rx   
percentile)rP   rR   r   r   r<  iqrr5   r5   r6   test_robust_scaler_iris  s    


r@  c                  C   sp   t j} tdd}|| }ttj|ddd ||}t| | tj|ddd}|d |d  }t|d d S )N)r0   Z   quantile_ranger   rj   r;  r-   r=  )rP   rR   r   r   r<  Zq_ranger5   r5   r6   !test_robust_scaler_iris_quantiles  s    



rD  c                  C   s   t j} tdd}|| }||}t| | tddd}|| }||}t| | t| }||}||}t|j|j d S )Nr*   n_quantilesnormal)rF  Zoutput_distribution)	r   r   r   r   rx   r   r   r   A)rP   r  r   r   r%  ZX_sparse_tranZX_sparse_tran_invr5   r5   r6   test_quantile_transform_iris  s    









rI  c                  C   s.  t ddddddddddg
dddddd	dd
ddg
ddddddddddg
g} t| } t ddddddddddg
dddddd	dd
ddg
ddddddddddg
g}t|}d}tjt|d td
d|  W 5 Q R X td
d}d}tjt|d || W 5 Q R X ||  d}tjt|d |	| W 5 Q R X t ddddddddddg
ddddddddddg
g}d}tjt|d |
| W 5 Q R X td
d| }tjtdd |	d
 W 5 Q R X tdd}d}tjt|d}||  W 5 Q R X t|dkst|j| jd ks*td S )Nr   r9  r   r:  r   rC   rU   rW   r   r0   @ffffff@ffffff@      #@r1  zmThe number of quantiles cannot be greater than the number of samples used. Got 1000 quantiles and 10 samples.r   )	subsamplerE  z>QuantileTransformer only accepts non-negative sparse matrices.zKX has 2 features, but QuantileTransformer is expecting 3 features as input.z+Expected 2D array, got scalar array insteadzn_quantiles is set to n_samplesr-   )r7   Z	transposer   r   rJ   rK   rL   r   rM   rd   rx   r   r   lenr;   Zn_quantiles_r9   )rP   ZX_negr&  r  Z
X_bad_featZwarn_msgr  r5   r5   r6   #test_quantile_transform_check_error  sT    



.
rQ  c            	      C   sz  t ddgddgddgddgddgg} t| }tddd}d}tjt|d ||  W 5 Q R X t ddgddgddgddgddgg}|	|}t
||j t dddddddddddg}t dddddddddddg}t dd	dddd
d	ddddg}t|||ff}|	|}t ddgddgddgddgddgddgddgddgddgg	}t
||j tddd}t dddddddddg	}t dddddddddg	}t dd	dddd
d	ddg	}t|||ff}|	|}t ddgddgddgddgddgddgddgg}t
||j t
|j||j tddddd}|	|}t
||j t
|j||j d S )Nr   r-   rC   TrV   )ignore_implicit_zerosrF  z['ignore_implicit_zeros' takes effect only with sparse matrix. This parameter has no effect.r   rU   rD   rW   r!  r   rl   r   rY   r,   g      ?)rR  rF  rO  random_state)r7   r[   r   r   r   rJ   r   r   rM   r   r   rH  rx   )	rP   r%  r  r   
X_expectedr   ZX_dataZX_colZX_rowr5   r5   r6   +test_quantile_transform_sparse_ignore_zeros  s`    (
(
   

,   
rU  c               	   C   s   t dddgdddgddd	gd
ddgdddgg} tdd}||  || }t t jdddddj}tt j	|dd| t dddgdddgg}t dddgdddgg}t
||| ||}t
| | d S )Nr   rC   rJ  r9  rU   rK  r   rW   rL  r:  r   rM  r   r0   r1  rV   rE  r-   )num)rD   r-   rj   r,   e      )r7   r[   r   rM   r   ZtilelinspaceTr   sortr   rd   rx   )rP   r  r   rT  re   r   r5   r5   r6   !test_quantile_transform_dense_toy:  s*    *



r\  c            	      C   sR  d} d}t jt j| dfdd}d}g }t|D ]`}t||| d d}|| t dd|t |j	 }t 
t |}|d	k st|| q2tt |t|ksttj| dd
ddd}g }t|D ]b}t||| d d}|| t dd|t |j	 }t 
t |}|dk s&t|| qtt |t|ksNtd S )N@B r+   r-   r   rj   rV   r0   )rS  rF  rO  {Gz?gGz?Zcsc)r*  formatrS  r1  )r7   r[  r~   sampler   r   rM   rY  rs   
quantiles_r   r5  r;   appendrP  uniquer   r   )	rN   rF  rP   ZROUNDZinf_norm_arrrS  r  diffZinf_normr5   r5   r6   #test_quantile_transform_subsamplingZ  s>    

re  c                  C   s<  t dddgdddgdddgdddgdddgdd	dgd
ddgdddgdddgdddgg
} t| } tdd}||  || }tt j|	 ddd tt j
|	 ddd ||}t| 	 |	  tdd| 	 }|| }tt j|	 ddd tt j
|	 ddd ||}t| 	 |	  d S )Nrl   rX   g      9@      @g      I@rJ  rK  g      @g       @g     R@rL  g      $@rM  r   r1  r0   rE  r   rj   rY   )r7   r[   r   r   r   rM   r   r   r   r1   r   rx   rd   )rP   r  r   r   r  r5   r5   r6   "test_quantile_transform_sparse_toy  s6    






rg  c               	   C   s\   t dddddgdddd	d
gdddddgg} t| jddd}t| ddd}t||j d S )Nr   r9  r   r:  r   rC   rU   rW   r   r0   rJ  rK  rL  rM  r1  rV   )rk   rF  r-   )r7   r[   r   rZ  r   )rP   Z
X_trans_a0Z
X_trans_a1r5   r5   r6   test_quantile_transform_axis1  s    .rh  c                  C   s  t ddgddgddgg} t| }tddd| }t||  tddd|}t|j|  t||j t ddgddgddgg}t ddgddgddgg}tdd|}|	|}t|| t j

d}t }|| |	d	gg|	t |ggkst|	d
gg|	t |ggks4t|d	gg|t |jggks^t|d
gg|t |jggkstd S )Nr   r-   rD   rF  rS  r   r1  rE  )r+   r-   r0   )r7   r[   r   r   r   r   r   rH  rM   rd   r~   r   r;   r   rx   Zreferences_)r  r%  r   Z
X_trans_sprP   ZX1r  r5   r5   r6   test_quantile_transform_bounds  s2    




((rk  c               	   C   sp   t j} tdgtd gdgdgdgdgdgg}| |fD ]2}tddd	}||}||}t||d
d q8d S )Nrl   r0   r^   rC   rD   rU   r+   r   ri  	   r   )	r   r   r7   r[   r!   r   r   rx   r   )X_1X_2rP   r  r   r   r5   r5   r6   #test_quantile_transform_and_inverse  s    *

ro  c                  C   s   t t jdddgt jt jddgt jdddgg} tddd}||  t |jd d df  sftt |jd d dd f 	 rtd S )Nr   r-   r   r0   r   ri  )
r7   r[   r  r   r   r   ra  r   r;   r   )rP   r  r5   r5   r6   test_quantile_transform_nan  s
    0
 rp  
array_typer   c                 C   s   t dddddddddddddddddd	gd
 }d|dd }t|| }d}t|d|}|jd d df }t|dkstt	t 
|dkstd S )Nr   r-   rC   rD   rU   rV   rl  r   r!  r0   r1  r,   r   rE  )r7   r[   r   r   r   rM   ra  rP  r;   r   rd  )rq  rP   rF  ZqtZ	quantilesr5   r5   r6   *test_quantile_transformer_sorted_quantiles  s    2
rr  c               
   C   s>   dD ]4} t | d}tjtdd |tj W 5 Q R X qd S )N))r,   rA  )rN  )r0   rW  )g      Y@rW  )rA  r   rB  zInvalid quantile range: \(r   )r   rJ   rK   rL   rM   r   r   )Zrange_rR   r5   r5   r6    test_robust_scaler_invalid_range  s    
rt  c               	   C   sT  t jd} | dd}d|d d df< t|}t|dd}t t |rRt	t|dd}t t |j
rtt	t| dd}t||  tt t|ddd	 W 5 Q R X t|jdd
dddddgd t|jdd
dddddg ||k	st	t|d\}}t||jdd
 t||jdd
 t|dddd}t| |  d S )Nr   rU   rV   rl   r   Fr\   r-   )r]   rk   rj   r  r  r  r  rC   rY   Tr   )r7   r~   r   rI   r   r   r   r   r   r;   r   r  r   r1   rJ   rK   rL   ru   rv   r   )rH   rP   r   ry   r  r
  r  r  r5   r5   r6   %test_scale_function_without_centering  s0    

  ru  c                  C   sT   t j} t| dd}ttj|ddd tj|ddd}|d |d  }t|d d S )Nr-   rj   r   r8  r;  r   r   r   r   r7   r/  r>  rP   r   r<  r?  r5   r5   r6   test_robust_scale_axis18  s    rx  c                  C   sV   t jd d df } t| }tt|d tj|dd}|d |d  }t|d d S )Nr-   r   r8  )r<  rv  rw  r5   r5   r6   test_robust_scale_1d_arrayA  s    ry  c                  C   s   dddgdddgdddgg} t  }|| }dddgdddgdddgg}t|| ||}t| | dddgdddgdddgg}||}dddgddd	gddd
gg}t||dd d S )Nrl   rY   r   r   r   r   rX   r^   g_gіs)?rD   r   )r   r   r   rx   rd   )rP   rR   r   rT  r   r   r   X_expected_newr5   r5   r6   )test_robust_scaler_zero_variance_featuresJ  s    




r{  c                  C   s   t jd} | dd}t |t dd t dd g}d}t|dd	|}||}|j	t
jd
ddkstt|jt
jdddkst| t
jdddkstd S )Nr   r]  r-   )r   r-   r   i)r-   c   T)rC  Zunit_variancer   MbP?)r5  r^  )r7   r~   r   rI   vstackra   r   rM   rd   r+  rJ   r   r;   rt   rv   )rH   rP   ZX_with_outliersrC  Zrobust_scalerr   r5   r5   r6    test_robust_scaler_unit_variancec  s    $
r  c                  C   sl  dddgdddgdddgdddgg} t  }|| }dddgdddgdddgdddgg}t|| ||}t| | dddgd	ddgdddgg}||}dddgd	ddgdddgg}t||d
d t| }t|| t| }t| }	||}
||	}dddgdddgdddgdddgg}t|
j	| t|j	| ||
}||}t| |j	 t| |j	 d S )Nrl   rY   r   333333ӿr^   gUUUUUU?gɿrX   r   rC   r   )
r   r   r   rx   rd   r   r   r   r   rH  )rP   rR   r   rT  r   r   r   rz  r   r   r  r  ZX_trans_csr_invZX_trans_csc_invr5   r5   r6   )test_maxabs_scaler_zero_variance_featuresu  s@    $











r  c                  C   sv   ddddgddddgddddgddddgg} t  }|| }ddd	dgddd
dgddddgddddgg}t|| d S )Nrl   rY   r   r   r  r   g      Yg       g{Gzt?g~jthg      п)r   r   r   )rP   rR   r   rT  r5   r5   r6   'test_maxabs_scaler_large_negative_value  s    








r  c                  C   sp   t dddgg} t }|| }|| }t dddgg}t| |  ||}t|  |  d S )Nr   rY   )r   r   r   rM   rd   r   r1   rx   )rP   rR   r   rT  rz   r5   r5   r6   (test_maxabs_scaler_transform_one_row_csr  s    


r  c                  C   s(  t tttfD ]} tdd}|| | }t| tr>t	| } t
| dkrjtt|jddtt ntt|jddd |j| jd kst||}t||  qtd} t }|| | }tt|jddd |j| jd kstt  }t| }t|| t|dd d S )NTrh   r-   r   rj   rY   rm   )rn   ro   rp   r   rM   rd   rq   rr   r7   r[   r:   r   r5  r   ra   rO   rw   r9   r;   rx   rs   r   )rP   rR   ry   rz   r   Zmax_absr5   r5   r6   test_maxabs_scaler_1d  s&    


 

r  c               
   C   s  t d dd d f } | jd }ddd||d fD ]}t | }t }t }t }t||D ]B}|| | }t| | }||}t| | }	||	}q^t	|j
|j
 t	|j
|j
 t	|j
|j
 |j|jkst|j|jkst|j|jkstt	|j|j t	|j|j t	|j|j t	|| ||  td|}
t | |
 }t | |
 }t	|j
|j
 |j|jkstt	|j|j t	|| ||  t | }t }tt||D ]2\}}|| | }t||j|j|||jd qq0d S )Nr   r   r-   rC   r   r   r   )r   r9   r   rM   r   r   r   r   r   r   Zmax_abs_rw   r;   rt   rd   r   r   rB   r   r   )rP   r?   r@   r   r   Zscaler_incr_csrZscaler_incr_cscr   r   r   r   r<   r5   r5   r6   test_maxabs_scaler_partial_fit  sT    


r  c                  C   s  t jd} | dd}t|}d|dd d f< |jd }|jd }d|j||< t|}|||fD ]}tddd}|	|}||k	st
t|}	tdd	d}|	|}||kst
t|}
|	|
fD ]@}t |jd
d}tdD ]}t|| d qt|d d qqhtjtjtjfD ]~}||}tdd	d	| }}||k	sLt
t|tjs^t
t|}tdD ]}t|| d qntt|d d qd S )Nr   rU   rV   rl   rD   l1Tnormri   Fr-   rj   rY   l2)r7   r~   r   rI   r   r   indptrr   r   rd   r;   r1   r5  r   r   r   
coo_matrixr   
lil_matrixrq   lar  )rH   r  X_sparse_unprunedindptr_3indptr_4X_sparse_prunedrP   
normalizerX_normX_norm1X_norm2row_sumsr<   initr5   r5   r6   test_normalizer_l1  s>    





r  c                  C   s  t jd} | dd}t|}d|dd d f< |jd }|jd }d|j||< t|}|||fD ]}tddd}|	|}||k	st
t|}tdd	d}|	|}	|	|kst
t|	}	||	fD ]:}
tdD ]}tt|
| d
 qtt|
d d qqhtjtjtjfD ]}||}tdd	d	| }
}|
|k	sFt
t|
tjsXt
t|
}
tdD ]}tt|
| d
 qhtt|
d d qd S )Nr   rU   rV   rl   rD   r  Tr  FrY   )r7   r~   r   rI   r   r   r  r   r   rd   r;   r1   r   r   r  r  r  r   r  rq   )rH   r  r  r  r  r  rP   r  r  r  r  r<   r  r5   r5   r6   test_normalizer_l2M  s<    





r  c                  C   s  t jd} | dd}t|}d|dd d f< |jd }|jd }d|j||< t|}|||fD ]}tddd}|	|}||k	st
t|}tdd	d}|	|}	|	|kst
t|	}	||	fD ]>}
t|
jd
d}tdD ]}t|| d qt|d d qqhtjtjtjfD ]~}||}tdd	d	| }
}|
|k	sJt
t|
tjs\t
t|
}
tdD ]}t|| d qltt|
d d qd S )Nr   rU   rV   rl   rD   r   Tr  Fr-   rj   rY   r  )r7   r~   r   rI   r   r   r  r   r   rd   r;   r1   r5  r   r   r   r  r   r  rq   r  r  )rH   r  r  r  r  r  rP   r  r  r  r  Zrow_maxsr<   r  r5   r5   r6   test_normalizer_max}  s>    





r  c                  C   s   t jd} | dd}d|dd d f< |dt|dd d f  f  d9  < t | }t|}|||fD ]F}tdd	}|	|}||k	st
t|}tt |t t| qpd S )
Nr   rU   rV   rl   rD   rC   r,   r   r  )r7   r~   r   rI   r5  Zargmaxr   r   r   rd   r;   r1   r   sign)rH   r  Z	X_all_negZX_all_neg_sparserP   r  r  r5   r5   r6   test_normalizer_max_sign  s    (


r  c               
   C   s>  t jddd} tt| ddt| jdddj tt	 tdggdd W 5 Q R X tt	 tdggd	d
 W 5 Q R X t jd}|dd}t
|}t d}||fD ]} t jt jfD ]v}dD ]l}| |} t| |d
}|j|kstt|}|dkrt |jdd}n|d }	|	jdd}t|| qqqt dddgdddgdddgg}dD ]r}t||dd\}
}|dkrt|t dddg n8|dkrt|t dddg nt|t dddg qdt
|}dD ]*}tt t||dd W 5 Q R X qt|ddd\}
}t|t dddg d S )N%   rD   rC   Frh   r   )rk   ri   rj   l3r  r0   rV   )r  r  r  r-   r   rf  rY   rl   rX   )r  r  r   T)r  Zreturn_norm      @r   r  g1C+@r   )r7   r~   r   rI   r   r   rZ  rJ   rK   rL   r   r   ra   r   r   r   r   r;   r1   r5  r   r   r[   NotImplementedError)rP   rsr  r%  ra   r   r  r  r  ZX_norm_squared_Znormsr5   r5   r6   test_normalize  sH     



"


r  c               	   C   s  t dddgdddgg} t jttjtjfD ]}||  }tddd	}t|	|}t 
|dkd
ksntt 
|dkdkst|	|}t|t|ksttdd|}t|	|}||k	stt 
|dkdkstt 
|dkd
ksttdd}|	|}||k	stt|}t 
|dkdks>tt 
|dkd
ksVttdd}|	|}|tk	r||ksttdd}t jdddgdddggt jd}|	|}|tk	r||kstt|}t 
|dkdkstt 
|dkd
ks,tq,tddd	}t jtfD ]Z}||  }t|	|}t 
|dkdksRtt 
|dkdksjt|	|}qtt |	t| W 5 Q R X d S )Nr-   r   rV   rC   rD   r,   rX   T)	thresholdri   rU   rh   Fr   r   )r7   r[   rr   r   r   r   ri   r   r1   rd   r   r;   r  rM   r   rJ   rK   rL   )ZX_r  rP   Z	binarizerZX_binZX_floatr5   r5   r6   test_binarizer  sR    






 

r  c                  C   s.  t jd} | d}tdd}|| ||}t ||j}t	 }t ||j}|
|}t|| | d}t ||j}	||}
t |
|j}||	}t|| t ||jd  }|||  ||  || |  }t|| t |	|jd  }|	||  |	|  || |  }t|| d S )Nr   rV   rU   Fr   )rC   rU   )r7   r~   r   random_sampler   rM   rd   dotrZ  r   r   r   	ones_liker9   r	   )rH   ZX_fitrR   ZX_fit_centeredZK_fitcentererZK_fit_centeredZK_fit_centered2ZX_predZK_predZX_pred_centeredZK_pred_centeredZK_pred_centered2ones_MZK_fit_centered3ones_prime_MZK_pred_centered3r5   r5   r6   test_center_kernel+  s.    









	 
r  c                  C   s@  t jd} | dd| dd }}dd }||}||}tdd}||}||}||j }	||j }
||j }||j }t }|	|	 t
||	| t
||
| t |	|	jd  }|	||	  |	|  ||	 |  }t
||	| t |
|	jd  }|
||	  |
|  ||	 |  }t
||
| d	S )
z-Check kernel centering for non-linear kernel.r   r   r   r  c                 S   s(   t t j| dddt j| ddd gS )zOur mapping function phi.r   N)Za_minZa_max)r7   r~  clipr#  r5   r5   r6   phi\  s
    z2test_kernelcenterer_non_linear_kernel.<locals>.phiFr   N)r7   r~   r   rI   r   r   rd   rZ  r   rM   r	   r  r9   )rH   rP   re   r  Zphi_XZ
phi_X_testrR   Zphi_X_centerZphi_X_test_centerKZK_testZK_centerZK_test_centerkernel_centererr  Z
K_centeredr  ZK_test_centeredr5   r5   r6   %test_kernelcenterer_non_linear_kernelW  s.    	







	 r  c                  C   s   t dddgdddgdddgdddgg} t d}| | j}t }td|fdt fg}| d slt	t
|||dd	}t|| d S )
NrD   r   r-   )rU   r  ZsvrZpairwiserC   )Zcv)r7   r[   ra   r  rZ  r   r%   r'   Z	_get_tagsr;   r&   r   )rP   Zy_truer  ZkcentZpipelineZy_predr5   r5   r6   test_cv_pipeline_precomputed  s    *
r  c                  C   sT   t jd} | d}t t t fD ](}|||}|	|}t
|| q&d S )Nr   r  )r7   r~   r   r  r   r   r   rM   rd   r   r   )rH   rP   objX_transformedZX_transformed2r5   r5   r6   test_fit_transform  s    

r  c                  C   sD   ddgddgddgg} t | } t| dddgdddgdddgg d S Nr-   r   )r   r   r#  r5   r5   r6   test_add_dummy_feature  s    r  c                  C   s`   t ddgddgddgg} t| } t | s6t| t|  dddgdddgdddgg d S r  )r   r  r   Zisspmatrix_coor;   r   r1   r#  r5   r5   r6   test_add_dummy_feature_coo  s    r  c                  C   s`   t ddgddgddgg} t| } t | s6t| t|  dddgdddgdddgg d S r  )r   r   r   Zisspmatrix_cscr;   r   r1   r#  r5   r5   r6   test_add_dummy_feature_csc  s    r  c                  C   s`   t ddgddgddgg} t| } t | s6t| t|  dddgdddgdddgg d S r  )r   r   r   Zisspmatrix_csrr;   r   r1   r#  r5   r5   r6   test_add_dummy_feature_csr  s    r  c                  C   sR   t j} | d d d df }tdddt t g}|D ]}||  || q4d S )NrC   Fr   )r   r   r   r   r   r   )rP   r   ZscalersrR   r5   r5   r6   test_fit_cold_start  s    

r  c               	   C   sZ   t dddddgdddd	d
gdddddgg} tjtdd t| jdd W 5 Q R X d S )Nr   r9  r   r:  r   rC   rU   rW   r   r0   rJ  rK  rL  rM  r1  z1axis should be either equal to 0 or 1. Got axis=2r   rj   )r7   r[   rJ   rK   rL   r   rZ  r#  r5   r5   r6   "test_quantile_transform_valid_axis  s    . r  methodbox-coxyeo-johnsonc              	   C   sX   t | d}tt}tt || W 5 Q R X tt || W 5 Q R X d S )Nr  )	r   r7   r5  ro   rJ   rK   r#   rd   rx   )r  ptrP   r5   r5   r6    test_power_transformer_notfitted  s    

r  standardizec                 C   s@   | dkrt |n|}t| |d}||}t||| d S )Nr  r  r  )r7   r5  r   r   r   rx   )r  r  rP   r  r   r5   r5   r6   test_power_transformer_inverse  s    
r  c                  C   s   t t} dD ]}td|d}|| }t| d|d}t|  \}}|rTt	|}t
|dd| t
|dd| t
| || t
||jd  t|j| jd kstt|jt jstqd S )NTFr  r  r,   r-   r   )r7   r5  ro   r   r   r   r   boxcoxflattenr   r   r   rx   lambdas_rP  r9   r;   rq   r,  )rP   r  r  r   X_trans_funcrT  Zlambda_expectedr5   r5   r6   test_power_transformer_1d  s    

r  c            
      C   s   t t} dD ]}td|d}|| }t| d|d}||fD ]|}t|jd D ]T}t	| d d |f 
 \}}|r~t|}t|d d |f | t||j|  qP||}	t|	|  q>t|j| jd kstt|jt jstqd S )Nr  r  r  r-   )r7   r5  r   r   r   r   r   r9   r   r  r  r   r   r  rx   r   rP  r;   rq   r,  )
rP   r  r  ZX_trans_classr  r   jrT  lmbdaX_invr5   r5   r6   test_power_transformer_2d	  s     


r  c               	   C   s  t dd} | tt t}d}tjt|d | | W 5 Q R X tjt|d | | W 5 Q R X tjt|d t	|dd W 5 Q R X tjt|d | t
tj W 5 Q R X tjt|d | t
tj W 5 Q R X tjt|d t	t
tjdd W 5 Q R X d S )Nr  r  zstrictly positiver   )r   rM   r7   r5  r   rJ   rK   rL   rd   r   r   r9   )r  ZX_with_negativesZnot_positive_messager5   r5   r6   9test_power_transformer_boxcox_strictly_positive_exception)	  s     
r  c                 C   s   t | dd d S )Nr  r  r   r#  r5   r5   r6   +test_power_transformer_yeojohnson_any_inputE	  s    r  c              	   C   s   t | d}tt}|| d}tjt|d  ||d d ddf  W 5 Q R X tjt|d  |	|d d ddf  W 5 Q R X d S )Nr  zBX has \d+ features, but PowerTransformer is expecting \d+ featuresr   r   r-   )
r   r7   r5  r   rM   rJ   rK   rL   rd   rx   )r  r  rP   Zwrong_shape_messager5   r5   r6   &test_power_transformer_shape_exceptionK	  s    


$r  c                  C   sR   t ddd} ttd d ddf }tdg| _| |}t| || d S )Nr  Fr  r   r-   )	r   r7   r5  r   r[   r  rd   r   rx   r  rP   r   r5   r5   r6   "test_power_transformer_lambda_zero^	  s
    
r  c                  C   sL   t ddd} ttd d ddf }tdg| _| |}t|| d S )Nr  Fr  r   r-   )r   r7   r5  r   r[   r  rd   r   r  r5   r5   r6   !test_power_transformer_lambda_oneh	  s
    
r  zmethod, lmbda)r  r1  )r  r   )r  r1  )r  r   )r  rY   c                 C   s   t jd}d}|jdd|dfd}t| dd}|g|_||}t| dd}||}tdt j	
|| | dd td| dd td| dd d S )	Nr   i N  r-   )locr   r/   Fr  rC   r   )r7   r~   r   rG  r   r  rx   r   r   linalgr  ru   rv   )r  r  rH   rN   rP   r  r  X_inv_transr5   r5   r6   #test_optimization_power_transformerr	  s    

r  c                  C   s^   ddddddddd	d
dddddg} t | dd} tdd| j}t j|dddsZtd S )Ngffffff@g rY   rX   gffffff?g333333@r_   gffffff@g?g@r  r   g"@g      @g      r,   r-   r  r  gzG?r}  r   )r7   r[   r   r   rM   r  Zallcloser;   )rP   r  r5   r5   r6   test_yeo_johnson_darwin_example	  s    "r  c                 C   s   t t}t| d}|| |jd }t |t |t jg}t	|dd}|| |jd }t
||dd ||}tt |t | d S )Nr  r   )rS  rV   r   )r7   r5  ro   r   rM   r  ZconcatenateZ	full_liker  r(   r   rd   r   r   )r  rP   r  Zlmbda_no_nansZ
lmbda_nansr   r5   r5   r6   test_power_transformer_nans	  s    






r  c                 C   sB   t }| dkrt|}t| |d}t||||| d S )Nr  )r  )ro   r7   r5  r   r   rM   rd   r   )r  r  rP   r  r5   r5   r6   $test_power_transformer_fit_transform	  s
    
r  c                 C   s   t }| dkrt|}| }||k	s*tt|| t| |dd}|| t|| ||}||k	slt|	|}t|| ||k	st|
|}||k	std S )Nr  Tr  ri   ro   r7   r5  ri   r;   r   r   rM   rd   r   rx   r  r  rP   Z
X_originalr  r   r  r5   r5   r6    test_power_transformer_copy_True	  s     







r  c                 C   s   t }| dkrt|}| }||k	s*tt|| t| |dd}|| t|| ||}||kslt| dkr~t|}|	|}||kst|
|}||kstd S )Nr  Fr  r  r  r5   r5   r6   !test_power_transformer_copy_False	  s"    







r  rn  皙?)r*  rS  )r0   r-   )r   c                 C   sB   t jdddd}tdd}|||  t|jd s>td S )NrV   r-   r  r)  Fr\   r   )	r   r~   r   rM   r   r7   r   rc   r;   )rn  rm  rR   r5   r5   r6   7test_standard_scaler_sparse_partial_fit_finite_variance	  s    

r  r   )r   r-   )rj  r0   c                 C   s   t j}t| dd|}tj|ddtj|dd }}tj|d d d |dd  d f g}||}t	|| d | d | d | d gg d S )NT)r   r  r   rj   rC   r0   r-   )
r   r   r   rM   r7   r   r   Zr_rd   r	   )r   rP   rR   ZX_minZX_maxre   r  r5   r5   r6   test_minmax_scaler_clip	
  s    (
r  c               	   C   sD   t  t} d}tjt|d | tdddf  W 5 Q R X dS )zCheck that `inverse_transform` from `StandardScaler` raises an error
    with 1D array.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/19518
    z'Expected 2D array, got 1D array insteadr   Nr   )r   rM   r   rJ   rK   rL   rx   )rR   r&  r5   r5   r6   -test_standard_scaler_raise_error_for_1d_input
  s    r  c                  C   s   dt jddddgd dddd	d
dddg t jddd } t }t  tdt |	| }W 5 Q R X t 
t |rt| tdkst| tdkst| dkst| dk stdS )a  Check that significantly non-Gaussian data before transforms correctly.

    For some explored lambdas, the transformed data may be constant and will
    be rejected. Non-regression test for
    https://github.com/scikit-learn/scikit-learn/issues/14959
    r   r   rX   r   rf  rU   rX           r  U   rA  r   r,   r-   r   rl   rY   rN  rC   N)r7   r[   r   r   r   r   r   r   RuntimeWarningr   r   r   r;   ru   rJ   r   rv   r   r   )ZX_non_gaussianr  r   r5   r5   r6   1test_power_transformer_significantly_non_gaussian#
  s     "  
r  Transformerc                 C   s*   |   tj}|tj}t|tj dS )9Check one-to-one transformers give correct feature names.N)rM   r   r   get_feature_names_outfeature_namesr   )r  tr	names_outr5   r5   r6   test_one_to_one_features;
  s    r  c              	   C   s   t d}|jtjtjd}|  |}| }t|tj |tj}t|tj t	
d}t jt|d td}|| W 5 Q R X dS )r  Zpandas)columnsz0input_features is not equal to feature_names_in_r   ZabcdN)rJ   ZimportorskipZ	DataFramer   r   r  rM   r  r   reescaperK   rL   rr   )r  pdZdfr  Znames_out_df_defaultZnames_out_df_valid_inmsginvalid_namesr5   r5   r6   test_one_to_one_features_pandasM
  s    

r  c                  C   sX   t jd} | d}t|}t |}| }|jd }t	|dd t
|D  dS )z.Test that kernel centerer `feature_names_out`.r   )rW   rU   r-   c                 S   s   g | ]}d | qS )Zkernelcentererr5   r   r5   r5   r6   r   w
  s     z:test_kernel_centerer_feature_names_out.<locals>.<listcomp>N)r7   r~   r   r  r"   r   rM   r  r9   r   r   )rH   rP   Z
X_pairwiser  r  Zsamples_out2r5   r5   r6   &test_kernel_centerer_feature_names_outm
  s    

r  )r   r  r  Znumpyr7   Znumpy.linalgr  r  Zscipyr   r   rJ   Zsklearn.utilsr   Zsklearn.utils._testingr   r   r   r   r	   r
   r   r   Zsklearn.utils.sparsefuncsr   Zsklearn.preprocessingr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zsklearn.preprocessing._datar    r!   Zsklearn.metrics.pairwiser"   Zsklearn.exceptionsr#   Zsklearn.baser$   Zsklearn.pipeliner%   Zsklearn.model_selectionr&   Zsklearn.svmr'   r(   Zsklearnr)   Z	load_irisr   r~   r   rH   rO   rN   r   r   r   rI   r   r   rn   ro   tolistrp   ZX_list_1colr1   r:   rB   rS   markZparametrizer[   r  rf   r{   r   r   r   r8   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r   r$  r'  r   r-  r.  r0  r6  r7  r@  rD  rI  rQ  rU  r\  re  rg  rh  rk  ro  rp  rr  rt  ru  rx  ry  r{  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r5  r   r9   r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r5   r5   r5   r6   <module>   s  4"
%  ' 2
&403.

* '
I ,F!	&9= /&%

$		.!810118,6		
		
*


	
	