U
    2d46                     @   s  d dl Z d dlZd dlmZ d dlZd dlmZ d dl	m
Z
 d dl	mZ d dlmZmZmZ dddd	gd	d
ddgd dddgddd	dggZe jddd d d d gdddd gddddgddddggfdd d d d gd d d d gddddgddddggfdd d d d gddddgddddgddddggfgdd Zdd Zdd Ze jddd d d d gd ddd gddddgddddggfdd d d d gd d d d gddddgddddggfdd d d d gd dddgddddgddddggfgdd Ze jddddgdd  Zd!d" Ze jd#edd$d%d& Zd'd( Ze jd)dd d d d ddgd d d d ddgd d ddd*d*gfdd d d d ddgd d ddddgd d ddd+d*gfdd d d dddgd d ddddgd ddd+d*d*gfgd,d- Ze jd.dd/d0d1dgdd2d3dgdd4d/dgdd4d/dggfdd5d6d7d8gd5d6d7d8gd9d:d;d<gd=d>d?d@ggfdd/d0d1dAgdd2d3dBgdd4d/dCgdd4d/dCggfge jdDdEdFdGgdHdI Ze jddddgdJdK ZdLdM Ze jdNdd dd+gfdd dd+gfgdOdP Z dQdR Z!e jdSej"ej#ej$ge jdTdej#ej$ge jdDdEdFdGgdUdV Z%e jdWej"ej#ej$ge jdDdEdFdGgdXdY Z&e j'dZe jd[dd\gd]d^ Z(d_d` Z)dadb Z*dcdd Z+e jdedFdfdg ed+D fdGdhdg ed+D fdEdidg ed+D fgdjdk Z,dS )l    N)clone)KBinsDiscretizer)OneHotEncoder)assert_array_almost_equalassert_array_equalassert_allclose_dense_sparse      ?      @g      g      @      ?   g      @   zstrategy, expecteduniformZkmeansquantilec                 C   s,   t dd| d}|t t||t d S )N   ordinaln_binsencodestrategy)r   fitXr   	transform)r   expectedest r   S/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/preprocessing/tests/test_discretization.pytest_fit_transform   s    	
r    c                   C   sR   t ddt t tdgd dt t ddtjjttksNt	d S )Nr   r   r   )
r   fit_transformr   nparrayr   n_bins_dtypeintAssertionErrorr   r   r   r   test_valid_n_bins    s    r)   c               	   C   s   t dd} t| d}d}tjt|d |t W 5 Q R X dddg} t| d}d}tjt|d |t W 5 Q R X ddddg} t| d}d}tjt|d |t W 5 Q R X d	dd	dg} t| d}d
}tjt|d |t W 5 Q R X d S )N)r             @r!   z:n_bins must be a scalar or array of shape \(n_features,\).matchr   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 3. Number of bins must be at least 2, and must be an int.g @z{KBinsDiscretizer received an invalid number of bins at indices 0, 2. Number of bins must be at least 2, and must be an int.)r#   fullr   pytestraises
ValueErrorr"   r   )r   r   err_msgr   r   r   test_invalid_n_bins_array&   s,    




r3   c                 C   s~   t ddddgd| dt}t||t ttjd }|jj|fksNt	t
|j|jD ]\}}|j|d fks\t	q\d S )Nr   r   r   r   r   )r   r   r   r   r   r#   r$   shape
bin_edges_r(   zipr%   )r   r   r   Z
n_features	bin_edgesr   r   r   r   test_fit_transform_n_bins_arrayL   s    	
  r8   r   c              	   C   s   t d tddgddgddgddgg}t| ddd}d	}tjt|d
 || W 5 Q R X |j	d dkstt
||}t|d d df t|jd  d S )Nalwaysr   r   r   r   r   r   )r   r   r   z2Feature 0 is constant and will be replaced with 0.r,   )warningssimplefilterr#   r$   r   r/   warnsUserWarningr   r%   r(   r   r   zerosr4   )r   r   r   warning_messageXtr   r   r   test_same_min_maxa   s    
"
rA   c               	   C   st   t d} tdd}tt ||  W 5 Q R X tdd}|| dd tt ||  W 5 Q R X d S )Nr*   r   r!   r   r   )	r#   aranger   r/   r0   r1   r   reshaper   )r   r   r   r   r   test_transform_1d_behavioro   s    


rD   i	   c                 C   sd   t dddddgdd}t dddddgdd}|d	|   }td
dd|}t|| d S )Nr+         @g      @g       @g      $@r   r   r   
   r   r   r   r   )r#   r$   rC   r   r"   r   )rE   ZX_initZXt_expectedr   r@   r   r   r   test_numeric_stability{   s
    rJ   c                  C   s   t ddddgddt} | t}t ddddgddt} | t}t|rVtttdd dD d	d
	|| t ddddgddt} | t}t|stttdd dD dd
	|
 |
  d S )Nr   r   r   rI   onehot-densec                 S   s   g | ]}t |qS r   r#   rB   .0rE   r   r   r   
<listcomp>   s     z'test_encode_options.<locals>.<listcomp>)r   r   r   r   F)
categoriesZsparse_outputonehotc                 S   s   g | ]}t |qS r   rL   rM   r   r   r   rO      s     T)r   r   r   r   spissparser(   r   r   r"   Ztoarray)r   ZXt_1ZXt_2ZXt_3r   r   r   test_encode_options   s4    

 
 rT   z8strategy, expected_2bins, expected_3bins, expected_5binsr*   r   c                 C   s   t ddddddgdd}td| d	d
}||}t||  td| d	d
}||}t||  td| d	d
}||}t||  d S )Nr   r   r   r   rF   rH   r   r   r   r   r   r      )r#   r$   rC   r   r"   r   Zravel)r   Zexpected_2binsZexpected_3binsZexpected_5binsr   r   r@   r   r   r   test_nonuniform_strategies   s    


rW   zstrategy, expected_invg      r+   g      g      @g      rG   g      g      @g      g      g      g      @g      g      ?g      ?g      @g      g      ?g      g        g      ?r   r   rQ   rK   c                 C   s0   t d| |d}|t}||}t|| d S )Nr   rU   )r   r"   r   inverse_transformr   )r   r   Zexpected_invkbdr@   Xinvr   r   r   test_inverse_transform   s    "

r[   c                 C   s   t ddddgd d d f }td| dd}|| t dd	gd d d f }||}t|jdd
d |j t|jdd
dg d S )Nr   r   r   r   r*   r   rU   r   rV   )Zaxis)	r#   r$   r   r   r   r   maxr%   min)r   r   rY   ZX2ZX2tr   r   r    test_transform_outside_fit_range   s    

r^   c                  C   s   t ddddgd d d f } |  }tddd}|| }t| | | }||}t|| t|t dgdgd	gd	gg d S )
Nr   r   r   r   r   rI   r   r	   r   )r#   r$   copyr   r"   r   rX   )r   ZX_beforer   r@   Z	Xt_beforerZ   r   r   r   test_overwrite   s    



r`   zstrategy, expected_bin_edgesc              	   C   sd   dgdgdgdgdgdgg}t d| d}d}tjt|d || W 5 Q R X t|jd | d S )Nr   r   )r   r   'Consider decreasing the number of bins.r,   )r   r/   r<   r=   r   r   r5   )r   Zexpected_bin_edgesr   rY   r?   r   r   r   test_redundant_bins   s    rb   c               	   C   s   t dddgdd} t ddddddg}t d	d	d
gdd}tdddd}d}tjt|d ||  W 5 Q R X t|j	d	 | t|
| | d S )Ng?gffffff?r   r   gq=
ףp?g=
ףp=?gzG?gp=
ף?r   r*   rH   r   r   r   ra   r,   )r#   r$   rC   r   r/   r<   r=   r   r   r5   r   )r   r7   r@   rY   r?   r   r   r   !test_percentile_numeric_stability
  s    rc   in_dtype	out_dtypec                 C   sr   t jt| d}td||d}|| |d k	r4|}n"|d krP|jt jkrPt j}n|j}||}|j|ksnt	d S N)r&   r   )r   r   r&   )
r#   r$   r   r   r   r&   float16float64r   r(   )rd   re   r   X_inputrY   Zexpected_dtyper@   r   r   r   test_consistent_dtype  s    

rj   input_dtypec                 C   sd   t jt| d}td|t jd}|| ||}td|t jd}|| ||}t|| d S rf   )	r#   r$   r   r   float32r   r   rh   r   )rk   r   ri   Zkbd_32ZXt_32Zkbd_64ZXt_64r   r   r   test_32_equal_64+  s    



rm   z,ignore:In version 1.3 onwards, subsample=2e5	subsamplewarnc                 C   s   t ddddgdd}tdddd	}|| t|}|j| d
 || t|jd |jd D ]\}}t j	
|| qf|jj|jjkstd S )Nr   r	   r
   r   r   rH   r   r   r   rn   r   )r#   r$   rC   r   r   r   
set_paramsr6   r5   ZtestingZassert_allcloser4   r(   )rn   r   kbd_defaultkbd_with_subsamplingZbin_kbd_defaultZbin_kbd_with_subsamplingr   r   r   'test_kbinsdiscretizer_subsample_default@  s    

 rt   c               	   C   sV   t ddddgdd} tdddd	d
}d}tjt|d ||  W 5 Q R X d S )Nr   r	   r
   r   r   rH   r   r   r   )r   r   r   rn   z4`subsample` must be used with `strategy="quantile"`.r,   )r#   r$   rC   r   r/   r0   r1   r   )r   rY   r2   r   r   r   0test_kbinsdiscretizer_subsample_invalid_strategyS  s
    ru   c               	   C   sP   t jdddd} tdddd}d}tjt|d	 ||  W 5 Q R X d S )
NiA r   r   d   r   r   r   >In version 1.3 onwards, subsample=2e5 will be used by default.r,   )	r#   randomrandrC   r   r/   r<   FutureWarningr   )r   rY   msgr   r   r   $test_kbinsdiscretizer_subsample_warn]  s
    r|   c               	   C   s   t jdddd} tdddd}t|}|jtdd	 d
}tj	t
|d ||  W 5 Q R X ||  t |jd |jd krt|jj|jjkstd S )Ni`[ r   r   rH   r   r   r   g     jArp   rw   r,   r   )r#   rx   ry   rC   r   r   rq   r'   r/   r<   rz   r   allr5   r(   r4   )r   rr   rs   r{   r   r   r   &test_kbinsdiscretizer_subsample_valuesg  s    
r~   zencode, expected_namesc                 C   s.   g | ]&}t d D ]}d| dt| qqS r*   feat_rangefloatrN   col_idZbin_idr   r   r   rO   |  s   
 rO   c                 C   s.   g | ]&}t d D ]}d| dt| qqS r   r   r   r   r   r   rO     s   
 c                 C   s   g | ]}d | qS r   r   )rN   r   r   r   r   rO     s     c                 C   s   dddgdddgdddgdd	dgg}t d	| d
|}||}dd tdD }||}|jd |jd ksttt|| dS )z[Check get_feature_names_out for different settings.
    Non-regression test for #22731
    r   r   r
   r   r   r   r   r   r*   rI   c                 S   s   g | ]}d | qS r   r   rM   r   r   r   rO     s     z>test_kbinsdiscrtizer_get_feature_names_out.<locals>.<listcomp>N)r   r   r   r   Zget_feature_names_outr4   r(   r   )r   Zexpected_namesr   rY   r@   Zinput_featuresZoutput_namesr   r   r   *test_kbinsdiscrtizer_get_feature_names_outw  s    $

r   )-r/   Znumpyr#   Zscipy.sparsesparserR   r:   Zsklearnr   Zsklearn.preprocessingr   r   Zsklearn.utils._testingr   r   r   r   markZparametrizer    r)   r3   r8   rA   rD   r   rJ   rT   rW   r[   r^   r`   rb   rc   rg   rl   rh   rj   rm   filterwarningsrt   ru   r|   r~   r   r   r   r   r   <module>   s   ,...
&...



...














 
 
	


	