U
    2dV                     @   s  d dl Zd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ e Zdd Zdd Zdd Zdd Zedd Z ej!j"dej#ddd dd gd!d"ej#ddd gd!d"ej#d#gd!d"fej#d$d%d&d%d&ge$d"ej#d%d$d&ge$d"ej#d'ge$d"fe#d$d%d&d%d&ge#d%d$d&ge#d'gfgd!d(d)gd*d+d, Z%d-d. Z&ej!"d/d)d(gd0d1 Z'd2d3 Z(ej!j"d4ej#ddd dd gd!d"ej#d$d%d&d%d&ge$d"e#d$d%d&d%d&ggd!d(d)gd*d5d6 Z)d7d8 Z*d9d: Z+d;d< Z,d=d> Z-d?d@ Z.dAdB Z/dCdD Z0dEdF Z1dGdH Z2dIdJ Z3dKdL Z4dMdN Z5dOdP Z6dQdR Z7dSdT Z8dUdV Z9dWdX Z:dYdZ Z;dS )[    N)issparse)
coo_matrix)
csc_matrix)
csr_matrix)
dok_matrix)
lil_matrix)type_of_target)assert_array_equal)ignore_warnings)_to_object_array)LabelBinarizer)MultiLabelBinarizer)LabelEncoder)label_binarize)_inverse_binarize_thresholding)_inverse_binarize_multiclass)datasetsc                 C   s   t | dr|  } | S )Ntoarray)hasattrr   a r   J/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/preprocessing/tests/test_label.pyr      s    
r   c               
   C   s  ddddg} t dd}tddddggj}|| }t|jdg t|| t|||  t dd}|| }t|s~t	t|jdg t||
  t||
 |  t dd}ddddg} tddddggj}|| }t|jddg t|| tddgddgddgddgg}t|||  dd	d
d	dg} tddddgddddgddddgddddgddddgg}|| }t|jdd
d	dg t|| t|||  d S )NposFsparse_outputr   Tneg   ZspamZhameggs0)r   nparrayTfit_transformr	   classes_inverse_transformr   AssertionErrorr   )inplbexpectedgotZ	to_invertr   r   r   test_label_binarizer%   s:    







"4

r+   c               
   C   s   t  } tdddgdddgdddgg}| dddg}t|| tdddgdddgdddgdddgdddgdddgg}| ddddddg}t|| d S )	Nr   r   bder   cf)r   r    r!   r#   r	   	transform)r(   r)   r*   r   r   r   "test_label_binarizer_unseen_labelsN   s    "
2r2   c               
   C   s   t ddd} tddddg}tddddggj}| |}t|| t| || t ddd} tdddddg}tddddgddddgddddgddddgddddgg}| |}t|| t| || d S )Nr   	neg_label	pos_labelr         )r   r    r!   r"   r#   r	   r%   )r(   r'   r)   r*   r   r   r   'test_label_binarizer_set_label_encoding\   s&    






	

r9   c               	   C   s  t ddddg} t | }dddg}d}tjt|d || W 5 Q R X t }d}tjt|d |g  W 5 Q R X tjt|d |g  W 5 Q R X ddddg}d	}td
dd}tjt|d || W 5 Q R X d}td
d
d}tjt|d || W 5 Q R X d}tdd
dd}tjt|d || W 5 Q R X d}tjt|d* t	t
dd
gd
dggddd
gdd W 5 Q R X g dd
gdgdddgd
gg}d}tjt|d t | W 5 Q R X d}tjt|d, t	t
dd
gd
dggddd
dgdd W 5 Q R X d}tjt|d2 t	t dd
dgd
ddggddd
dgdd W 5 Q R X d}tjt|d$ t t ddgd
dgg W 5 Q R X tjt|d* tt ddgd
dggdd
dgd W 5 Q R X d S )Nr   r7   r8   )r   )r   r7   z@You appear to be using a legacy multi-label data representation.matchz.This LabelBinarizer instance is not fitted yetr   z3neg_label=2 must be strictly less than pos_label=1.r7   r4   z3neg_label=2 must be strictly less than pos_label=2.zqSparse binarization is only supported with non zero pos_label and zero neg_label, got pos_label=2 and neg_label=1Tr5   r6   r   zfoo format is not supportedZfoo)youtput_typeclasses	thresholdr8   z?You appear to be using a legacy multi-label data representationzAThe number of class is not equal to the number of dimension of y.z!output_type='binary', but y.shapebinaryz@Multioutput target data is not supported with label binarizationr@   )r    r!   r   fitpytestraises
ValueErrorr1   r%   r   r   r#   r   )Z	one_classr(   Zmulti_labelerr_msgZinput_labelsZy_seq_of_seqsr   r   r   test_label_binarizer_errorsx   sv    
(rI   zvalues, classes, unknownr7   r   r8   Zint64dtype   r,   r   r/   r-   objectstr)idsc              	   C   s   t  }||  t|j| t|| dddddg t|dddddg|  t  }|| }t|dddddg tjt	dd || W 5 Q R X d S )Nr   r   r7   zunseen labelsr;   )
r   rD   r	   r$   r1   r%   r#   rE   rF   rG   )valuesr@   unknownleretr   r   r   test_label_encoder   s    

rT   c               
   C   s   t  } | ddddddg t| jdddddg t| dddddddgdddddddg t| dddddddgdddddddg tt | ddg W 5 Q R X d S )	Nr   rL      r   r7   r8      )	r   rD   r	   r$   r1   r%   rE   rF   rG   )rR   r   r   r    test_label_encoder_negative_ints   s    , rX   rK   c              	   C   sJ   t  }|tjddg| d d}tjt|d |d W 5 Q R X d S )NZappleZorangerJ   zshould be a 1d arrayr;   )r   rD   r    r!   rE   rF   rG   r1   )rK   rR   msgr   r   r    test_label_encoder_str_bad_shape   s
    rZ   c               	   C   s   t  } tt | g  W 5 Q R X tt | g  W 5 Q R X t  } | dddddg d}tjt|d | dg W 5 Q R X tjt|d | ddd	g W 5 Q R X d
}tjt|d | d W 5 Q R X d S )Nr   r7   r8   rV   z!contains previously unseen labelsr;   r3   z should be a 1d array.+shape \(\) )r   rE   rF   rG   r1   r%   rD   )rR   rY   r   r   r   test_label_encoder_errors  s    r^   rP   c                 C   sH   t  }||  |g }ttg | |g }ttg | d S )N)r   rD   r1   r	   r    r!   r%   )rP   rR   ZtransformedZinverse_transformedr   r   r   test_label_encoder_empty_array  s    



r_   c                  C   s  dd dd dd g} t dddgdddgdddgg}| d  }dD ]}| D ]}t|d}|| }t||kszt|r|jj|jjkst|	 }t
|| t
dd	d
g|j |||kstt|d}|| | }t||kst|r|jj|jjkst|	 }t
|| t
dd	d
g|j |||ksPtqPqFtt2 |tt dddgd	ddgdddgg W 5 Q R X d S )Nc                   S   s
   dddgS Nr:   r   r   r7   r   r   r   r   r   <lambda>/      z9test_sparse_output_multilabel_binarizer.<locals>.<lambda>c                   S   s   ddhdhddhfS Nr7   r8   r   r   r   r   r   r   rc   0  rd   c                   S   s   t t dt dddhgS Nr:   ra   r   r7   iterr   r   r   r   rc   1  rd   r   r   TFr   r7   r8   )r    r!   r   r#   r   r&   indicesrK   Zindptrr   r	   r$   r%   rD   r1   rE   rF   rG   r   )inputsindicator_matinverser   r'   mlbr*   r   r   r   'test_sparse_output_multilabel_binarizer,  s8    "





ro   c                  C   s   dd dd dd g} t dddgdddgdddgg}| d  }| D ]}t }|| }t|| tdddg|j |||kstt }|| 	| }t|| tdddg|j |||ksFtqFd S )	Nc                   S   s
   dddgS r`   r   r   r   r   r   rc   W  rd   z+test_multilabel_binarizer.<locals>.<lambda>c                   S   s   ddhdhddhfS re   r   r   r   r   r   rc   X  rd   c                   S   s   t t dt dddhgS rf   rg   r   r   r   r   rc   Y  rd   r   r   r7   r8   )
r    r!   r   r#   r	   r$   r%   r&   rD   r1   )rk   rl   rm   r'   rn   r*   r   r   r   test_multilabel_binarizerT  s"    "


rp   c                  C   sF   t  } ddgdgg g}tddgddgddgg}t| || d S )Nr   r7   r   )r   r    r!   r	   r#   )rn   r>   Yr   r   r   &test_multilabel_binarizer_empty_samplem  s    rr   c               	   C   s   t  } ddgg}tddgddgg}d}tjt|d" | |ddgddgg}W 5 Q R X tdddgdddgg}t dddgd} tjt|d" | |ddgddgg}W 5 Q R X t|| d S )	Nr   r7   r   zunknown class.* will be ignoredr;   rL   r8   rC   )	r   r    r!   rE   ZwarnsUserWarningrD   r1   r	   )rn   r>   rq   Zwarning_messageZmatrixr   r   r   'test_multilabel_binarizer_unknown_classt  s    
&&rt   c               	   C   sN  dddg} t dddgdddgdddgg}tdddgd}t|| | t|jdddg tdddgd}t|| | | t|jdddg td	dddgd}t|| t dgdgdgg|f t|jd	dddg t	| } tdddgd}t|| | | d
}tddddgd}t
jt|d ||  W 5 Q R X d S )Nr:   ra   rb   r   r   r8   r7   rC   rL   ztThe classes argument contains duplicate classes. Remove these duplicates before passing them to MultiLabelBinarizer.r;   )r    r!   r   r	   r#   r$   rD   r1   Zhstackrh   rE   rF   rG   )r'   rl   rn   rH   r   r   r   'test_multilabel_binarizer_given_classes  s,    
" ru   c                  C   s   dddg} t dddgdddgdddgg}t dddgdddgdddgg}tdddgd}t|| | dddg|_t|| | d S )	Nr:   ra   rb   r   r   r8   r7   rC   )r    r!   r   r	   r#   r@   )r'   rl   Zindicator_mat2rn   r   r   r   (test_multilabel_binarizer_multiple_calls  s    
""rv   c                  C   s   dgdgdgg} t dddgdddgdddgg}t }t|| | t|||  t }t|| | | t|||  d S )Nr   r   r7   )r    r!   r   r	   r#   r%   rD   r1   r'   rl   rn   r   r   r   .test_multilabel_binarizer_same_length_sequence  s    "rx   c               	   C   s<  t dddg} dddgddd	gfd
ddgdddgfdddg| fg}tdddgdddgdddgg}|D ]\}}t }tj|td}t||| t|j| tj||td}t|| t }t|	|
|| t|j| tj||td}t|| qht }tt |i i ddifg W 5 Q R X d S )Nra   r7   r8   )23)1)r}   r{   r}   r{   r|   )r,   r/   r   )r   r,   r   r,   r/   )ry   rz   )ra   )ra   ry   r   r   rJ   )r   r    r!   r   rM   r	   r#   r$   r%   rD   r1   rE   rF   	TypeError)Ztuple_classesrk   rl   r'   r@   rn   Zindicator_mat_invr   r   r   ,test_multilabel_binarizer_non_integer_labels  s*    "
r   c                  C   s0   dg} t ddgg}t }t|| | d S )Nr   r   r   r   r   )r    r!   r   r	   r#   rw   r   r   r   $test_multilabel_binarizer_non_unique  s    r   c               	   C   s   dg} t  }||  tt |tddgg W 5 Q R X |tddgg |tddgg |tddgg tt |tdgg W 5 Q R X tt |tdddgg W 5 Q R X d S )Nr   r   r8   r   )r   r#   rE   rF   rG   r%   r    r!   )r'   rn   r   r   r   ,test_multilabel_binarizer_inverse_validation  s    
 r   c               	   C   s   t ddgddddgd} tddddgddddgg}t| | t ddgddddgd} tddddgddddgg}t| | t ddddgddddgd} tddddgddddgddddgddddgg}t| | d S )Nr   rW   r7   rL   rC   r   r8   )r   r    r!   r	   )outr)   r   r   r   $test_label_binarize_with_class_order  s    

2r   c              
   C   s,  dD ] }|dks|dkrH|rHt t t| ||||d W 5 Q R X qt| ||||d}tt|| t||ksxtt| }|dkrt	||d}nt
||||| d d}tt|t|  t|||d}	|	| }tt|| t||kst|	|}
tt|
t|  t|
t| kstqd S )	Nri   r   r@   r5   r6   r   Z
multiclassrC   g       @)r?   r@   rA   r=   )rE   rF   rG   r   r	   r   r   r&   r   r   r   r   r#   r%   )r>   r@   r6   r5   r)   r   Z	binarizedZy_typeZinversedr(   Zinverse_outputr   r   r   check_binarized_results	  sR    

  

r   c                  C   s   dddg} ddg}d}d}t ddgddgddggd d df d}t| |||| dddg} ddg}d}d}t ddgddgddggd d df d}t| |||| d S )Nr   r   r7   rV   )rV   r   r8   )r    r!   Zreshaper   r>   r@   r6   r5   r)   r   r   r   test_label_binarize_binary<  s    
.
.r   c               	   C   sf   dddg} dddg}d}d}dt d }t| |||| tt t| |d|dd W 5 Q R X d S )Nr   r   r7   r8   rV   Tr   )r    Zeyer   rE   rF   rG   r   r   r   r   r   test_label_binarize_multiclassO  s    

    r   c               	      s   t dddgdddgdddgg dddg} d}d}|  } fddtttttfD } g| D ]}t|| ||| qbt	t
 t|| d|dd W 5 Q R X d S )	Nr   r   r7   c                    s   g | ]}| qS r   r   ).0Zsparse_matrixZy_indr   r   
<listcomp>d  s   z2test_label_binarize_multilabel.<locals>.<listcomp>rV   Tr   )r    r!   r   r   r   r   r   r   rE   rF   rG   r   )r@   r6   r5   r)   Zy_sparser>   r   r   r   test_label_binarize_multilabel^  s.    "

    r   c                	   C   s   t t tddgddgddd W 5 Q R X t jtdd tddgddgd	 W 5 Q R X t jtd
d tddggdddgd	 W 5 Q R X d S )Nr   r7   r   )r@   r6   r5   zcontinuous target data is not r;   g333333?g@rC   zmismatch with the labelsr8   )rE   rF   rG   r   r   r   r   r   !test_invalid_input_label_binarizex  s    "r   c                  C   sF   t tdddgdddgdddggtd} t| tdddg d S )Nr   r   rV   r8   )r   r   r    Zaranger	   r!   )r*   r   r   r    test_inverse_binarize_multiclass  s
     r   c                  C   s8   t  } | dddtjg | tjg}t|dg dS )z]Check that label encoder encodes nans in transform.

    Non-regression test for #22628.
    r   r,   r7   N)r   rD   r    nanr1   r	   )rR   Zy_transr   r   r   test_nan_label_encoder  s    r   )<Znumpyr    rE   Zscipy.sparser   r   r   r   r   r   Zsklearn.utils.multiclassr   Zsklearn.utils._testingr	   r
   Zsklearn.utilsr   Zsklearn.preprocessing._labelr   r   r   r   r   r   Zsklearnr   Z	load_irisZirisr   r+   r2   r9   rI   markZparametrizer!   rM   rT   rX   rZ   r^   r_   ro   rp   rr   rt   ru   rv   rx   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s   )
N


	
(#3	