U
    3d                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZ d dlmZ eeeegZeeg ZdZejde dZe ddgddgddgddgddgddggZ!e ddddddgZ"ej#$d Z%e%j&ddZ'e%j&ddd k(e)Z*e%j+dddZ,e ddddddgZ-dd Z.dd Z/dd Z0d d! Z1d"d# Z2d$d% Z3d&d' Z4d(d) Z5d*d+ Z6d,d- Z7d.d/ Z8d0d1 Z9ej:d2ed3d4 Z;ej:d2ed5d6 Z<ej:d7ed8d9 Z=d:d; Z>ej:d2ed<d= Z?ej:d2ed>d? Z@ej:d2ed@dA ZAej:d2edBdC ZBej:d2eej:dDdEdFgej:dGdEdFgdHdI ZCej:dJdKdLdM ZDdNdO ZEdPdQ ZFdRdS ZGdTdU ZHdVdW ZIej:dXde dd d gddd gge ddd gddd gge d dgge ddgfddYge dd d gddd gge ddd d gddd d gge d dgge ddYgfde dd gddgge ddgddgge d dgge ddgggdZd[ ZJej:d\ddgddYggd]fgd^d_ ZKd`da ZLdbdc ZMddde ZNej:dfeej:dgddhdigdjgdkdl ZOdmdn ZPej:dfedodp ZQdS )q    N)	logsumexp)load_digits	load_iris)train_test_split)cross_val_score)assert_almost_equal)assert_array_equal)assert_array_almost_equal)assert_allclose)
GaussianNBBernoulliNB)MultinomialNBComplementNB)CategoricalNBz/The default value for `force_alpha` will changezignore:z:FutureWarning      )
      )sizer      )   d   r   c               	   C   s|   t  } | ttt}t|t | t}| t}tt	
||d tjtdd t  jttddgd W 5 Q R X d S )N   z;The target label.* in y do not exist in the initial classesmatchr   r   classes)r   fitXypredictr   predict_probapredict_log_probar	   nplogpytestraises
ValueErrorpartial_fit)clfy_predy_pred_probay_pred_log_proba r/   B/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/tests/test_naive_bayes.pytest_gnb-   s    


 r1   c                  C   sL   t  tt} ttddgd | jd t  tt	} t| j
 d d S )Nr         @r   r   )r   r   r    r!   r	   r%   arrayclass_prior_X1y1sumr+   r/   r/   r0   test_gnb_priorC   s    r9   c                  C   sB  t d} t tt}t tt| }t|j|j t|j|j t	
tjd } t jtt| d}t jttddg| d d}|jtt| d d t|j|j t|j|j t	dtjd d}t j|tjd d}t t| t| }t tt|}t|j|j t|j|j tdkt j}t jtt|d}d	S )
z5Test whether sample weights are properly used in GNB.r   r   sample_weightr   r   r   r;      )Z	minlengthN)r%   Zonesr   r   r    r!   r	   theta_var_rngZrandshaper*   randintZbincountastypefloat64)swr+   Zclf_swclf1clf2indr;   Zclf_duplr/   r/   r0   test_gnb_sample_weightL   s&    
rI   c               	   C   sB   t tddgd} d}tjt|d | tt W 5 Q R X dS )z:Test whether an error is raised in case of negative priorsg             @priorszPriors must be non-negativer   N	r   r%   r3   r'   r(   r)   r   r    r!   r+   msgr/   r/   r0   test_gnb_neg_priorsq   s    rP   c                  C   sZ   t tddgdtt} t| ddggtddggd t| jtddg dS )	z6Test whether the class prior override is properly used333333?gffffff?rK   皙g[9h?gs\?r   N)	r   r%   r3   r   r    r!   r	   r#   r4   r8   r/   r/   r0   test_gnb_priorsz   s    rS   c                  C   s   t ddgddgddgddgddgddgddgddgd	d	gd
d
gg
} t ddddddddddg
}t dddd	d
dddddg
}t|d}|| | d S )Nr   r   r   r   r      r   g{Gz?gQ?gQ?g{Gz?g)\(?gQ?        r      r   	   r   rK   )r%   r3   r   r   )r    rL   Yr+   r/   r/   r0   test_gnb_priors_sum_isclose   s"    
r\   c               	   C   sF   t tddddgd} d}tjt|d | tt W 5 Q R X dS )z`Test whether an error is raised if the number of prior is different
    from the number of class      ?rK   -Number of priors must match number of classesr   NrM   rN   r/   r/   r0   test_gnb_wrong_nb_priors   s    r_   c               	   C   sB   t tddgd} d}tjt|d | tt W 5 Q R X dS )z?Test if an error is raised if the sum of prior greater than onerJ         ?rK   z!The sum of the priors should be 1r   NrM   rN   r/   r/   r0   test_gnb_prior_greater_one   s    ra   c                  C   sD   t tddgd} | tt | ddggtdgks@tdS )z@Test if good prediction when class prior favor largely one classg{Gz?gGz?rK   rR   r   N)r   r%   r3   r   r    r!   r"   AssertionErrorr8   r/   r/   r0   test_gnb_prior_large_bias   s    rc   c                  C   sP   d} d}d}t dtjd f}t| |||\}}||ks@t||ksLtdS )z4Test when the partial fit is called without any datar   rX   r`   r   r   N)r%   emptyr    rA   r   Z_update_mean_variancerb   )Zprev_pointsmeanvarZx_emptyZtmeantvarr/   r/   r0   "test_gnb_check_update_with_no_data   s    rh   c                  C   s   t  tt} t  tttt}t| j|j t| j	|j	 t| j
|j
 t  tdd dd d f tdd d tt}|tdd d tdd d  t| j|j t| j	|j	 t| j
|j
 d S )Nr   r   r   )r   r   r    r!   r*   r%   uniquer	   r>   r?   r4   )r+   Zclf_pfZclf_pf2r/   r/   r0   test_gnb_partial_fit   s    2 rj   c                     sP   t  } | j| j   fdddD }t|d |d  t|d |d  d S )Nc                    s(   g | ] }t  |  |  qS r/   )r   r   r"   ).0fr    r!   r/   r0   
<listcomp>   s     z9test_gnb_naive_bayes_scale_invariance.<locals>.<listcomp>)绽|=r   g    _Br   r   r   )r   datatargetr   )irislabelsr/   rm   r0   %test_gnb_naive_bayes_scale_invariance   s
    rt   DiscreteNaiveBayesc                 C   s6   |   tt}tttdddgd |jd d S )Nr   r2   r   )r   X2y2r	   r%   r&   r3   class_log_prior_)ru   r+   r/   r/   r0   test_discretenb_prior   s      ry   c                 C   s  |  }| ddgddgddggdddg |  }|jddgddgddggdddgddgd t|j|j | tkrtt|jD ]}t|j| |j|  qnt|j|j |  }|jddggdgddgd |ddggdg |ddggdg t|j|j | tkrtt|jD ]J}t|j| j	|j| j	 tt
j|j| ddt
j|j| dd qt|jd d t
ddg t|jd d t
ddg t|jd d t
ddg t|jd d t
ddg nt|j|j d S )Nr   r   r   Zaxisr   )r   r*   r   class_count_r   rangelencategory_count_feature_count_rA   r%   r7   r3   )ru   rF   rG   iclf3r/   r/   r0   test_discretenb_partial_fit   s:    $,

 
 r   
NaiveBayesc              	   C   sx   t jtdd |  tt W 5 Q R X |  }|jttttd t jtdd |jtttdd W 5 Q R X d S )Nz8classes must be passed on the first call to partial_fit.r   r   .is not the same as on last call to partial_fit*   )	r'   r(   r)   r*   rv   rw   r%   ri   Zarange)r   r+   r/   r/   r0   $test_NB_partial_fit_no_first_classes  s      r   c                  C   s  dddgdddgdddgg} ddgddgddgg}dddg}t ttg| |gD ]v\}}| ||}||dd  dks~t||d gjdkstt||d d j	dd	t
d
d
gd qNdddg}t ttg| |gD ]\}}| ||}||dd jdkst||d d jdks4ttt
	||d gd tt
	||d gd tt
	t
|jd qd S )Nr   r   r   r   rW   r   r   )r   r   rz   r`   r   )r   r   )r   r   )zipr   r   r   r"   rb   r#   rA   r	   r7   r%   r3   r   exprx   )ZX_bernoulliZX_multinomialr!   ru   r    r+   r/   r/   r0   test_discretenb_predict_proba%  s4    
   
 r   c                 C   sT   |  }|j dd |dgdgdggdddg t|j}t|tddg d S )NF)	fit_priorr   r         ?)Z
set_paramsr   r%   r   rx   r	   r3   )ru   r+   priorr/   r/   r0   test_discretenb_uniform_priorF  s
    r   c              	   C   s   | ddgd}| dgdgdggdddg t|j}t|tddg d}tjt|d$ | dgdgdggdddg W 5 Q R X d}tjt|d( |j	dgdggddgdddgd	 W 5 Q R X d S )
Nr   class_priorr   r   r^   r   r   r   r   )
r   r%   r   rx   r	   r3   r'   r(   r)   r*   )ru   r+   r   rO   r/   r/   r0   test_discretenb_provide_priorR  s    (r   c           	      C   s   t  }t|j|jddd\}}}}d dddgfD ]X}| |d}||j|j | |d}|j||dddgd	 ||| t|j|j q0d S )
N皙?i  )Z	test_sizeZrandom_staterQ   r   r   r   r   r   )r   r   rp   rq   r   r*   r	   rx   )	ru   rr   Z
iris_data1Z
iris_data2Ziris_target1Ziris_target2r   Zclf_fullZclf_partialr/   r/   r0   .test_discretenb_provide_prior_with_partial_fite  s"       

 r   c                 C   s   dddgdddgdddgdddgg}ddddg}t jddddgt jd}||  }|  j|||d}t||ddddg |  }|j|d d |d d dddg|d d d |j|dd |dd |dd d |j|dd  |dd  |dd  d t||ddddg d S )Nr   r   r   )Zdtyper:   r<   r   )r%   r3   rD   r7   r   r   r"   r*   )ru   r    r!   r;   r+   r/   r/   r0   (test_discretenb_sample_weight_multiclassz  s    0((r   use_partial_fitFTtrain_on_single_class_yc                 C   s  dddgdddgdddgg}dddg}|rB|d d }|d d }t tt|}t|}|  }|rv|j|||d n||| ||d d |d kstdddd	d
g}|D ]V}	t||	d }
|
d krqt	|
t
jr|
jd |kstq|
D ]}|jd |kstqqd S )Nr   r   r   r   r   Zclasses_r{   rx   r   feature_log_prob_)sortedlistsetr}   r*   r   r"   rb   getattr
isinstancer%   ZndarrayrA   )ru   r   r   r    r!   r   Znum_classesr+   Zattribute_namesZattribute_name	attributeelementr/   r/   r0   )test_discretenb_degenerate_one_class_case  s4    
r   kind)densesparsec              	   C   s  | dkrt }n| dkr"tjt }t }d}tjt|d || t	 W 5 Q R X ||t	
|}t|t	 ||}||}tt||d t }|j|d d t	d d tt	d ||dd t	dd  ||dd  t	dd   |
|}t|t	 ||}	||}
tt|	|
d t|	| t|
| t }|j|t	tt	d |
|}t|t	 ||}||}tt||d t|| t|| d S )	Nr   r   z!Negative values in data passed tor   r   r   r   r   )rv   scipyr   
csr_matrixr   r'   r(   r)   r   rw   r"   r   r#   r$   r	   r%   r&   r*   ri   )r   r    r+   rO   r,   r-   r.   rG   Zy_pred2Zy_pred_proba2Zy_pred_log_proba2r   Zy_pred3Zy_pred_proba3Zy_pred_log_proba3r/   r/   r0   	test_mnnb  sB    


&










r   c               	   C   s,  t ddgddgg} t ddg}t }t ( tdt |j| |dddgd W 5 Q R X |ddggdksxt	|ddggdkst	|ddggdkst	t & tdt |ddggdg W 5 Q R X |ddggdkst	|ddggdkst	|ddggdks(t	d S )Nr   r   errorr   r   )
r%   r3   r   warningscatch_warningssimplefilterRuntimeWarningr*   r"   rb   )r    r!   r+   r/   r/   r0   !test_mnb_prior_unobserved_targets  s    
 
r   c                  C   s  t ddddddgddddddgddddddgddddddgg} t ddddg}tdd}|| | t ddg}tt |j| t ddd	ddd	gd
ddd
d
dgg}tt |j| t ddddddgg}t ddgg}|t | }t|	|| d S )Nr   r   r`   alphag      ?r]   r   g?g?UUUUUU?UUUUUU?g@fg;u?gy?)
r%   r3   r   r   r	   r   rx   r   r7   r#   )r    r[   r+   r   feature_probZX_testZunnorm_predict_probar#   r/   r/   r0   test_bnb  s$    :
r   c               	   C   s   t dddgdddgdddgdddgdddgg} t dddddg}tdd}|| | t |jd }t t |jd | jd dfj	}t
|j||  d S )Nr   r   r   r`   r   rJ   )r%   r3   r   r   r&   r   Ztiler{   rA   Tr	   r   )r    r[   r+   numZdenomr/   r/   r0   test_bnb_feature_log_probI  s    2
$r   c                  C   s  t ddddddgddddddgddddddgddddddgg} t ddddg}t ddddddgddddddgg}t |j}t |j}tdD ]0}t ||  ||< || ||   ||< qtd	d
}t	d}t
jt|d ||  | W 5 Q R X || | t ddddddgddddddgg}t|j| t ddg}	t|j|	 t ddddddg}
t|j|
 t|j| td	dd}|| | t|j| d S )Nr   r   qq?gqq?gUUUUUU?r   gUUUUUU?r   r`   r   z8Negative values in data passed to ComplementNB (input X)r   r   rW   T)r   Znorm)r%   r3   zerosrA   r|   r&   r7   r   reescaper'   r(   r)   r   r   r   r{   Zfeature_all_r	   r   )r    r[   thetaweightsZnormed_weightsr   r+   rO   Zfeature_countZclass_countZfeature_allr/   r/   r0   test_cnb`  sR    :	

&r   c               	   C   sp  t  } | ttt}t|t tddgddgg}tddg}t ddd} | || t| jtddg td	d
gg}tdg}t	
d}tjt|d | | W 5 Q R X tjt|d | || W 5 Q R X tddgg}tddgg}| }	t| |||	  t| j|jd ks:ttd	d	gd	dgd	d	gddgg}tddddg}t ddd} | || t| td	d	ggtdg t| jtddg dD ]}
td	d	gd	dgd	d	gddgg}tddddg}tddddg|
 }t ddd} | j|||d t| td	d	ggtdg t| jtddg qd S )Nr   rW   r   r   F)r   r   r   r   r   r   z9Negative values in data passed to CategoricalNB (input X)r   r   qq?)r`   rQ   r   g-C6?r   皙?r:   )r   r   rv   rw   r"   r   r%   r3   n_categories_r   r   r'   r(   r)   r7   r	   r#   r}   r~   rA   rb   )r+   r,   ZX3Zy3r    r!   	error_msgZX3_testZbayes_numeratorZbayes_denominatorZfactorr;   r/   r/   r0   test_categoricalnb  sJ    

 "$"$r   zDmin_categories, exp_X1_count, exp_X2_count, new_X, exp_n_categories_rW   c                 C   s   t ddgddgddgddgg}t ddddg}t dg}tdd| d}||| |j\}	}
t|	| t|
| ||}t|| t|j| d S )Nr   r   r   Fr   r   min_categories)r%   r3   r   r   r~   r   r"   r   )r   Zexp_X1_countZexp_X2_countZnew_XZexp_n_categories_ZX_n_categoriesZy_n_categoriesZexpected_predictionr+   ZX1_countZX2_countZpredictionsr/   r/   r0   &test_categoricalnb_with_min_categories  s    ""




r   zmin_categories, error_msgz"'min_categories' should have shapec              	   C   sl   t ddgddgddgddgg}t ddddg}tdd| d}tjt|d ||| W 5 Q R X d S )Nr   r   r   Fr   r   )r%   r3   r   r'   r(   r)   r   )r   r   r    r!   r+   r/   r/   r0   (test_categoricalnb_min_categories_errors  s
    "r   c               	   C   sB  t ddgddgg} t ddg}tdd}d}tjt|d |j| |ddgd W 5 Q R X tjt|d || | W 5 Q R X t ddgddgg}t|	| | t
dd}tjt|d |j| |ddgd W 5 Q R X tjt|d || | W 5 Q R X t dd	gddgg}t|	| | tdd}tjt|d || | W 5 Q R X t d
dgdd
gg}t|	| | tj| } tdd}tjt|d || | W 5 Q R X t ddgddgg}t|	| | t
dd}tjt|d || | W 5 Q R X t dd	gddgg}t|	| | d S )Nr   r   rX   r   zFalpha too small will result in numeric errors, setting alpha = 1.0e-10r   r   r   r   r`   )r%   r3   r   r'   warnsUserWarningr*   r   r	   r#   r   r   r   r   r   )r    r!   nbrO   probr/   r/   r0   
test_alpha  sB    




r   c            	   	   C   sr  t ddgddgg} t ddg}t ddg}t|d}|j| |ddgd t ddgddgg}t|jt | t d	d
gddgg}t|| | t ddg}t|d}d}tj	t
|d || | W 5 Q R X d}t |d dg}t|d}|j| |ddgd t| |dgdd t dddg}t|d}d}tj	t
|d || | W 5 Q R X d S )Nr   r   r   r   r   r   r   g333333?grq?r   gS?gևX?r`   rR   z+All values in alpha must be greater than 0.r   ro      )decimalrJ   g      @z7When alpha is an array, it should contains `n_features`)r%   r3   r   r*   r	   r   r&   r#   r'   r(   r)   r   _check_alpha)	r    r!   r   r   r   r   Zm_nbZexpected_msgZ	ALPHA_MINr/   r/   r0   test_alpha_vectorF  s0    



r   c                  C   sF  t dd\} }t|dk|dk}| | ||  }}ttdd| |dd}| dksZtttdd||dd}| d	kstttdd| d
k|dd}| dkstttdd|d
k|dd}| dksttt | |dd}| dkstttdd| |dd}| dksttt ||dd}| dksBtd S )NT)Z
return_X_yr   r   r   r   )ZcvgQ?gGz?rW   g(\?gq=
ףp?gp=
ף?r   )Zvar_smoothingg{Gz?)	r   r%   
logical_orr   r   re   rb   r   r   )r    r!   Z
binary_3v8ZX_3v8Zy_3v8Zscoresr/   r/   r0   test_check_accuracy_on_digitsn  s"    r   	Estimatorr   r   gdy=g-q=c              	   C   s   | t krt|trtd tddgddgg}tddg}d}d}| |d	}| |d
d}t||k rtjt	|d |
|| W 5 Q R X n|
|| |
|| d S )Nz7CategoricalNB does not support array-like alpha values.r   r   r   rW   r   ro   z9The default value for `force_alpha` will change to `True`r   Tr   Zforce_alphar   )r   r   r   r'   skipr%   r3   minr   FutureWarningr   )r   r   r    r!   Z	alpha_minrO   estZ	est_forcer/   r/   r0   test_force_alpha_deprecation  s    

r   c               	   C   s  d} t ddd}| dks ttddg}t |dd}|jd |_t| | d|  }t ddd}tj	t
|d	 | | kstW 5 Q R X t dd
}tj	t
|d	 | | kstW 5 Q R X t |dd}|jd |_tj	t
|d	 t| t| dg W 5 Q R X dS )zThe provided value for alpha must only be
    used if alpha < _ALPHA_MIN and force_alpha is True.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/10772
    ro   r   Tr   rX   r`   zCalpha too small will result in numeric errors, setting alpha = %.1eFr   r   N)r   r   rb   r%   r3   rA   Zn_features_in_r   r'   r   r   )Z
_ALPHA_MINbalphasrO   r/   r/   r0   test_check_alpha  s*    
r   c                 C   sH   |   tt}|t}t|dd}|t|j }t|	t| d S )Nr   rz   )
r   rv   rw   Zpredict_joint_log_probar   r%   Z
atleast_2dr   r
   r$   )r   r   ZjllZ
log_prob_xZlog_prob_x_yr/   r/   r0   test_predict_joint_proba  s
    
r   )Rr   Znumpyr%   Zscipy.sparser   r'   r   Zscipy.specialr   Zsklearn.datasetsr   r   Zsklearn.model_selectionr   r   Zsklearn.utils._testingr   r   r	   r
   Zsklearn.naive_bayesr   r   r   r   r   ZDISCRETE_NAIVE_BAYES_CLASSESZALL_NAIVE_BAYES_CLASSESrO   markfilterwarningsZ
pytestmarkr3   r    r!   randomZRandomStater@   normalr5   rC   intr6   rB   rv   rw   r1   r9   rI   rP   rS   r\   r_   ra   rc   rh   rj   rt   Zparametrizery   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r/   r/   r/   r0   <module>   s   
.	%	
		

1
!



2
6/G5	



*("%