U
    3d8                     @   s  d dl Z d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlZd dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ dd Zdd Zdd Zdd Zdd Zdd Zdd Z ej!"dddd Z#d d! Z$d"d# Z%eej!"ddd$d% Z&ed&d' Z'd(d) Z(ej!"ddd*d+ Z)ej!"ddd,d- Z*d.d/ Z+d0d1 Z,d2d3 Z-d4d5 Z.d6d7 Z/d8d9 Z0ej!"d:d;d<d=d>d?gd@dA Z1dBdC Z2ej!"dDdEdFdG Z3ej!"dDdEdHdI Z4dS )J    N)
block_diag)
csr_matrix)psi)assert_array_equal)LatentDirichletAllocation)_dirichlet_expectation_1d_dirichlet_expectation_2d)assert_allclose)assert_array_almost_equal)assert_almost_equal)!if_safe_multiprocessing_with_blas)NotFittedError)StringIOc                  C   s6   d} t jd| td}|g|  }t| }t|}| |fS )N   )r   r   )dtype)npfullintr   r   )n_componentsblockblocksX r   O/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/decomposition/tests/test_online_lda.py_build_sparse_mtx   s    
r   c                  C   sP   t  \} }d|  }t| ||dd}t| dd}||}||}t|| d S )Ng      ?r   )r   Zdoc_topic_priorZtopic_word_priorrandom_stater   r   )r   r   fit_transformr   )r   r   Zpriorlda_1lda_2Ztopic_distr_1Ztopic_distr_2r   r   r   test_lda_default_prior_params%   s    


r    c                  C   s|   t jd} t \}}t|dd| d}|| dddg}|jD ]6}t| dd  d d d	 }t	t
||ks@tq@d S )
Nr      batch)r   evaluate_everylearning_methodr   r   r!      r                  r   randomRandomStater   r   fitcomponents_setargsorttuplesortedAssertionErrorrngr   r   ldacorrect_idx_grps	componenttop_idxr   r   r   test_lda_fit_batch6   s    



r@   c                  C   s~   t jd} t \}}t|ddd| d}|| dddg}|jD ]6}t| d	d  d d d
 }t	t
||ksBtqBd S )Nr         $@r!   online)r   learning_offsetr#   r$   r   r%   r'   r*   r.   r/   r0   r:   r   r   r   test_lda_fit_onlineI   s    



rD   c                  C   s   t jd} t \}}t|dd| d}tdD ]}|| q.dddg}|jD ]6}t|	 d	d  d d d
 }t
t||ksNtqNd S )Nr   rA   d   r   rC   total_samplesr   r   r%   r'   r*   r.   r/   r   r1   r2   r   r   rangeZpartial_fitr4   r5   r6   r7   r8   r9   r;   r   r   r<   ir=   cr?   r   r   r   test_lda_partial_fit]   s    


rM   c                  C   s~   t jd} t \}}t|d| d}||  dddg}|jD ]6}t|	 dd  d d d }t
t||ksBtqBd S )	Nr   r"   r   r$   r   r%   r'   r*   r.   r/   )r   r1   r2   r   r   r3   toarrayr4   r5   r6   r7   r8   r9   r:   r   r   r   test_lda_dense_inputq   s    
  

rP   c                  C   sh   t jd} | jddd}d}t|| d}||}|dk sDttt j	|dd	t 
|jd  d S )
Nr   r)      
   sizer   r   g        r!   Zaxis)r   r1   r2   randintr   r   anyr9   r
   sumonesshape)r;   r   r   r<   X_transr   r   r   test_lda_transform   s    
r]   method)rB   r"   c                 C   sL   t jd}|jddd}td| |d}||}||}t||d d S )Nr   rS   )2   rR   rT   r)   rN   r(   )r   r1   r2   rW   r   r   	transformr
   )r^   r;   r   r<   ZX_fitr\   r   r   r   test_lda_fit_transform   s      

ra   c               	   C   s>   t dd} t }d}tjt|d ||  W 5 Q R X d S )N)r)   rS         z^Negative values in data passedmatch)r   r   r   pytestraises
ValueErrorr3   )r   r<   regexr   r   r   test_lda_negative_input   s
    ri   c               	   C   sL   t jd} | jddd}t }d}tjt|d || W 5 Q R X d S )Nr   r(   rQ   rT   z}This LatentDirichletAllocation instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.rc   )	r   r1   r2   rW   r   re   rf   r   
perplexity)r;   r   r<   rh   r   r   r   test_lda_no_component_error   s    rk   c                 C   s~   t  \}}tjd}t|d| d|d}|| dddg}|jD ]6}t| dd  d d d	 }t	t
||ksBtqBd S )
Nr   r&   r!   )r   n_jobsr$   r#   r   r%   r'   r*   r.   r/   )r   r   r1   r2   r   r3   r4   r5   r6   r7   r8   r9   )r^   r   r   r;   r<   r=   rL   r?   r   r   r   test_lda_multi_jobs   s    



rm   c                  C   s   t jd} t \}}t|ddd| d}tdD ]}|| q0dddg}|jD ]6}t|	 d	d  d d d
 }t
t||ksPtqPd S )Nr   r&         @   )r   rl   rC   rG   r   r%   r'   r*   r.   r/   rH   rJ   r   r   r   test_lda_partial_fit_multi_jobs   s    


rp   c               	   C   s   t jd} | dd}| dd}t jjd|dfd}t|dd| d	}|| | jd|d
 |fd}tjtdd |	|| W 5 Q R X | jd||d
 fd}tjtdd |	|| W 5 Q R X d S )Nr   r   r+   rS   r(   rT   rn   rR   rF   r!   zNumber of samplesrc   zNumber of topics)
r   r1   r2   rW   r   r3   re   rf   rg   Z_perplexity_precomp_distr)r;   r   Z	n_samplesr   r<   Zinvalid_n_samplesZinvalid_n_componentsr   r   r   test_lda_preplexity_mismatch   s"    
rq   c           	      C   s   t  \}}t|d| ddd}t|d| ddd}|| |j|dd}|| |j|dd}||ksjt|j|dd}|j|dd}||kstd S )	Nr!   rE   r   r   max_iterr$   rG   r   rS   FZsub_samplingT)r   r   r3   rj   r9   )	r^   r   r   r   r   perp_1perp_2Zperp_1_subsamplingZperp_2_subsamplingr   r   r   test_lda_perplexity   s.    


rw   c                 C   sf   t  \}}t|d| ddd}t|d| ddd}|| ||}|| ||}||ksbtd S )Nr!   rE   r   rr   rS   )r   r   r   scorer9   )r^   r   r   r   r   Zscore_1Zscore_2r   r   r   test_lda_score  s(    




ry   c                  C   sL   t  \} }t| ddddd}|| ||}|| }t|| d S )Nr!   r"   rE   r   rr   )r   r   r3   rj   rO   r   )r   r   r<   ru   rv   r   r   r   test_perplexity_input_format-  s    


rz   c                  C   sb   t  \} }t| ddd}|| |j|dd}||}td|t|j  }t	|| d S )NrS   r   )r   rs   r   Frt   rb   )
r   r   r3   rj   rx   r   exprY   datar   )r   r   r<   Zperplexity_1rx   Zperplexity_2r   r   r   test_lda_score_perplexity>  s    
  

r}   c                  C   sD   t  \} }t| ddddd}|| |j}||}t|| d S )Nr!   r"   r   )r   rs   r$   r   r#   )r   r   r3   Zbound_rj   r   )r   r   r<   Zperplexity1Zperplexity2r   r   r   test_lda_fit_perplexityL  s    


r~   c                  C   sR   t d} | t| fD ]6}tdd|}t|jjddt |jj	d  qdS )z+Test LDA on empty document (all-zero rows).)r)   r(   i  )rs   r   rV   r!   N)
r   zerosr   r   r3   r   r4   rY   rZ   r[   )Zr   r<   r   r   r   test_lda_empty_docsb  s    
 r   c               	   C   s   t ddd} t | }t| d| t|t t| tt |  dd | dd} tt	| t| tt j| dd	d
d
t j
f  ddd d
S )z9Test Cython version of Dirichlet expectation calculation.irS   i'  r   gҶOɃ;)atolrE   r!   rV   Ngdy=gA:)>)Zrtolr   )r   ZlogspaceZ
empty_liker   r	   r{   r   rY   Zreshaper   Znewaxis)xZexpectationr   r   r   test_dirichlet_expectationl  s    
&&r   c                 C   s   t  \}}t|dd| |dd}t }tj| }t_z|| W 5 |t_X | d}	| d}
||	ksrt||
ks~td S )Nr   r"   r   )r   rs   r$   verboser#   r   
rj   )	r   r   r   sysstdoutr3   getvaluecountr9   )r   r#   expected_linesexpected_perplexitiesr   r   r<   outZold_outZn_linesZn_perplexityr   r   r   check_verbosity|  s$    
r   z;verbose,evaluate_every,expected_lines,expected_perplexities)Fr!   r   r   )Fr   r   r   )Tr   r   r   )Tr!   r   r   )Tr&   r   r!   c                 C   s   t | ||| d S )N)r   )r   r#   r   r   r   r   r   test_verbosity  s    r   c                  C   s>   t  \} }t| d|}| }tdd t| D | dS )z6Check feature names out for LatentDirichletAllocation.)r   c                 S   s   g | ]}d | qS )Zlatentdirichletallocationr   ).0rK   r   r   r   
<listcomp>  s     z.test_lda_feature_names_out.<locals>.<listcomp>N)r   r   r3   Zget_feature_names_outr   rI   )r   r   r<   namesr   r   r   test_lda_feature_names_out  s    
 r   r$   )r"   rB   c                 C   s^   t jd}|jddj|dd}tdd| d}|| |jj|ksJt	|j
j|ksZt	dS )	z2Check data type preservation of fitted attributes.r   rQ   rT   F)copyr)   r   r   r$   N)r   r1   r2   uniformastyper   r3   r4   r   r9   Zexp_dirichlet_component_)r$   Zglobal_dtyper;   r   r<   r   r   r   test_lda_dtype_match  s      
r   c                 C   st   t j|}|jdd}|t j}td|| d|}td|| d|}t|j	|j	 t|
||
| dS )z>Check numerical consistency between np.float32 and np.float64.rQ   rT   r)   r   N)r   r1   r2   r   r   Zfloat32r   r3   r	   r4   r`   )r$   Zglobal_random_seedr;   ZX64ZX32Zlda_64Zlda_32r   r   r   test_lda_numerical_consistency  s&        r   )5r   Znumpyr   Zscipy.linalgr   Zscipy.sparser   Zscipy.specialr   Znumpy.testingr   re   Zsklearn.decompositionr   Z&sklearn.decomposition._online_lda_fastr   r   Zsklearn.utils._testingr	   r
   r   r   Zsklearn.exceptionsr   ior   r   r    r@   rD   rM   rP   r]   markZparametrizera   ri   rk   rm   rp   rq   rw   ry   rz   r}   r~   r   r   r   r   r   r   r   r   r   r   r   <module>   sp   
	






