U
    3d.                     @   s^  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dl	Z	d dl
Zd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d dlm Z  d dl!m"Z" dd Z#e	j$dddd Z%e	j$dddd Z&e	j$dd Z'e	j$dd  Z(d!d" Z)d#d$ Z*d%d& Z+d'd( Z,d)d* Z-e	j./d+d,gd,d-gfd.d/ Z0e	j./d0d1d2d3d4d5d6gfd7d8d9d:d;d<gfd=d>d?d@dAgfgdBdC Z1dDdE Z2e	j./dFdGi dHdIgfdJi dHgfdKdLdMidNdOgfgdPdQ Z3dRdS Z4dTdU Z5dVdW Z6dXdY Z7dZd[ Z8e	j./d\ed]d^d_d`dagfedbdcddd`g fededfddd`dagfedgdgddd`dhdigfedjdkdd`g fedldmdId`g feedndodpdqdId`g fgdrds Z9e	j./dteej:e;feej:ej:feej:e;feej:e;feej:ej:feej:e;fgdudv Z<dwdx Z=dydz Z>d{d| Z?d}d~ Z@dS )    N)loads)dumps)partial)get_data_home)clear_data_home)
load_files)load_sample_images)load_sample_image)load_digits)load_diabetes)load_linnerud)	load_iris)load_breast_cancer)	load_wine)load_csv_dataload_gzip_compressed_csv_data)scale)Bunch_is_resourcecheck_as_framec                 C   s   t j| rt|  d S N)ospathisdirshutilrmtree)r    r   D/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/datasets/tests/test_base.py_remove_dir    s    r    module)Zscopec                 c   s    t | d}|V  t| d S )NZscikit_learn_data_home_teststrmktempr    Ztmpdir_factoryZtmp_filer   r   r   	data_home%   s    r&   c                 c   s    t | d}|V  t| d S )NZscikit_learn_load_files_testr"   r%   r   r   r   load_files_root,   s    r'   c                 c   sB   t j| d}t j|dd}|d |  t|V  t| d S )NdirF)r)   delete   Hello World!
)tempfilemkdtempNamedTemporaryFilewritecloser#   r    )r'   Ztest_category_dir1Zsample_filer   r   r   test_category_dir_13   s    

r1   c                 c   s"   t j| d}t|V  t| d S )Nr(   )r,   r-   r#   r    )r'   Ztest_category_dir2r   r   r   test_category_dir_2=   s    
r2   c                 C   s^   t | d} | | ksttj| s&tt| d tj| r@tt | d} tj| sZtd S )Nr&   )r   AssertionErrorr   r   existsr   r3   r   r   r   test_data_homeD   s    


r6   c                 C   s>   t | }t|jdkstt|jdks,t|jd ks:td S )Nr   )r   len	filenamesr4   target_namesDESCR)r'   resr   r   r   test_default_empty_load_filesS   s    r<   c                 C   sN   t |}t|jdkstt|jdks,t|jd ks:t|jdgksJtd S )N      r+   )r   r7   r8   r4   r9   r:   datar1   r2   r'   r;   r   r   r   test_default_load_filesZ   s
    rA   c                 C   sl   t j| d }t|d|dd}t|jdks8tt|j	dksJt|j
dksXt|jdgkshtd S )N/testzutf-8)description
categoriesencodingr=   zHello World!
)r   r   abspathsplitpopr   r7   r8   r4   r9   r:   r?   )r1   r2   r'   categoryr;   r   r   r   .test_load_files_w_categories_desc_and_encodingb   s       rK   c                 C   sT   t |dd}t|jdkstt|jdks0t|jd ks>t|dd ksPtd S )NF)Zload_contentr=   r>   r?   )r   r7   r8   r4   r9   r:   getr@   r   r   r   test_load_files_wo_load_contento   s
    rM   allowed_extensionsz.txtz.jsonc                    sn   | d    d}fdd|D }|D ]}|d q*t|  d}t fdd|D t|jksjtdS )	z;Check the behaviour of `allowed_extension` in `load_files`.sub)z	file1.txtz
file2.jsonz
file3.jsonzfile4.mdc                    s   g | ]} | qS r   r   .0f)dr   r   
<listcomp>   s     z6test_load_files_allowed_extensions.<locals>.<listcomp>s   hellorN   c                    s   g | ]}|j  krt|qS r   )suffixr#   )rQ   prU   r   r   rT      s     
 N)mkdirwrite_bytesr   setr8   r4   )Ztmp_pathrN   filespathsrW   r;   r   )rN   rS   r   "test_load_files_allowed_extensionsy   s    r]   zHfilename, expected_n_samples, expected_n_features, expected_target_nameszwine_data.csv      Zclass_0Zclass_1Zclass_2iris.csv      ZsetosaZ
versicolorZ	virginicazbreast_cancer.csv9     Z	malignantZbenignc                 C   sV   t | \}}}|jd |ks t|jd |ks2t|jd |ksDttj|| d S )Nr   r=   )r   shaper4   nptestingassert_array_equal)filenameZexpected_n_samplesZexpected_n_featuresZexpected_target_namesactual_dataZactual_targetZactual_target_namesr   r   r   test_load_csv_data   s
    rk   c                  C   s   d} d}t | d}t | |d}t|dks.tt|dks>ttj|d |d  tj|d |d  tj|d	 |d	  |d
 dstd S )Nr`   ziris.rstdata_file_namerm   descr_file_namerb      r   r=   r>   z.. _iris_dataset:)r   r7   r4   rf   rg   rh   
startswith)rm   ro   Zres_without_descrZres_with_descrr   r   r   test_load_csv_data_with_descr   s    
 rs   z filename, kwargs, expected_shapezdiabetes_data_raw.csv.gz  
   diabetes_target.csv.gzzdigits.csv.gz	delimiter,  A   c                 C   s"   t | f|}|jt|kstd S r   )r   re   tupler4   )ri   kwargsZexpected_shaperj   r   r   r   "test_load_gzip_compressed_csv_data   s    	r}   c                  C   sB   d} d}t | d}t | |d\}}tj|| |ds>td S )Nrv   zdiabetes.rstrl   rn   z.. _diabetes_dataset:)r   rf   rg   rh   rr   r4   )rm   ro   Zexpected_datarj   descrr   r   r   -test_load_gzip_compressed_csv_data_with_descr   s    

r   c                  C   s   zt  } t| jdkstt| jdks,t| j}t|d ddd d f tjdddgtjdkshtt|d ddd d f tjddd	gtjdkst| j	stW n t
k
r   td
 Y nX d S )Nr>   r            )dtyper=      r_   3Could not load sample images, PIL is not available.)r   r7   imagesr4   r8   rf   allarrayuint8r:   ImportErrorwarningswarn)r;   r   r   r   r   test_load_sample_images   s    66r   c                  C   sL   z(t d} | jdkst| jdks&tW n tk
rF   td Y nX d S )Nz	china.jpgr   )i  i  rp   r   )r	   r   r4   re   r   r   r   )Zchinar   r   r   test_load_sample_image   s    r   c                	   C   s,   t d t t td W 5 Q R X d S )NZPILzblop.jpg)pytestZimportorskipraisesAttributeErrorr	   r   r   r   r   $test_load_missing_sample_image_error   s    
r   c                  C   sn   t dd} | jjdkst| jjs*tdt| jdks<t| jsFtt  }t	j
jt| jd |jdd d	S )
zTest to check that we load a scaled version by default but that we can
    get an unscaled version when setting `scaled=False`.F)Zscaledrt   ru   rt   ru   gT5@g-C6?)ZatolN)r   r?   re   r4   targetsizer7   feature_namesr:   rf   rg   Zassert_allcloser   )Zdiabetes_rawZdiabetes_defaultr   r   r   test_load_diabetes_raw   s    

  r   zEloader_func, data_shape, target_shape, n_target, has_descr, filenames)rc   rd   )rc   r>   Tri   )r^   r_   )r^   rp   )ra   rb   )ra   )   rp   Zdata_filenameZtarget_filenamer   )rt   )ry   @   )ry   	   )Zn_class)Q  r   )r   c                    s   |   t  tst jj|ks$t jj|ks4tt drTt j|d ksTt|d k	rnt j	|ksnt|r| j
s|t|rd kstt fdd|D std S )Nr   r=   data_modulec                    s&   g | ]}| ko t  d   | qS )r   r   rP   bunchr   r   rT     s   ztest_loader.<locals>.<listcomp>)
isinstancer   r4   r?   re   r   hasattrr7   r   r9   r:   r   )loader_funcZ
data_shapeZtarget_shapeZn_targetZ	has_descrr8   r   r   r   test_loader   s"    


r   z%loader_func, data_dtype, target_dtypec                 C   s   |  }t || ||d d S )N)Zexpected_data_dtypeZexpected_target_dtyper   )r   Z
data_dtypeZtarget_dtypeZdefault_resultr   r   r   test_toy_dataset_frame_dtype  s    r   c                  C   s2   t dd} tt| }d|_|d |jks.td S )Nx)r   y)r   r   r   r   r4   r   Zbunch_from_pklr   r   r   test_loads_dumps_bunch4  s    
r   c                  C   sf   t dd} d| jd< tt| }|jdks.t|d dks>td|_|jdksRt|d dksbtd S )Noriginal)keyzset from __dict__r   changed)r   __dict__r   r   r   r4   r   r   r   r   8test_bunch_pickle_generated_with_0_16_and_read_with_0_17;  s    

r   c                  C   s   t  } dt| kstd S )Nr?   )r   r)   r4   )r?   r   r   r   test_bunch_dirP  s    r   c               	   C   sX   d} t jt| d ddlm} W 5 Q R X d} t jt| d ddlm} W 5 Q R X dS )zLCheck that we raise the ethical warning when trying to import `load_boston`.z8The Boston housing prices dataset has an ethical problem)matchr   )load_bostonzBcannot import name 'non_existing_function' from 'sklearn.datasets')non_existing_functionN)r   r   r   sklearn.datasetsr   r   )msgr   r   r   r   r   test_load_boston_errorV  s    r   )Ar   r   r,   r   pickler   r   	functoolsr   r   Znumpyrf   r   r   r   r   r   r	   r
   r   r   r   r   r   Zsklearn.datasets._baser   r   Zsklearn.preprocessingr   Zsklearn.utilsr   Zsklearn.utils.fixesr   Z"sklearn.datasets.tests.test_commonr   r    Zfixturer&   r'   r1   r2   r6   r<   rA   rK   rM   markZparametrizer]   rk   rs   r}   r   r   r   r   r   r   Zfloat64intr   r   r   r   r   r   r   r   r   <module>   s   




	






	






