U
    3d                   	   @   s   d dl mZ d dlZd dlZd dlmZmZmZmZ ej	
dddddgd	d
gfddddgd	gfddddgg fgdd Zdd Zej	
deegdd Zej	
deegdd Zej	
dejeej	jdegdd ZdS )    )BytesION)_liac_arff_parser_pandas_arff_parser_post_process_frameload_arff_from_gzip_filezfeature_names, target_namescol_int_as_integercol_int_as_numericcol_float_as_realcol_float_as_numericcol_categorical
col_stringc              
   C   s   t d}|dddgdddgdddgdddgdd	d
gdd	d
gd}t|| |\}}t||jsftt|dkrt||jstn*t|dkrt||jstn|dkstdS )zNCheck the behaviour of the post-processing function for splitting a dataframe.pandas         g      ?g       @g      @abc)r   r   r	   r
   r   r   N)pytestimportorskipZ	DataFramer   
isinstanceAssertionErrorlenSeries)feature_namestarget_namespdZ
X_originalXy r   K/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/datasets/tests/test_arff_parser.pytest_post_process_frame   s"    "
r!   c               	   C   s4   d} t jt| d tdddddd W 5 Q R X dS )z3An error will be raised if the parser is not known.z8Unknown parser: 'xxx'. Should be 'liac-arff' or 'pandas')matchZxxxN)r   Zraises
ValueErrorr   )err_msgr   r   r    *test_load_arff_from_gzip_file_error_parserG   s    r%   parser_funcc           	      C   s   t d}ttdd}dddddddd	dd
ddd}ddd	g}dg}d| tkrbdnd| tkrpdnddd}| |d|||d\}}}}|j || kst	|j
|jd |j|dd dS )z9Check that we properly strip single quotes from the data.r   a5  
            @relation 'toy'
            @attribute 'cat_single_quote' {'A', 'B', 'C'}
            @attribute 'str_single_quote' string
            @attribute 'str_nested_quote' string
            @attribute 'class' numeric
            @data
            'A','some text','"expect double quotes"',0
            utf-8nominalcat_single_quoteZ	data_typenamestringstr_single_quotestr_nested_quotenumericclass)r)   r-   r.   r0   A	some textz'some text'z"expect double quotes"z'"expect double quotes"'r   Zoutput_arrays_typeZopenml_columns_infoZfeature_names_to_selectZtarget_names_to_selectr+   N)r   r   r   textwrapdedentencoder   columnstolistr   testingassert_series_equalilocr   	r&   r   Z	arff_fileZcolumns_infor   r   Zexpected_values_framer   r   r    +test_pandas_arff_parser_strip_single_quotesQ   sT    

r@   c           	      C   s   t d}ttdd}dddddddd	dd
ddd}ddd	g}dg}ddddd}| |d|||d\}}}}|j || kst|j	
|jd |j|dd dS )z9Check that we properly strip double quotes from the data.r   a5  
            @relation 'toy'
            @attribute 'cat_double_quote' {"A", "B", "C"}
            @attribute 'str_double_quote' string
            @attribute 'str_nested_quote' string
            @attribute 'class' numeric
            @data
            "A","some text","'expect double quotes'",0
            r'   r(   cat_double_quoter*   r,   str_double_quoter.   r/   r0   )rA   rB   r.   r0   r1   r2   z'expect double quotes'r   r3   r4   Nr   r   r   r5   r6   r7   r8   r9   r   r:   r;   r<   r   r=   r   r   r    +test_pandas_arff_parser_strip_double_quotes   sP    

rD   )Zmarksc           	      C   s   t d}ttdd}dddddddd	dd
ddd}ddd	g}dg}ddddd}| |d|||d\}}}}|j || kst|j	
|jd |j|dd dS )z7Check that we properly parse with no quotes characters.r   a'  
            @relation 'toy'
            @attribute 'cat_without_quote' {A, B, C}
            @attribute 'str_without_quote' string
            @attribute 'str_internal_quote' string
            @attribute 'class' numeric
            @data
            A,some text,'internal' quote,0
            r'   r(   cat_without_quoter*   r,   str_without_quotestr_internal_quoter/   r0   )rE   rF   rG   r0   r1   r2   z'internal' quoter   r3   r4   NrC   r=   r   r   r    'test_pandas_arff_parser_strip_no_quotes   sP    


rH   )ior   r5   r   Zsklearn.datasets._arff_parserr   r   r   r   markZparametrizer!   r%   r@   rD   paramZxfailrH   r   r   r   r    <module>   sR    


F
?