U
    sVc                     @   s   d Z ddlZddlmZ ddlZddlZddlmZmZ ddl	Z
ddlZddlmZ ddlmZ ddlmZmZmZmZ ddlm  mZ ddlZddlmZ ddlmZ dd	l m!Z!m"Z"m#Z#m$Z$m%Z% zddl&Z&d
Z'W n e(k
r   dZ'Y nX z>e * edde)d edde*d ddl+Z+W 5 Q R X d
Z,W n e(k
rH   dZ,Y nX ej-dZ.ej/ej0dej-j1e, pxeddkdddej0dej-j1e' dddgddd Z2ej/dd Z3ej/dd Z4ej/d d! Z5ej/d"d# Z6ej/d$d% Z7ej/ej8ej9j:ej8ej9j;ej8ej9j<ej=d&d'ej=d(d'ej=d)d'ej=d*d'gdd+d, Z>dKd.d/Z?d0d1 Z@d2d3 ZAd4d5 ZBd6d7 ZCd8d9 ZDd:d; ZEd<d= ZFd>d? ZGd@dA ZHG dBdC dCZIG dDdE dEeIZJej-dFG dGdH dHeIZKG dIdJ dJeIZLdS )Lz test parquet compat     N)BytesIO)catch_warningsfilterwarnings)
get_option)is_platform_windows)pa_version_under2p0pa_version_under5p0pa_version_under6p0pa_version_under8p0)Version)FastParquetImplPyArrowImpl
get_engineread_parquet
to_parquetTFignorez	`np.bool`)categoryz.*Int64Index.*z4ignore:RangeIndex.* is deprecated:DeprecationWarningfastparquetmode.data_managerarrayz4fastparquet is not installed or ArrayManager is usedreason)Zmarkspyarrowpyarrow is not installed)paramsc                 C   s   | j S Nparamrequest r    @/tmp/pip-unpacked-wheel-xj8nt62q/pandas/tests/io/test_parquet.pyengineB   s    r"   c                   C   s   t std dS )Nr   r   )_HAVE_PYARROWpytestskipr    r    r    r!   paW   s    
r&   c                   C   s*   t std ntddkr&td dS )Nzfastparquet is not installedr   r   z.ArrayManager is not supported with fastparquetr   )_HAVE_FASTPARQUETr$   r%   r   r    r    r    r!   fp^   s
    
r(   c                   C   s   t dddgddS )N         fooAB)pd	DataFramer    r    r    r!   	df_compatg   s    r2   c               
   C   sD   t tdttddtjdddddd	dgt jd
ddd} | S )Nabcr)            @      @float64dtypeTF20130101r+   periods)abdef)r0   r1   listrangenparange
date_range)dfr    r    r!   df_cross_compatl   s    rH   c                   C   s   t tddtjdgdd dgdddgddd	gttd
dtdddtjdddddtjdgdddgt jdddt 	dt j
t 	dgdS )Nr3   r=   c   foo   bars   bazr,   barbazr)   r4   r+      u1r5   r6   r7   r8          @      @TFr:   r;   Z20130103)stringZstring_with_nanZstring_with_nonebytesunicodeintZuintfloatZfloat_with_nanbooldatetimeZdatetime_with_nat)r0   r1   rB   rD   nanrC   rE   astyperF   	TimestampZNaTr    r    r    r!   df_full~   s$    

r\   z2019-01-04T16:41:24+0200z%Y-%m-%dT%H:%M:%S%zz2019-01-04T16:41:24+0215z2019-01-04T16:41:24-0200z2019-01-04T16:41:24-0215c                 C   s   | j S r   r   r   r    r    r!   timezone_aware_date_list   s    r]   r*   c
              	      s   p
ddipi dkr |r4|d< |d<  fdd}
dkrtt  |
|	 W 5 Q R X n|
|	 dS )a  Verify parquet serializer and deserializer produce the same results.

    Performs a pandas to disk and disk to pandas round trip,
    then compares the 2 resulting DataFrames to verify equality.

    Parameters
    ----------
    df: Dataframe
    engine: str, optional
        'pyarrow' or 'fastparquet'
    path: str, optional
    write_kwargs: dict of str:str, optional
    read_kwargs: dict of str:str, optional
    expected: DataFrame, optional
        Expected deserialization result, otherwise will be equal to `df`
    check_names: list of str, optional
        Closed set of column names to be compared
    check_like: bool, optional
        If True, ignore the order of index & columns.
    repeat: int, optional
        How many times to repeat the test
    compressionNr"   c              
      sV   t | D ]H}jf tdd tf}W 5 Q R X tj| d qd S )NTrecord)check_names
check_likecheck_dtype)rC   r   r   r   tmassert_frame_equal)repeat_actualrc   rb   ra   rG   expectedpathread_kwargswrite_kwargsr    r!   compare   s    z!check_round_trip.<locals>.compare)rd   ensure_clean)rG   r"   rk   rm   rl   rj   ra   rb   rc   rf   rn   r    ri   r!   check_round_trip   s    "
rp   c                 C   s|   t rLddlm} |j| dd}t|jjt|ks6t|jjt|ksxtn,ddl	m
} |j
| dd}|jjj|ksxtdS )zCheck partitions of a parquet file are as expected.

    Parameters
    ----------
    path: str
        Path of the dataset.
    expected: iterable of str
        Expected partition names.
    r   NF)Zvalidate_schemaZhive)partitioning)r   pyarrow.parquetparquetZParquetDatasetlenZ
partitionsZpartition_namesAssertionErrorsetZpyarrow.datasetdatasetrq   schemanames)rk   rj   pqrw   Zdsr    r    r!   check_partition_names   s    
r{   c              	   C   s.   d}t jt|d t| dd W 5 Q R X d S )Nz.engine must be one of 'pyarrow', 'fastparquet'matchr,   rL   )r$   raises
ValueErrorrp   )r2   msgr    r    r!   test_invalid_engine   s    r   c              	   C   s$   t dd t|  W 5 Q R X d S )Nio.parquet.enginer   r0   option_contextrp   )r2   r&   r    r    r!   test_options_py  s    r   c              	   C   s$   t dd t|  W 5 Q R X d S )Nr   r   r   )r2   r(   r    r    r!   test_options_fp
  s    r   c              	   C   s$   t dd t|  W 5 Q R X d S )Nr   autor   )r2   r(   r&   r    r    r!   test_options_auto  s    r   c              	   C   s  t tdtstt tdts$ttdd< t tdtsDtt tdtsVtt tdtshtW 5 Q R X tdd< t tdtstt tdtstt tdtstW 5 Q R X tdd> t tdtstt tdtstt tdtstW 5 Q R X d S )Nr   r   r   r   )
isinstancer   r   ru   r   r0   r   )r(   r&   r    r    r!   test_options_get_engine  s    r   c               	   C   s.  ddl m}  | d}| d}ts(dnttjt|k }tsBdnttjt|k }to\| }tof| }|s*|s*|rd| d}t	j
t|d td	 W 5 Q R X n&d
}t	j
t|d td	 W 5 Q R X |rd| d}t	j
t|d td	 W 5 Q R X n&d}t	j
t|d td	 W 5 Q R X d S )Nr   )VERSIONSr   r   FzPandas requires version .z. or newer of .pyarrow.r|   r   z%Missing optional dependency .pyarrow.z. or newer of .fastparquet.z)Missing optional dependency .fastparquet.)Zpandas.compat._optionalr   getr#   r   r   __version__r'   r   r$   r~   ImportErrorr   )r   Z
pa_min_verZ
fp_min_verZhave_pa_bad_versionZhave_fp_bad_versionZhave_usable_paZhave_usable_fpr}   r    r    r!   "test_get_engine_auto_error_message,  s8    



r   c              	   C   sj   | }t  T}|j||d d t||d}t || t||ddgd}t ||ddg  W 5 Q R X d S )Nr"   r^   r"   r=   r?   r"   columns)rd   ro   r   r   re   rH   r&   r(   rG   rk   resultr    r    r!   test_cross_engine_pa_fpW  s    
r   c              
   C   s   | }t  j}|j||d d tddD t||d}t || t||ddgd}t ||ddg  W 5 Q R X W 5 Q R X d S )Nr   Tr_   r   r=   r?   r   )rd   ro   r   r   r   re   r   r    r    r!   test_cross_engine_fp_pae  s    
r   c                   @   s:   e Zd Zdd Zdd Zejjejddddd	 Z	d
S )Basec              
   C   sB   t  0}tj||d t|||d d W 5 Q R X W 5 Q R X d S )Nr|   r^   )rd   ro   r$   r~   r   )selfrG   r"   excerr_msgrk   r    r    r!   check_error_on_writet  s    
zBase.check_error_on_writec              
   C   s>   t  ,}t | t|||d d W 5 Q R X W 5 Q R X d S )Nr   )rd   ro   external_error_raisedr   )r   rG   r"   r   rk   r    r    r!   check_external_error_on_writez  s    
z"Base.check_external_error_on_writedhttps://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/parquet/simple.parquetT)urlZcheck_before_testc                 C   s.   |dkrt | d}t|}t|| d S )Nr   r   )r$   importorskipr   rd   re   )r   r2   r"   r   rG   r    r    r!   test_parquet_read_from_url  s    	
zBase.test_parquet_read_from_urlN)
__name__
__module____qualname__r   r   r$   marknetworkrd   r   r    r    r    r!   r   s  s   r   c                   @   s   e Zd Zdd Zdd Zdd Zejddd	d
dgdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zejd$d%d&d'd(d)d*d+d,d-g	d.d/ ZdS )0	TestBasicc              	   C   sJ   t dddgddt dtdddgfD ]}d}| ||t| q,d S )Nr)   r*   r+   r,   r:   z+to_parquet only supports IO with DataFrames)r0   Seriesr[   rD   r   r   r   )r   r"   objr   r    r    r!   
test_error  s    zTestBasic.test_errorc                 C   s6   t tdttddd}ddg|_t|| d S )Nr3   r)   r4   rR   rU   r,   rL   )r0   r1   rB   rC   r   rp   )r   r"   rG   r    r    r!   test_columns_dtypes  s    
zTestBasic.test_columns_dtypesc                 C   s   t tdttddd}d}ddg|_| ||t| ddg|_| ||t| td	ddddtd	ddddg|_| ||t| d S )
Nr3   r)   r4   r   %parquet must have string column namesr   rJ   rK   i  )r0   r1   rB   rC   r   r   r   rX   )r   r"   rG   r   r    r    r!   test_columns_dtypes_invalid  s    

z%TestBasic.test_columns_dtypes_invalidr^   Ngzipsnappybrotlic                 C   sP   |dkrt d n|dkr&t d tddddgi}t||d|id d S )	Nr   r   r.   r)   r*   r+   r^   rm   )r$   r   r0   r1   rp   )r   r"   r^   rG   r    r    r!   test_compression  s    
zTestBasic.test_compressionc                 C   sJ   t tdttddd}t dtdi}t|||ddgid d S )Nr3   r)   r4   r   rR   r   rj   rl   )r0   r1   rB   rC   rp   )r   r"   rG   rj   r    r    r!   test_read_columns  s       zTestBasic.test_read_columnsc                 C   s   |dk}t ddddgi}t|| dddgt jdddtd	dddgg}|D ]2}||_t|t jrt|jd |_t|||d
 qPdddg|_d|j_	t|| d S )Nr   r.   r)   r*   r+   r4   r:   r;   r3   )ra   r   r,   )
r0   r1   rp   rF   rB   indexr   ZDatetimeIndex
_with_freqname)r   r"   ra   rG   Zindexesr   r    r    r!   test_write_index  s     
zTestBasic.test_write_indexc                 C   s>   |}t ddddgi}t jdddg}||_t|| d S )Nr.   r)   r*   r+   r=   r)   r=   r*   r>   r)   )r0   r1   
MultiIndexfrom_tuplesr   rp   )r   r&   r"   rG   r   r    r    r!   test_write_multiindex  s
    zTestBasic.test_write_multiindexc                 C   s   |}t jdddd}t jtjdt| dtdd}t jj	d	d
g|gddgd}|j
d d}||fD ]4}||_t|| t||dddgi|ddg d qhd S )Nz01-Jan-2018z01-Dec-2018ZMS)freqr*   r+   ABCr   Level1Level2leveldate)ry   r   r.   r/   rl   rj   )r0   rF   r1   rD   randomrandnrt   rB   r   Zfrom_productcopyr   rp   )r   r&   r"   datesrG   Zindex1index2r   r    r    r!   test_multiindex_with_columns  s"    $
 
  
 
z&TestBasic.test_multiindex_with_columnsc              	   C   s   t dddgdddgd}d dd	}|jd
d}t||||d t jdddgdddgddddgd}t||||d ddddddddgddddddddgg}t jttddd tdD d|d}|jd
d}t||||d d S )Nr)   r*   r+   qrs)r=   r>   F)r^   r   T)droprm   rj   ZzyxZwvuZtsr)r   rL   rM   r,   quxonetwo   c                 S   s   g | ]
}| qS r    r    ).0ir    r    r!   
<listcomp>  s     z7TestBasic.test_write_ignoring_index.<locals>.<listcomp>)r   r   )r0   r1   Zreset_indexrp   rB   rC   )r   r"   rG   rm   rj   arraysr    r    r!   test_write_ignoring_index  s$    
  z#TestBasic.test_write_ignoring_indexc                 C   sB   t jdddg}t jtjdd|d}d}| ||t| d S )Nr   r   r   r4   r+   r   Y\s*parquet must have string column names for all values in\s*each level of the MultiIndex)	r0   r   r   r1   rD   r   r   r   r   )r   r"   Z
mi_columnsrG   r   r    r    r!   test_write_column_multiindex  s
    z&TestBasic.test_write_column_multiindexc              	   C   sh   |}ddddddddgddddddddgg}t jtjdd|d}d	d
g|j_d}| ||t| d S )NrL   rM   r,   r   r)   r*   r   r   r   r   r   )	r0   r1   rD   r   r   r   ry   r   r   r   r&   r"   r   rG   r   r    r    r!   &test_write_column_multiindex_nonstring)  s    z0TestBasic.test_write_column_multiindex_nonstringc              	   C   s^   |}ddddddddgddddddddgg}t jtjdd|d}d	d
g|j_t|| d S )NrL   rM   r,   r   r   r   r   r   Z	ColLevel1Z	ColLevel2)r0   r1   rD   r   r   r   ry   rp   r   r&   r"   r   rG   r    r    r!   #test_write_column_multiindex_string;  s    z-TestBasic.test_write_column_multiindex_stringc                 C   s>   |}ddddg}t jtjdd|d}d|j_t|| d S )	NrL   rM   r,   r   r   r4   r   Z	StringCol)r0   r1   rD   r   r   r   r   rp   r   r    r    r!   test_write_column_index_stringJ  s
    z(TestBasic.test_write_column_index_stringc                 C   sH   |}ddddg}t jtjdd|d}d|j_d}| ||t| d S )	Nr)   r*   r+   r4   r   r   ZNonStringColr   )	r0   r1   rD   r   r   r   r   r   r   r   r    r    r!   !test_write_column_index_nonstringV  s    z+TestBasic.test_write_column_index_nonstringc           
      C   s  dd l m} |dkr.tjjdd}|j| tt	dddd gdt	dddd gd	t	d
ddd gt	dddd gt	ddddgdt	dddd gdt	dddd gdd}t
 ,}||| t||d}t||dd}W 5 Q R X |d
 jtdkstttj	dddd gddtj	dddd gddtj	d
ddd gddtj	dddd gddtj	ddddgddtj	dddd gddtj	dddd gddd}	|dkr|jddd}|	jddd}	t
||	 d S ) Nr   r   z.Fastparquet nullable dtype support is disabledr   r)   r*   r+   Zint64Zuint8r=   r>   rI   TFr4         ?rP   rQ   Zfloat32r7   )r=   r>   rI   r?   r@   rA   gr   r"   use_nullable_dtypesInt64r8   UInt8rR   booleanZFloat32Float64)Zaxis)rr   rs   r$   r   xfailnode
add_markerr   tabler   rd   ro   Zwrite_tabler   r9   rD   ru   r0   r1   r   re   )
r   r"   r   rz   r   r   rk   Zresult1Zresult2rj   r    r    r!   test_use_nullable_dtypesb  sF    

z"TestBasic.test_use_nullable_dtypesr9   r   r   r   objectzdatetime64[ns, UTC]rV   z	period[D]r   rR   c                 C   sT   t dt jg |di}d }|dkr<t dt jg ddi}t||ddi|d d S )Nvaluer8   rV   r   r   Tr   )r0   r1   r   rp   )r   r&   r9   rG   rj   r    r    r!   test_read_empty_array  s$         zTestBasic.test_read_empty_array)r   r   r   r   r   r   r$   r   parametrizer   r   r   r   r   r   r   r   r   r   r   r   r   r    r    r    r!   r     s<   
		 
1r   z8ignore:CategoricalBlock is deprecated:DeprecationWarningc                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	e
jje dde
jdeejgdd Zdd Ze
jjdd Ze
jjdd Ze
jjede
jddgg gdd Zedd d! Zedd"d# Zd$d% Zd&d' Ze
jdeejgd(d) Zd*d+ Zd,d- Z edd.d/ Z!ejdd0d1d2d3 Z"ejdd4d1d5d6 Z#d7d8 Z$d9d: Z%ejdd0d1d;d< Z&d=d> Z'd?S )@TestParquetPyArrowc                 C   sB   |}t jdddd}|d }||d< dd dg|d< t|| d S )Nr:   r+   Europe/Brusselsr<   tzdatetime_tzTZbool_with_none)r0   rF   r   rp   )r   r&   r\   rG   dtir    r    r!   
test_basic  s    
zTestParquetPyArrow.test_basicc                 C   s<   |}t jdddd|d< t|||ddg dddgid	 d S )
Nr:   r+   r   r   r   rR   rU   r   r   )r0   rF   rp   )r   r&   r\   rG   r    r    r!   test_basic_subset_columns  s    

z,TestParquetPyArrow.test_basic_subset_columnsc                 C   s:   |j |d}t|tstt|}t|}t|| d S )Nr   )r   r   rS   ru   r   r   rd   re   )r   r&   r\   Z	buf_bytesZ
buf_streamresr    r    r!   *test_to_bytes_without_path_or_buf_provided  s
    z=TestParquetPyArrow.test_to_bytes_without_path_or_buf_providedc                 C   s8   t jtdddtdd }| ||td d S )N   r4   r+   aaar   zDuplicate column names found	r0   r1   rD   rE   ZreshaperB   r   r   r   r   r&   rG   r    r    r!   test_duplicate_columns  s    $z)TestParquetPyArrow.test_duplicate_columnsc                 C   s:   t dt jdddi}tr,| ||t n
t|| d S )Nr=   1 dayr+   r;   )r0   r1   timedelta_ranger
   r   NotImplementedErrorrp   r  r    r    r!   test_timedelta  s    z!TestParquetPyArrow.test_timedeltac                 C   s(   t ddddgi}| ||tj d S )Nr=   r)   rP   )r0   r1   r   r   ArrowExceptionr  r    r    r!   test_unsupported  s    z#TestParquetPyArrow.test_unsupportedc                 C   s6   t jddt jd}tj|dgd}| ||tj d S )Nr*   
   r8   fp16datar   )rD   rE   float16r0   r1   r   r   r
  )r   r&   r  rG   r    r    r!   test_unsupported_float16  s    z+TestParquetPyArrow.test_unsupported_float16zqPyArrow does not cleanup of partial files dumps when unsupported dtypes are passed to_parquet function in windowsr   	path_typec              
   C   sx   t jddt jd}tj|dgd}t D}||}ttj	 |j
||d W 5 Q R X tj|rjtW 5 Q R X d S )Nr*   r  r8   r  r  )rk   r"   )rD   rE   r  r0   r1   rd   ro   r   r   r
  r   osrk   isfileru   )r   r&   r  r  rG   path_strrk   r    r    r!    test_unsupported_float16_cleanup  s    
z3TestParquetPyArrow.test_unsupported_float16_cleanupc                 C   sx   t  }t td|d< t jddddd dgt dddgd|d< t jddddddgddd	gd
d|d< t|| d S )NZabcdefr=   rL   r,   rM   r8   r>   rI   r?   T)
categoriesZordered)r0   r1   CategoricalrB   ZCategoricalDtyperp   r  r    r    r!   test_categorical  s    
  
z#TestParquetPyArrow.test_categoricalc                 C   s4   t d}|jf |}d|i}t||d||d d S )Ns3fs
filesystemzpandas-test/pyarrow.parquetrk   rl   rm   )r$   r   ZS3FileSystemrp   )r   r2   s3_resourcer&   s3sor  Zs3kwr    r    r!   test_s3_roundtrip_explicit_fs  s    
z0TestParquetPyArrow.test_s3_roundtrip_explicit_fsc                 C   s   d|i}t ||d||d d S )Nstorage_optionsz s3://pandas-test/pyarrow.parquetr  rp   )r   r2   r  r&   r  r    r    r!   test_s3_roundtrip+  s    z$TestParquetPyArrow.test_s3_roundtripr  partition_colr.   c              
   C   sR   |  }|r*trdnd}|| |||< t|||dd|i|d |dddd d S )	NZint32r   zs3://pandas-test/parquet_dirr!  )partition_colsr^   r!  Tr)   )rj   rk   rl   rm   rb   rf   )r   r   rZ   rp   )r   r2   r  r&   r$  r  Zexpected_dfZpartition_col_typer    r    r!   test_s3_roundtrip_for_dir7  s&    z,TestParquetPyArrow.test_s3_roundtrip_for_dirr   c                 C   s(   t  }|| t|}t|| d S r   )r   r   r   rd   re   )r   r2   bufferZdf_from_bufr    r    r!   test_read_file_like_obj_supportb  s    
z2TestParquetPyArrow.test_read_file_like_obj_supportc              	   C   sb   | dd | dd tjtdd td W 5 Q R X tjtdd |d W 5 Q R X d S )NHOMEZTestingUserUSERPROFILEz.*TestingUser.*r|   z~/file.parquet)Zsetenvr$   r~   OSErrorr   r   )r   r2   Zmonkeypatchr    r    r!   test_expand_useri  s    z#TestParquetPyArrow.test_expand_userc              	   C   sR   ddg}|}t  4}|j||d d t|| t|j|jksDtW 5 Q R X d S )NrW   rU   r%  r^   rd   ensure_clean_dirr   r{   r   shaperu   )r   r&   r\   r%  rG   rk   r    r    r!   test_partition_cols_supportedr  s    

z0TestParquetPyArrow.test_partition_cols_supportedc              	   C   sT   d}|g}|}t  4}|j||d d t|| t|j|jksFtW 5 Q R X d S )NrW   r-  r.  )r   r&   r\   r%  partition_cols_listrG   rk   r    r    r!   test_partition_cols_string{  s    

z-TestParquetPyArrow.test_partition_cols_stringc           	   	   C   sP   d}|g}|}t  0}||}|j||d t|j|jksBtW 5 Q R X d S )Nr/   )r%  )rd   r/  r   r   r0  ru   )	r   r&   r2   r  r%  r2  rG   r  rk   r    r    r!   test_partition_cols_pathlib  s    
z.TestParquetPyArrow.test_partition_cols_pathlibc                 C   s   t  }t|| d S r   )r0   r1   rp   r  r    r    r!   test_empty_dataframe  s    z'TestParquetPyArrow.test_empty_dataframec                 C   sV   dd l }tdddgi}||jd| dg}|t}t||d|i|d d S )Nr   xr)   )typerx   r   )	r   r0   r1   rx   fieldZbool_rZ   rW   rp   )r   r&   r   rG   rx   Zout_dfr    r    r!   test_write_with_schema  s
    
z)TestParquetPyArrow.test_write_with_schemac                 C   sz   t t jdddgddt jdddgddt jdd dgd	dd
}t|| t dt jdddd gddi}t|| d S )Nr)   r*   r+   r   r8   ZUInt32r=   rI   rR   )r=   r>   rI   )r0   r1   r   rp   r  r    r    r!    test_additional_extension_arrays  s    
 z3TestParquetPyArrow.test_additional_extension_arraysz1.0.0)min_versionc              	   C   sV   t dt jdd dgddi}t d|" t|||d| dd W 5 Q R X d S )	Nr=   rI   zstring[pyarrow]r8   string_storagezstring[]rj   )r0   r1   r   r   rp   rZ   )r   r&   r<  rG   r    r    r!    test_pyarrow_backed_string_array  s    z3TestParquetPyArrow.test_pyarrow_backed_string_arrayz2.0.0c                 C   sN   t t jdddgt jddddt jt jddddd	}t|| d S )
N)r   r)   )r)   r*   )r+   r4   z
2012-01-01r+   D)r<   r   r4   )rI   r?   r@   )r0   r1   ZIntervalIndexr   period_rangeZfrom_breaksrF   rp   r  r    r    r!   test_additional_extension_types  s    
z2TestParquetPyArrow.test_additional_extension_typesc                 C   s>   t s
d}nd}tdtjddddi}t||d|id	 d S )
Nz2.6z2.0r=   z
2017-01-01Z1nr  r   r<   versionr   )r	   r0   r1   rF   rp   )r   r&   verrG   r    r    r!   test_timestamp_nanoseconds  s
    z-TestParquetPyArrow.test_timestamp_nanosecondsc                 C   sV   t s(|jtjjkr(|jtjj	dd d|g }t
j|d|id}t||dd d S )Nzitemporary skip this test until it is properly resolved: https://github.com/pandas-dev/pandas/issues/37286r      index_as_colr   r  F)rc   )r   tzinforX   timezoneutcr   r   r$   r   r   r0   r1   rp   )r   r   r&   r]   idxrG   r    r    r!   test_timezone_aware_index  s    
z,TestParquetPyArrow.test_timezone_aware_indexc              	   C   s^   t dttddi}t $}||| t||dgdd}W 5 Q R X t|dksZt	d S )Nr=   r   r+   r=   z==r   F)filtersZuse_legacy_datasetr)   )
r0   r1   rB   rC   rd   ro   r   r   rt   ru   )r   r&   rG   rk   r   r    r    r!   test_filter_row_groups  s    
   z)TestParquetPyArrow.test_filter_row_groupsc              	   C   s~   t jtjdddddgd}t }||| t||}W 5 Q R X |rdt	|j
t jjjsztnt	|j
t jjjsztd S )Nr  r+   r.   r/   Cr   )r0   r1   rD   r   r   rd   ro   r   r   r   Z_mgrcoreZ	internalsZArrayManagerru   ZBlockManager)r   r&   Zusing_array_managerrG   rk   r   r    r    r!   test_read_parquet_manager  s    
z,TestParquetPyArrow.test_read_parquet_managerN)(r   r   r   r   r   r   r  r	  r  r  r$   r   r   r   r   strpathlibPathr  r  
single_cpur   r#  tdZ
skip_if_nor&  r(  r,  r1  r3  r4  r5  r9  r:  r?  rB  rF  rN  rQ  rT  r    r    r    r!   r     s`   

"

	






r   c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Ze	j
jdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!S )"TestParquetFastParquetc                 C   sF   |}t jdddd}|d }||d< t jddd|d< t|| d S )	Nr:   r+   z
US/Easternr   r   r  r;   	timedelta)r0   rF   r   r  rp   )r   r(   r\   rG   r   r    r    r!   r     s    
z!TestParquetFastParquet.test_basicc                 C   s<   t jtdddtdd }d}| ||t| d S )Nr  r4   r+   r  r   z9Cannot create parquet dataset with duplicate column namesr  r   r(   rG   r   r    r    r!   r    s    $z-TestParquetFastParquet.test_duplicate_columnsc                 C   sB   t ddd dgi}t jddtjdgidd}t|||dd d S )	Nr=   TFr   g        r  r8   )rj   rc   )r0   r1   rD   rY   rp   r   r(   rG   rj   r    r    r!   test_bool_with_none  s    z*TestParquetFastParquet.test_bool_with_nonec                 C   sV   t dt jddddi}| ||td  t ddddgi}d}| ||t| d S )	Nr=   Z2013Mr+   rC  r)   rP   z"Can't infer object conversion type)r0   r1   rA  r   r   r\  r    r    r!   r    s
    z'TestParquetFastParquet.test_unsupportedc                 C   s&   t dt tdi}t|| d S )Nr=   r3   )r0   r1   r  rB   rp   )r   r(   rG   r    r    r!   r  &  s    z'TestParquetFastParquet.test_categoricalc              	   C   sf   dt tddi}t|}t (}|j||d dd t||dgd}W 5 Q R X t|dksbt	d S )Nr=   r   r+   r)   )r^   Zrow_group_offsetsrO  )rP  )
rB   rC   r0   r1   rd   ro   r   r   rt   ru   )r   r(   r?   rG   rk   r   r    r    r!   rQ  *  s    

z-TestParquetFastParquet.test_filter_row_groupsc                 C   s    t ||dd|id |dd d S )Nz$s3://pandas-test/fastparquet.parquetr!  )r^   r!  r  r"  )r   r2   r  r(   r  r    r    r!   r#  2  s    z(TestParquetFastParquet.test_s3_roundtripc              	   C   sl   ddg}|}t  N}|j|d|d d tj|s8tdd l}||dj	}t
|dks^tW 5 Q R X d S )NrW   rU   r   r"   r%  r^   r   Fr*   rd   r/  r   r  rk   existsru   r   ZParquetFileZcatsrt   r   r(   r\   r%  rG   rk   r   Zactual_partition_colsr    r    r!   r1  =  s    
z4TestParquetFastParquet.test_partition_cols_supportedc              	   C   sh   d}|}t  N}|j|d|d d tj|s4tdd l}||dj	}t
|dksZtW 5 Q R X d S )NrW   r   r`  r   Fr)   ra  rc  r    r    r!   r3  N  s    
z1TestParquetFastParquet.test_partition_cols_stringc              	   C   sl   ddg}|}t  N}|j|dd |d tj|s8tdd l}||dj	}t
|dks^tW 5 Q R X d S )NrW   rU   r   )r"   r^   partition_onr   Fr*   ra  rc  r    r    r!   test_partition_on_supported_  s    
z2TestParquetFastParquet.test_partition_on_supportedc              
   C   sV   ddg}|}d}t jt|d. t }|j|dd ||d W 5 Q R X W 5 Q R X d S )NrW   rU   zYCannot use both partition_on and partition_cols. Use partition_cols for partitioning datar|   r   )r"   r^   rd  r%  )r$   r~   r   rd   r/  r   )r   r(   r\   r%  rG   r   rk   r    r    r!   3test_error_on_using_partition_cols_and_partition_onp  s    
zJTestParquetFastParquet.test_error_on_using_partition_cols_and_partition_onc                 C   s*   t  }| }d|j_t|||d d S )Nr   r>  r0   r1   r   r   r   rp   r]  r    r    r!   r5    s    z+TestParquetFastParquet.test_empty_dataframec                 C   s>   d|g }t j|d|id}| }d|j_t|||d d S )NrG  rH  rI  r   r>  rg  )r   r(   r]   rM  rG   rj   r    r    r!   rN    s
    
z0TestParquetFastParquet.test_timezone_aware_indexc              
   C   s\   t dddgi}t 8}|| tjtdd t|ddd W 5 Q R X W 5 Q R X d S )	Nr=   r)   r*   z!not supported for the fastparquetr|   r   Tr   )	r0   r1   rd   ro   r   r$   r~   r   r   )r   r(   rG   rk   r    r    r!   &test_use_nullable_dtypes_not_supported  s
    

z=TestParquetFastParquet.test_use_nullable_dtypes_not_supportedc              
   C   sb   t dN}t|d tjtdd t|dd W 5 Q R X t|j	dd W 5 Q R X d S )	Ntest.parquets   breakit r|   r   r   F)
missing_ok)
rd   ro   rV  rW  write_bytesr$   r~   	Exceptionr   unlink)r   rk   r    r    r!   $test_close_file_handle_on_read_error  s
    z;TestParquetFastParquet.test_close_file_handle_on_read_errorc              
   C   sp   t jddgddgdd}td6}t| d}|| W 5 Q R X t||d}W 5 Q R X t|| d S )Nr   r)   r-   )r  ri  wbr   )	r0   r1   rd   ro   openencoder   r   re   )r   r"   rG   rk   rA   r   r    r    r!   test_bytes_file_name  s    z+TestParquetFastParquet.test_bytes_file_nameN)r   r   r   r   r  r^  r  r  rQ  r$   r   rX  r#  r1  r3  re  rf  r5  rN  rh  ro  rs  r    r    r    r!   rZ    s"   	

	rZ  )	NNNNNTFTr*   )M__doc__rX   ior   r  rV  warningsr   r   ZnumpyrD   r$   Zpandas._configr   Zpandas.compatr   Zpandas.compat.pyarrowr   r   r	   r
   Zpandas.util._test_decoratorsutilZ_test_decoratorsrY  Zpandasr0   Zpandas._testingZ_testingrd   Zpandas.util.versionr   Zpandas.io.parquetr   r   r   r   r   r   r#   r   DeprecationWarningFutureWarningr   r'   r   Z
pytestmarkZfixturer   Zskipifr"   r&   r(   r2   rH   r\   nowrK  rL  minmaxstrptimer]   rp   r{   r   r   r   r   r   r   r   r   r   r   r   rZ  r    r    r    r!   <module>   s   

 






         
A+   $
  N