U
    %d	                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	Z	d dl
mZ d dlmZ ee ee eeeeeeeeef f dddZG d	d
 d
eZdS )    N)Path)DictListTupleUnion)Tensor)Dataset)lineheaderpathfolder_audio	ext_audioreturnc           
      C   s`   |d dkst | d }tj|||}||s:||7 }t|\}}tt|| }	|||	fS )N   r   )	AssertionErrorosr   joinendswith
torchaudioloaddictzip)
r	   r
   r   r   r   ZfileidfilenameZwaveformZsample_rateZdic r   C/tmp/pip-unpacked-wheel-lbdmvq91/torchaudio/datasets/commonvoice.pyload_commonvoice_item   s    
r   c                   @   sh   e Zd ZdZdZdZdZdeee	f edddd	Z
eeeeeeef f d
ddZedddZdS )COMMONVOICEa  Create a Dataset for *CommonVoice* [:footcite:`ardila2020common`].

    Args:
        root (str or Path): Path to the directory where the dataset is located.
             (Where the ``tsv`` file is present.)
        tsv (str, optional):
            The name of the tsv file used to construct the metadata, such as
            ``"train.tsv"``, ``"test.tsv"``, ``"dev.tsv"``, ``"invalidated.tsv"``,
            ``"validated.tsv"`` and ``"other.tsv"``. (default: ``"train.tsv"``)
    z.txtz.mp3Zclips	train.tsvN)roottsvr   c              	   C   s\   t || _t j| j|| _t| jd(}tj|dd}t	|| _
t|| _W 5 Q R X d S )Nr	)	delimiter)r   fspath_pathr   r   Z_tsvopencsvreadernext_headerlist_walker)selfr   r   Ztsv_walkerr   r   r   __init__-   s    
zCOMMONVOICE.__init__)nr   c                 C   s"   | j | }t|| j| j| j| jS )a  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            (Tensor, int, Dict[str, str]): ``(waveform, sample_rate, dictionary)``,  where dictionary
            is built from the TSV file with the following keys: ``client_id``, ``path``, ``sentence``,
            ``up_votes``, ``down_votes``, ``age``, ``gender`` and ``accent``.
        )r+   r   r)   r$   _folder_audio
_ext_audio)r,   r/   r	   r   r   r   __getitem__8   s    
zCOMMONVOICE.__getitem__)r   c                 C   s
   t | jS )N)lenr+   )r,   r   r   r   __len__F   s    zCOMMONVOICE.__len__)r   )__name__
__module____qualname____doc__Z_ext_txtr1   r0   r   strr   r.   intr   r   r   r2   r4   r   r   r   r   r      s   "r   )r&   r   pathlibr   typingr   r   r   r   r   Ztorchr   Ztorch.utils.datar   r9   r:   r   r   r   r   r   r   <module>   s       