U
    %d`                     @  sL  d dl mZ d dlmZ d dlmZmZmZmZ d dl	Z	d dl
Z
eG dd dZeG dd deZeG d	d
 d
eZd ZdZdZdZdZdZdZdZdZdZdZdZdd ZeG dd dZdd Zddddd Zd!dddd"d#d$Zd%d& Zd'Z d(Z!d)Z"d*Z#d+Z$d,Z%d-Z&ee e!e"e$e%d.Z'ee e!e#e$e%e&d/Z(G d0d1 d1Z)dS )2    )annotations)	dataclass)DictIteratorOptionalTupleNc                   @  sJ   e Zd ZU dZded< ded< ded< ded< ded	< ded
< ded< dS )StreamReaderSourceStreamak  StreamReaderSourceStream()

    The metadata of a source stream. This class is used when representing streams of
    media type other than `audio` or `video`.

    When source stream is `audio` or `video` type, :py:class:`SourceAudioStream` and
    :py:class:`SourceVideoStream`, which reports additional media-specific attributes,
    are used respectively.
    str
media_typecodeccodec_long_nameOptional[str]formatOptional[int]bit_rate
num_framesbits_per_sampleN__name__
__module____qualname____doc____annotations__ r   r   @/tmp/pip-unpacked-wheel-lbdmvq91/torchaudio/io/_stream_reader.pyr   
   s   

	r   c                   @  s"   e Zd ZU dZded< ded< dS )StreamReaderSourceAudioStreama
  StreamReaderSourceAudioStream()

    The metadata of an audio source stream.

    In addition to the attributes reported by :py:func:`StreamReaderSourceStream`,
    when the source stream is audio type, then the following additional attributes
    are reported.
    floatsample_rateintnum_channelsNr   r   r   r   r   r   B   s   
	r   c                   @  s*   e Zd ZU dZded< ded< ded< dS )StreamReaderSourceVideoStreama	  StreamReaderSourceVideoStream()

    The metadata of a video source stream.

    In addition to the attributes reported by :py:func:`StreamReaderSourceStream`,
    when the source stream is audio type, then the following additional attributes
    are reported.
    r   widthheightr   
frame_rateNr   r   r   r   r   r    S   s
   
	r                            	   
      c                 C  s   | t  }| t }| t }| t }| t }| t }| t }|dkrbt|||||||| t | t	 d	S |dkrt
|||||||| t | t | t d
S t|||d d d d dS )Naudio)	r
   r   r   r   r   r   r   r   r   video)
r
   r   r   r   r   r   r   r!   r"   r#   )r
   r   r   r   r   r   r   )_MEDIA_TYPE_CODEC_CODEC_LONG_FORMAT	_BIT_RATE_NUM_FRAMES_BPSr   _SAMPLE_RATE_NUM_CHANNELSr    _WIDTH_HEIGHT_FRAME_RATEr   )ir
   Z
codec_namer   fmtr   r   Zbpsr   r   r   	_parse_six   sR    r?   c                   @  s"   e Zd ZU dZded< ded< dS )StreamReaderOutputStreamzNOutputStream()

    Output stream configured on :py:class:`StreamReader`.
    r   Zsource_indexr	   Zfilter_descriptionNr   r   r   r   r   r@      s   
r@   c                 C  s   t | d | d S )Nr   r$   )r@   r=   r   r   r   	_parse_oi   s    rB   r   r   )r   r>   c                 C  sF   g }| d k	r| d|   |d k	r4| d|  |rBd|S d S )Nz
aresample=zaformat=sample_fmts=,appendjoin)r   r>   descsr   r   r   _get_afilter_desc   s    rH   Optional[float])r#   r!   r"   r>   c                 C  s   g }| d k	r| d|   g }|d k	r8| d|  |d k	rP| d|  |rj| dd|  |d k	r| d|  |rd|S d S )Nzfps=zwidth=zheight=zscale=:zformat=pix_fmts=rC   rD   )r#   r!   r"   r>   rG   Zscalesr   r   r   _get_vfilter_desc   s    rK   c                    s    fdd}|S )Nc                   s   | j jf  | _ | S N)r   r   )objkwargsr   r   	decorator   s    z_format_doc.<locals>.decoratorr   )rO   rP   r   rN   r   _format_doc   s    rQ   zNumber of frames returned as one chunk.
                If the source stream is exhausted before enough frames are buffered,
                then the chunk is returned as-is.zInternal buffer size.
                When the number of chunks buffered exceeds this number, old frames are
                dropped.

                Default: ``3``.zcThe source audio stream index.
                If omitted, :py:attr:`default_audio_stream` is used.zcThe source video stream index.
                If omitted, :py:attr:`default_video_stream` is used.zThe name of the decoder to be used.
                When provided, use the specified decoder instead of the default one.

                To list the available decoders, you can use `ffmpeg -decoders` command.

                Default: ``None``.zOptions passed to decoder.
                Mapping from str to str.

                To list decoder options for a decoder, you can use
                `ffmpeg -h decoder=<DECODER>` command.

                Default: ``None``.a  Enable hardware acceleration.

                When video is decoded on CUDA hardware, for example
                `decode="h264_cuvid"`, passing CUDA device indicator to `hw_accel`
                (i.e. `hw_accel="cuda:0"`) will place the resulting frames
                directly on the specifiec CUDA device.

                If `None`, the frame will be moved to CPU memory.
                Default: ``None``.)frames_per_chunkbuffer_chunk_sizestream_indexdecoderdecoder_option)rR   rS   rT   rU   rV   hw_accelc                   @  st  e Zd ZdZdDdddddd	d
Zedd Zedd Zedd Zedd Z	dddddZ
dddddZddddZedEddd dddd d!d"d#ZedFddd ddddd d d d%
d&d'ZedGddd dddd(d)d*ZedHddd ddddd+d,d-Zdd.d/d0ZdId2ddd3d4d5Zd6d7 Zd8d9d:d;Zd<d9d=d>Zd2ddd3d?d@ZdJd2ddAd3dBdCZdS )KStreamReadera	  Fetch and decode audio/video streams chunk by chunk.

    For the detailed usage of this class, please refer to the tutorial.

    Args:
        src (str or file-like object): The media source.
            If string-type, it must be a resource indicator that FFmpeg can
            handle. This includes a file path, URL, device identifier or
            filter expression. The supported value depends on the FFmpeg found
            in the system.

            If file-like object, it must support `read` method with the signature
            `read(size: int) -> bytes`.
            Additionally, if the file-like object has `seek` method, it uses
            the method when parsing media metadata. This improves the reliability
            of codec detection. The signagure of `seek` method must be
            `seek(offset: int, whence: int) -> int`.

            Please refer to the following for the expected signature and behavior
            of `read` and `seek` method.

            - https://docs.python.org/3/library/io.html#io.BufferedIOBase.read
            - https://docs.python.org/3/library/io.html#io.IOBase.seek

        format (str or None, optional):
            Override the input format, or specify the source sound device.
            Default: ``None`` (no override nor device input).

            This argument serves two different usecases.

            1) Override the source format.
               This is useful when the input data do not contain a header.

            2) Specify the input source device.
               This allows to load media stream from hardware devices,
               such as microphone, camera and screen, or a virtual device.


            .. note::

               This option roughly corresponds to ``-f`` option of ``ffmpeg`` command.
               Please refer to the ffmpeg documentations for the possible values.

               https://ffmpeg.org/ffmpeg-formats.html

               For device access, the available values vary based on hardware (AV device) and
               software configuration (ffmpeg build).

               https://ffmpeg.org/ffmpeg-devices.html

        option (dict of str to str, optional):
            Custom option passed when initializing format context (opening source).

            You can use this argument to change the input source before it is passed to decoder.

            Default: ``None``.

        buffer_size (int):
            The internal buffer size in byte. Used only when `src` is file-like object.

            Default: `4096`.
    N   r	   r   zOptional[Dict[str, str]]r   )srcr   optionbuffer_sizec                 C  s   t |tr tjj|||| _n(t|dr@tj	||||| _nt
d| j }|dk r^d n|| _| j }|dk rzd n|| _d S )Nreadz0`src` must be either string or file-like object.r   )
isinstancer	   torchclasses
torchaudioZffmpeg_StreamReader_behasattrZ_torchaudio_ffmpegZStreamReaderFileObj
ValueErrorZfind_best_audio_stream_default_audio_streamZfind_best_video_stream_default_video_stream)selfrZ   r   r[   r\   r=   r   r   r   __init__U  s    



zStreamReader.__init__c                 C  s
   | j  S )zRNumber of streams found in the provided media source.

        :type: int
        )rb   num_src_streamsrg   r   r   r   ri   h  s    zStreamReader.num_src_streamsc                 C  s
   | j  S )zPNumber of output streams configured by client code.

        :type: int
        )rb   num_out_streamsrj   r   r   r   rk   p  s    zStreamReader.num_out_streamsc                 C  s   | j S )znThe index of default audio stream. ``None`` if there is no audio stream

        :type: Optional[int]
        )re   rj   r   r   r   default_audio_streamx  s    z!StreamReader.default_audio_streamc                 C  s   | j S )znThe index of default video stream. ``None`` if there is no video stream

        :type: Optional[int]
        )rf   rj   r   r   r   default_video_stream  s    z!StreamReader.default_video_streamz&torchaudio.io.StreamReaderSourceStream)r=   returnc                 C  s   t | j|S )zGet the metadata of source stream

        Args:
            i (int): Stream index.
        Returns:
            SourceStream
        )r?   rb   get_src_stream_inforg   r=   r   r   r   ro     s    z StreamReader.get_src_stream_infoz&torchaudio.io.StreamReaderOutputStreamc                 C  s   t | j|S )zGet the metadata of output stream

        Args:
            i (int): Stream index.
        Returns:
            OutputStream
        )rB   rb   get_out_stream_inforp   r   r   r   rq     s    z StreamReader.get_out_stream_infor   )	timestampc                 C  s   | j | dS )z}Seek the stream to the given timestamp [second]

        Args:
            timestamp (float): Target time in second.
        N)rb   seek)rg   rr   r   r   r   rs     s    zStreamReader.seekr&   fltpr   )rR   rS   rT   rU   rV   r   r   c              
   C  s   |  |||||t|| dS )a  Add output audio stream

        Args:
            frames_per_chunk (int): {frames_per_chunk}

            buffer_chunk_size (int, optional): {buffer_chunk_size}

            stream_index (int or None, optional): {stream_index}

            decoder (str or None, optional): {decoder}

            decoder_option (dict or None, optional): {decoder_option}

            format (str, optional): Output sample format (precision).

                If ``None``, the output chunk has dtype corresponding to
                the precision of the source audio.

                Otherwise, the sample is converted and the output dtype is changed
                as following.

                - ``"u8p"``: The output is ``torch.uint8`` type.
                - ``"s16p"``: The output is ``torch.int16`` type.
                - ``"s32p"``: The output is ``torch.int32`` type.
                - ``"s64p"``: The output is ``torch.int64`` type.
                - ``"fltp"``: The output is ``torch.float32`` type.
                - ``"dblp"``: The output is ``torch.float64`` type.

                Default: ``"fltp"``.

            sample_rate (int or None, optional): If provided, resample the audio.
        N)add_audio_streamrH   )rg   rR   rS   rT   rU   rV   r   r   r   r   r   add_basic_audio_stream  s    +z#StreamReader.add_basic_audio_streamrgb24)
rR   rS   rT   rU   rV   rW   r   r#   r!   r"   c                 C  s$   |  ||||||t||	|
| dS )a'  Add output video stream

        Args:
            frames_per_chunk (int): {frames_per_chunk}

            buffer_chunk_size (int, optional): {buffer_chunk_size}

            stream_index (int or None, optional): {stream_index}

            decoder (str or None, optional): {decoder}

            decoder_option (dict or None, optional): {decoder_option}

            hw_accel (str or None, optional): {hw_accel}

            format (str, optional): Change the format of image channels. Valid values are,

                - ``"rgb24"``: 8 bits * 3 channels (R, G, B)
                - ``"bgr24"``: 8 bits * 3 channels (B, G, R)
                - ``"yuv420p"``: 8 bits * 3 channels (Y, U, V)
                - ``"gray"``: 8 bits * 1 channels

                Default: ``"rgb24"``.

            frame_rate (int or None, optional): If provided, change the frame rate.

            width (int or None, optional): If provided, change the image width. Unit: Pixel.

            height (int or None, optional): If provided, change the image height. Unit: Pixel.
        N)add_video_streamrK   )rg   rR   rS   rT   rU   rV   rW   r   r#   r!   r"   r   r   r   add_basic_video_stream  s    ,z#StreamReader.add_basic_video_stream)rR   rS   rT   rU   rV   filter_descc                 C  s@   |dkr| j n|}|dkr"td| j||||||p8i  dS )ad  Add output audio stream

        Args:
            frames_per_chunk (int): {frames_per_chunk}

            buffer_chunk_size (int, optional): {buffer_chunk_size}

            stream_index (int or None, optional): {stream_index}

            decoder (str or None, optional): {decoder}

            decoder_option (dict or None, optional): {decoder_option}

            filter_desc (str or None, optional): Filter description.
                The list of available filters can be found at
                https://ffmpeg.org/ffmpeg-filters.html
                Note that complex filters are not supported.

        NzThere is no audio stream.)rl   RuntimeErrorrb   ru   )rg   rR   rS   rT   rU   rV   rz   r=   r   r   r   ru     s    zStreamReader.add_audio_stream)rR   rS   rT   rU   rV   rW   rz   c           	   	   C  sB   |dkr| j n|}|dkr"td| j||||||p8i | dS )a  Add output video stream

        Args:
            frames_per_chunk (int): {frames_per_chunk}

            buffer_chunk_size (int, optional): {buffer_chunk_size}

            stream_index (int or None, optional): {stream_index}

            decoder (str or None, optional): {decoder}

            decoder_option (dict or None, optional): {decoder_option}

            hw_accel (str or None, optional): {hw_accel}

            filter_desc (str or None, optional): Filter description.
                The list of available filters can be found at
                https://ffmpeg.org/ffmpeg-filters.html
                Note that complex filters are not supported.
        NzThere is no video stream.)rm   r{   rb   rx   )	rg   rR   rS   rT   rU   rV   rW   rz   r=   r   r   r   rx   7  s    zStreamReader.add_video_streamrA   c                 C  s   | j | dS )zoRemove an output stream.

        Args:
            i (int): Index of the output stream to be removed.
        N)rb   remove_streamrp   r   r   r   r|   c  s    zStreamReader.remove_stream      $@rI   )timeoutbackoffrn   c                 C  s   | j ||S )a  Read the source media and process one packet.

        If a packet is read successfully, then the data in the packet will
        be decoded and passed to corresponding output stream processors.

        If the packet belongs to a source stream that is not connected to
        an output stream, then the data are discarded.

        When the source reaches EOF, then it triggers all the output stream
        processors to enter drain mode. All the output stream processors
        flush the pending frames.

        Args:
            timeout (float or None, optional): Timeout in milli seconds.

                This argument changes the retry behavior when it failed to
                process a packet due to the underlying media resource being
                temporarily unavailable.

                When using a media device such as a microphone, there are cases
                where the underlying buffer is not ready.
                Calling this function in such case would cause the system to report
                `EAGAIN (resource temporarily unavailable)`.

                * ``>=0``: Keep retrying until the given time passes.

                * ``0<``: Keep retrying forever.

                * ``None`` : No retrying and raise an exception immediately.

                Default: ``None``.

                Note:

                    The retry behavior is applicable only when the reason is the
                    unavailable resource. It is not invoked if the reason of failure is
                    other.

            backoff (float, optional): Time to wait before retrying in milli seconds.

                This option is effective only when `timeout` is effective. (not ``None``)

                When `timeout` is effective, this `backoff` controls how long the function
                should wait before retrying. Default: ``10.0``.

        Returns:
            int:
                ``0``
                A packet was processed properly. The caller can keep
                calling this function to buffer more frames.

                ``1``
                The streamer reached EOF. All the output stream processors
                flushed the pending frames. The caller should stop calling
                this method.
        )rb   process_packet)rg   r~   r   r   r   r   r   k  s    9zStreamReader.process_packetc                 C  s   | j   dS )z%Process packets until it reaches EOF.N)rb   process_all_packetsrj   r   r   r   r     s    z StreamReader.process_all_packetsbool)rn   c                 C  s
   | j  S )zFReturns true if all the output streams have at least one chunk filled.)rb   is_buffer_readyrj   r   r   r   r     s    zStreamReader.is_buffer_readyzTuple[Optional[torch.Tensor]]c                 C  s
   | j  S )zPop one chunk from all the output stream buffers.

        Returns:
            Tuple[Optional[Tensor]]:
                Buffer contents.
                If a buffer does not contain any frame, then `None` is returned instead.
        )rb   
pop_chunksrj   r   r   r   r     s    zStreamReader.pop_chunksc                 C  s&   |   s"| ||}|dkr |S q dS )a  Keep processing packets until all buffers have at least one chunk

        Returns:
            int:
                ``0``
                Packets are processed properly and buffers are
                ready to be popped once.

                ``1``
                The streamer reached EOF. All the output stream processors
                flushed the pending frames. The caller should stop calling
                this method.
        r   )r   r   )rg   r~   r   coder   r   r   _fill_buffer  s
    zStreamReader._fill_bufferz,Iterator[Tuple[Optional[torch.Tensor], ...]]c                 c  sV   | j dkrtd| ||r q,|  V  q|  }tdd |D rJdS |V  q,dS )a  Return an iterator that generates output tensors

        Arguments:
            timeout (float or None, optional): See
                :py:func:`~StreamReader.process_packet`. (Default: ``None``)

            backoff (float, optional): See
                :py:func:`~StreamReader.process_packet`. (Default: ``10.0``)

        Returns:
            Iterator[Tuple[Optional[torch.Tensor], ...]]:
                Iterator that yields a tuple of chunks that correspond to the output
                streams defined by client code.
                If an output stream is exhausted, then the chunk Tensor is substituted
                with ``None``.
                The iterator stops if all the output streams are exhausted.
        r   zNo output stream is configured.c                 s  s   | ]}|d kV  qd S rL   r   ).0cr   r   r   	<genexpr>  s     z&StreamReader.stream.<locals>.<genexpr>N)rk   r{   r   r   all)rg   r~   r   chunksr   r   r   stream  s    
zStreamReader.stream)NNrY   )r&   NNNrt   N)	r&   NNNNrw   NNN)r&   NNNN)r&   NNNNN)Nr}   )Nr}   )r   r   r   r   rh   propertyri   rk   rl   rm   ro   rq   rs   _format_audio_argsrv   _format_video_argsry   ru   rx   r|   r   r   r   r   r   r   r   r   r   r   rX     sv   B   





      3         $5     (      +;
   rX   )*
__future__r   Zdataclassesr   typingr   r   r   r   r_   ra   r   r   r    r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r?   r@   rB   rH   rK   rQ   Z_frames_per_chunkZ_buffer_chunk_sizeZ_audio_stream_indexZ_video_stream_index_decoderZ_decoder_optionZ	_hw_accelr   r   rX   r   r   r   r   <module>   sh   7,			
