U
    (dM                     @   s  d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	 d dl
Z
ddlmZ zed dZW n eefk
rx   dZY nX ed d	ZG d
d dZG dd dZeeef ddddZe
je
je
je
je
je
jedddZe
je
jeeef e
jdddZddd d d d dedd d defeeeeeeeeeef eeeeeeef eee
je
jef dddZeeee ee ef dddZeeddd Zd0e
jeeeeeeeeef eeeeeeeef eeee
je
jf d!d"d#Ze
jeee ee ef d$d%d&Ze
jed$d'd(Z d1ee	eef ee	eef  eee
je
jeeef f d*d+d,Z!d2eeee	ee ee f ee f d-d.d/Z"dS )3    N)Fraction)ListTupleDictOptionalUnion   )_load_libraryvideo_readerTF   c                   @   s0   e Zd ZeedZddgZeeddddZdS )Timebase	numeratordenominatorr   r   N)r   r   returnc                 C   s   || _ || _d S )Nr   )selfr   r    r   =/tmp/pip-unpacked-wheel-vx7f76es/torchvision/io/_video_opt.py__init__   s    zTimebase.__init__)__name__
__module____qualname__int__annotations__	__slots__r   r   r   r   r   r      s   
r   c                	   @   sD   e Zd ZeeeeeeeedZdddddddd	gZd
dddZd
S )VideoMetaData)	has_videovideo_timebasevideo_duration	video_fps	has_audioaudio_timebaseaudio_durationaudio_sample_rater   r   r   r   r    r!   r"   r#   N)r   c                 C   s@   d| _ tdd| _d| _d| _d| _tdd| _d| _d| _d S )NFr   r   g        )	r   r   r   r   r   r    r!   r"   r#   )r   r   r   r   r   9   s    zVideoMetaData.__init__)	r   r   r   boolr   floatr   r   r   r   r   r   r   r   #   s&   r   )	pts_ranger   c                 C   s@   | d | d   krdkr<n nt d| d  d| d  d S )Nr   r   z=Start pts should not be smaller than end pts, got start pts: z and end pts: )
ValueError)r&   r   r   r   _validate_ptsD   s     r(   )	vtimebasevfps	vduration	atimebaseasample_rate	adurationr   c                 C   s$  t  }|  dkrvtt| d  t| d  |_| d  t| d   }| dkrvd|_t| | |_| dkrt| |_	| dkrtt|d  t|d  |_
|d  t|d   }| dkrd|_t| | |_| dkr t| |_|S )zE
    Build update VideoMetaData struct with info about the video
    r   r   T)r   numelr   r   itemr   r%   r   r   r   r!   r    r"   r#   )r)   r*   r+   r,   r-   r.   metaZtimebaser   r   r   
_fill_infoL   s$    $$r2   )aframes
aframe_ptsaudio_pts_ranger   c           	      C   s   |d |d  }}|  d}t|| d t| }d}|}||d k r\t|d | | }|d dkr||d krt|d | | }| ||d d f S )Nr   r   )sizer%   r   )	r3   r4   r5   startendZnum_samplesZstep_per_aframeZs_idxZe_idxr   r   r   _align_audio_framesl   s    
r:         ?r   r6   )filenameseek_frame_marginread_video_streamvideo_widthvideo_heightvideo_min_dimensionvideo_max_dimensionvideo_pts_ranger   read_audio_streamaudio_samplesaudio_channelsr5   r!   r   c                 C   s   t | t | tjj| |d||||||d |d |j|j|	|
||d |d |j|j}|\
}}}}}}}}}}t||||||}| dkrt	|||}|||fS )am  
    Reads a video from a file, returning both the video frames as well as
    the audio frames

    Args:
    filename (str): path to the video file
    seek_frame_margin (double, optional): seeking frame in the stream is imprecise. Thus,
        when video_start_pts is specified, we seek the pts earlier by seek_frame_margin seconds
    read_video_stream (int, optional): whether read video stream. If yes, set to 1. Otherwise, 0
    video_width/video_height/video_min_dimension/video_max_dimension (int): together decide
        the size of decoded frames:

            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the original frame resolution
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension = 0, keep the aspect ratio and resize the
                frame so that shorter edge size is video_min_dimension
            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension != 0, keep the aspect ratio and resize
                the frame so that longer edge size is video_max_dimension
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension != 0, resize the frame so that shorter
                edge size is video_min_dimension, and longer edge size is
                video_max_dimension. The aspect ratio may not be preserved
            - When video_width = 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_height is $video_height
            - When video_width != 0, video_height == 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_width is $video_width
            - When video_width != 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, resize the frame so that frame
                video_width and  video_height are set to $video_width and
                $video_height, respectively
    video_pts_range (list(int), optional): the start and end presentation timestamp of video stream
    video_timebase (Fraction, optional): a Fraction rational number which denotes timebase in video stream
    read_audio_stream (int, optional): whether read audio stream. If yes, set to 1. Otherwise, 0
    audio_samples (int, optional): audio sampling rate
    audio_channels (int optional): audio channels
    audio_pts_range (list(int), optional): the start and end presentation timestamp of audio stream
    audio_timebase (Fraction, optional): a Fraction rational number which denotes time base in audio stream

    Returns
        vframes (Tensor[T, H, W, C]): the `T` video frames
        aframes (Tensor[L, K]): the audio frames, where `L` is the number of points and
            `K` is the number of audio_channels
        info (Dict): metadata for the video and audio. Can contain the fields video_fps (float)
            and audio_fps (int)
    r   r   )
r(   torchopsr
   read_video_from_filer   r   r2   r/   r:   )r=   r>   r?   r@   rA   rB   rC   rD   r   rE   rF   rG   r5   r!   resultvframes_vframe_ptsr)   r*   r+   r3   r4   r,   r-   r.   infor   r   r   _read_video_from_file{   s8    ArO   )r=   r   c                 C   s~   t jj| dddddddddddddddddd}|\
}}}}}}}}	}
}t||||	|
|}|  }|  }|||fS )z
    Decode all video- and audio frames in the video. Only pts
    (presentation timestamp) is returned. The actual frame pixel data is not
    copied. Thus, it is much faster than read_video(...)
    r   r   r6   )rH   rI   r
   rJ   r2   numpytolist)r=   rK   _vframes
vframe_ptsr)   r*   r+   _aframesr4   r,   r-   r.   rN   r   r   r    _read_video_timestamps_from_file   s4    rU   c           	      C   s4   t jj| }|\}}}}}}t||||||}|S )zO
    Probe a video file and return VideoMetaData with info about the video
    )rH   rI   r
   Zprobe_video_from_filer2   )	r=   rK   r)   r*   r+   r,   r-   r.   rN   r   r   r   _probe_video_from_file   s    rV   )
video_datar>   r?   r@   rA   rB   rC   rD   video_timebase_numeratorvideo_timebase_denominatorrE   rF   rG   r5   audio_timebase_numeratoraudio_timebase_denominatorr   c                 C   s   t | t | t| tjs,tj| tjd} tjj| |d||||||d |d ||	|
|||d |d ||}|\
}}}}}}}}}}|	 dkrt
|||}||fS )a  
    Reads a video from memory, returning both the video frames as well as
    the audio frames
    This function is torchscriptable.

    Args:
    video_data (data type could be 1) torch.Tensor, dtype=torch.int8 or 2) python bytes):
        compressed video content stored in either 1) torch.Tensor 2) python bytes
    seek_frame_margin (double, optional): seeking frame in the stream is imprecise.
        Thus, when video_start_pts is specified, we seek the pts earlier by seek_frame_margin seconds
    read_video_stream (int, optional): whether read video stream. If yes, set to 1. Otherwise, 0
    video_width/video_height/video_min_dimension/video_max_dimension (int): together decide
        the size of decoded frames:

            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the original frame resolution
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension = 0, keep the aspect ratio and resize the
                frame so that shorter edge size is video_min_dimension
            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension != 0, keep the aspect ratio and resize
                the frame so that longer edge size is video_max_dimension
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension != 0, resize the frame so that shorter
                edge size is video_min_dimension, and longer edge size is
                video_max_dimension. The aspect ratio may not be preserved
            - When video_width = 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_height is $video_height
            - When video_width != 0, video_height == 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_width is $video_width
            - When video_width != 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, resize the frame so that frame
                video_width and  video_height are set to $video_width and
                $video_height, respectively
    video_pts_range (list(int), optional): the start and end presentation timestamp of video stream
    video_timebase_numerator / video_timebase_denominator (float, optional): a rational
        number which denotes timebase in video stream
    read_audio_stream (int, optional): whether read audio stream. If yes, set to 1. Otherwise, 0
    audio_samples (int, optional): audio sampling rate
    audio_channels (int optional): audio audio_channels
    audio_pts_range (list(int), optional): the start and end presentation timestamp of audio stream
    audio_timebase_numerator / audio_timebase_denominator (float, optional):
        a rational number which denotes time base in audio stream

    Returns:
        vframes (Tensor[T, H, W, C]): the `T` video frames
        aframes (Tensor[L, K]): the audio frames, where `L` is the number of points and
            `K` is the number of channels
    Zdtyper   r   )r(   
isinstancerH   Tensor
frombufferuint8rI   r
   read_video_from_memoryr/   r:   )rW   r>   r?   r@   rA   rB   rC   rD   rX   rY   rE   rF   rG   r5   rZ   r[   rK   rL   rM   r)   r*   r+   r3   r4   r,   r-   r.   r   r   r   _read_video_from_memory	  s:    Frb   )rW   r   c                 C   s   t | tjstj| tjd} tjj| dddddddddddddddddd}|\
}}}}}}}}	}
}t||||	|
|}|	 
 }|	 
 }|||fS )z
    Decode all frames in the video. Only pts (presentation timestamp) is returned.
    The actual frame pixel data is not copied. Thus, read_video_timestamps(...)
    is much faster than read_video(...)
    r\   r   r   r6   )r]   rH   r^   r_   r`   rI   r
   ra   r2   rP   rQ   )rW   rK   rR   rS   r)   r*   r+   rT   r4   r,   r-   r.   rN   r   r   r   "_read_video_timestamps_from_memoryt  s8    rc   c           	      C   sP   t | tjstj| tjd} tjj| }|\}}}}}}t||||||}|S )zy
    Probe a video in memory and return VideoMetaData with info about the video
    This function is torchscriptable
    r\   )	r]   rH   r^   r_   r`   rI   r
   Zprobe_video_from_memoryr2   )	rW   rK   r)   r*   r+   r,   r-   r.   rN   r   r   r   _probe_video_from_memory  s    rd   pts)r=   	start_ptsend_ptspts_unitr   c              	      s    d krt d dkr"td t| }|j}|j} fdd}d}t}	|rlt|jj	|jj
}	||	}d}
t}|rt|jj	|jj
}||}
t| d||	d|
|d\}}}i }|r|j|d	< |r|j|d
< |||fS )Ninfre   mThe pts_unit 'pts' gives wrong results and will be removed in a follow-up version. Please use pts_unit 'sec'.c                    s`   } }dkrHt td|   }|tdkrHt t d|   }|tdkrXd}||fS )Nsecr   ri   r6   )r   mathfloorr%   ceil)Z	time_baseZstart_offsetZ
end_offsetrg   rh   rf   r   r   get_pts  s    z_read_video.<locals>.get_ptsr<   T)r?   rD   r   rE   r5   r!   r   Z	audio_fps)r%   warningswarnrV   r   r    default_timebaser   r   r   r   r!   rO   r   r#   )r=   rf   rg   rh   rN   r   r    rp   rD   r   r5   r!   rL   r3   _infor   ro   r   _read_video  sF    	

ru   )r=   rh   r   c                    sd   |dkrt d t| \}}}|dkrLt|jj|jj  fdd|D }|jrX|jnd }||fS )Nre   rj   rk   c                    s   g | ]}|  qS r   r   ).0xZvideo_time_baser   r   
<listcomp>  s     z*_read_video_timestamps.<locals>.<listcomp>)	rq   rr   rU   r   r   r   r   r   r   )r=   rh   re   _rN   r   r   rx   r   _read_video_timestamps  s    r{   )r;   r   r   r   r   r   r<   r   r   r   r   r   r<   r   r   )r   Nre   )re   )#rl   rq   Z	fractionsr   typingr   r   r   r   r   rH   	extensionr	   Z_HAS_VIDEO_OPTImportErrorOSErrorrs   r   r   r   r(   r^   r2   r:   strr%   r$   rO   rU   rV   rb   rc   rd   ru   r{   r   r   r   r   <module>   s   

!	!  


a"#               

l(   
>  