U
    (d-V                  
   @   sX  d dl Z d dlmZ d dlmZmZmZmZ d dlZd dlm	Z	m
Z
 d dlmZ d dlmZmZmZmZ G dd dZejje	e	e	e	d	d
dZG dd dZG dd dZG dd dZG dd deZe
jeddddZe
jeeef ee dddZejj e	edddZ!e	eeedddZ"d#e#ee	e	e	eee#ef  e	d d!d"Z$dS )$    N)OrderedDict)DictListOptionalTuple)Tensornn)
functional)FrozenBatchNorm2dcomplete_box_iou_lossdistance_box_iou_lossgeneralized_box_iou_lossc                   @   sF   e Zd ZdZeeddddZee e	ee ee f dddZ
dS )	BalancedPositiveNegativeSamplerz`
    This class samples batches, ensuring that they contain a fixed proportion of positives
    N)batch_size_per_imagepositive_fractionreturnc                 C   s   || _ || _dS )z
        Args:
            batch_size_per_image (int): number of elements to be selected per image
            positive_fraction (float): percentage of positive elements per batch
        N)r   r   )selfr   r    r   G/tmp/pip-unpacked-wheel-vx7f76es/torchvision/models/detection/_utils.py__init__   s    z(BalancedPositiveNegativeSampler.__init__)matched_idxsr   c                 C   s   g }g }|D ]}t |dkd }t |dkd }t| j| j }t| |}| j| }t| |}t j| |jdd| }	t j| |jdd| }
||	 }||
 }t j	|t j
d}t j	|t j
d}d||< d||< || || q||fS )a  
        Args:
            matched idxs: list of tensors containing -1, 0 or positive values.
                Each tensor corresponds to a specific image.
                -1 values are ignored, 0 are considered as negatives and > 0 as
                positives.

        Returns:
            pos_idx (list[tensor])
            neg_idx (list[tensor])

        Returns two lists of binary masks for each image.
        The first list contains the positive elements that were selected,
        and the second list the negative example.
           r   deviceNdtype)torchwhereintr   r   minnumelZrandpermr   Z
zeros_likeZuint8append)r   r   Zpos_idxZneg_idxZmatched_idxs_per_imageZpositivenegativenum_posZnum_negZperm1Zperm2Zpos_idx_per_imageZneg_idx_per_imageZpos_idx_per_image_maskZneg_idx_per_image_maskr   r   r   __call__   s(    

z(BalancedPositiveNegativeSampler.__call__)__name__
__module____qualname____doc__r   floatr   r   r   r   r$   r   r   r   r   r      s   	r   )reference_boxes	proposalsweightsr   c                 C   s~  |d }|d }|d }|d }|dddf  d}|dddf  d}|dddf  d}	|dddf  d}
| dddf  d}| dddf  d}| dddf  d}| dddf  d}|	| }|
| }|d|  }|d|  }|| }|| }|d|  }|d|  }|||  | }|||  | }|t||  }|t||  }tj||||fdd}|S )z
    Encode a set of proposals with respect to some
    reference boxes

    Args:
        reference_boxes (Tensor): reference boxes
        proposals (Tensor): boxes to be encoded
        weights (Tensor[4]): the weights for ``(x, y, w, h)``
    r   r         N      ?Zdim)	unsqueezer   logcat)r*   r+   r,   wxwywwwhZproposals_x1Zproposals_y1Zproposals_x2Zproposals_y2Zreference_boxes_x1Zreference_boxes_y1Zreference_boxes_x2Zreference_boxes_y2Z	ex_widthsZ
ex_heightsZex_ctr_xZex_ctr_yZ	gt_widthsZ
gt_heightsZgt_ctr_xZgt_ctr_yZ
targets_dxZ
targets_dyZ
targets_dwZ
targets_dhtargetsr   r   r   encode_boxesJ   s4    r9   c                   @   s   e Zd ZdZedfeeeeef eddddZe	e
 e	e
 e	e
 ddd	Ze
e
e
dd
dZe
e	e
 e
dddZe
e
e
dddZdS )BoxCoderz~
    This class encodes and decodes a set of bounding boxes into
    the representation used for training the regressors.
    g     @O@N)r,   bbox_xform_clipr   c                 C   s   || _ || _dS )za
        Args:
            weights (4-element tuple)
            bbox_xform_clip (float)
        N)r,   r;   )r   r,   r;   r   r   r   r      s    zBoxCoder.__init__r*   r+   r   c                 C   sB   dd |D }t j|dd}t j|dd}| ||}||dS )Nc                 S   s   g | ]}t |qS r   )len.0br   r   r   
<listcomp>   s     z#BoxCoder.encode.<locals>.<listcomp>r   r0   )r   r3   encode_singlesplit)r   r*   r+   boxes_per_imager8   r   r   r   encode   s
    zBoxCoder.encodec                 C   s.   |j }|j}tj| j||d}t|||}|S )z
        Encode a set of proposals with respect to some
        reference boxes

        Args:
            reference_boxes (Tensor): reference boxes
            proposals (Tensor): boxes to be encoded
        r   r   )r   r   r   Z	as_tensorr,   r9   )r   r*   r+   r   r   r,   r8   r   r   r   rB      s
    	zBoxCoder.encode_single	rel_codesboxesr   c                 C   s   t t|ttfd t t|t jd dd |D }t j|dd}d}|D ]}||7 }qN|dkrp||d}| ||}|dkr||dd}|S )	Nz2This function expects boxes of type list or tuple.z5This function expects rel_codes of type torch.Tensor.c                 S   s   g | ]}| d qS )r   sizer>   r   r   r   rA      s     z#BoxCoder.decode.<locals>.<listcomp>r   r0      )	r   _assert
isinstancelisttupler   r3   Zreshapedecode_single)r   rH   rI   rD   Zconcat_boxesZbox_sumval
pred_boxesr   r   r   decode   s&    

zBoxCoder.decodec                 C   s  | |j}|dddf |dddf  }|dddf |dddf  }|dddf d|  }|dddf d|  }| j\}}}	}
|dddddf | }|dddddf | }|dddddf |	 }|dddddf |
 }tj|| jd}tj|| jd}||dddf  |dddf  }||dddf  |dddf  }t||dddf  }t||dddf  }tjd|j|jd	| }tjd|j|jd	| }|| }|| }|| }|| }tj	||||fdd

d}|S )z
        From a set of original boxes and encoded relative box offsets,
        get the decoded boxes.

        Args:
            rel_codes (Tensor): encoded boxes
            boxes (Tensor): reference boxes.
        Nr-   r   r.   r   r/   rM   )maxrF   r0   )tor   r,   r   clampr;   exptensorr   stackflatten)r   rH   rI   ZwidthsZheightsctr_xctr_yr4   r5   r6   r7   ZdxZdyZdwZdhZ
pred_ctr_xZ
pred_ctr_yZpred_wZpred_hZc_to_c_hZc_to_c_wpred_boxes1pred_boxes2pred_boxes3pred_boxes4rT   r   r   r   rR      s0    
  $$zBoxCoder.decode_single)r%   r&   r'   r(   mathr2   r   r)   r   r   r   rE   rB   rU   rR   r   r   r   r   r:   z   s    r:   c                   @   sF   e Zd ZdZdeddddZeeeddd	Zeeed
ddZdS )BoxLinearCoderz
    The linear box-to-box transform defined in FCOS. The transformation is parameterized
    by the distance from the center of (square) src box to 4 edges of the target box.
    TN)normalize_by_sizer   c                 C   s
   || _ dS )zq
        Args:
            normalize_by_size (bool): normalize deltas by the size of src (anchor) boxes.
        N)re   )r   re   r   r   r   r      s    zBoxLinearCoder.__init__r<   c                 C   s  d|dddf |dddf   }d|dddf |dddf   }||dddf  }||dddf  }|dddf | }|dddf | }t j||||fdd}	| jr|dddf |dddf  }
|dddf |dddf  }t j|
||
|fdd}|	| }	|	S )aB  
        Encode a set of proposals with respect to some reference boxes

        Args:
            reference_boxes (Tensor): reference boxes
            proposals (Tensor): boxes to be encoded

        Returns:
            Tensor: the encoded relative box offsets that can be used to
            decode the boxes.
        r/   Nr   r-   r   r.   r0   )r   r[   re   )r   r*   r+   Zreference_boxes_ctr_xZreference_boxes_ctr_yZtarget_lZtarget_tZtarget_rZtarget_br8   Zreference_boxes_wZreference_boxes_hZreference_boxes_sizer   r   r   rB      s     $$  
 zBoxLinearCoder.encode_singlerG   c                 C   s"  | |j}d|dddf |dddf   }d|dddf |dddf   }| jr|dddf |dddf  }|dddf |dddf  }tj||||fdd}|| }||dddf  }||dddf  }	||dddf  }
||dddf  }tj||	|
|fdd}|S )a:  
        From a set of original boxes and encoded relative box offsets,
        get the decoded boxes.

        Args:
            rel_codes (Tensor): encoded boxes
            boxes (Tensor): reference boxes.

        Returns:
            Tensor: the predicted boxes with the encoded relative box offsets.
        r/   Nr   r-   r   r.   r0   )rW   r   re   r   r[   )r   rH   rI   r]   r^   Zboxes_wZboxes_hZ
boxes_sizer_   r`   ra   rb   rT   r   r   r   rR     s    $$  zBoxLinearCoder.decode_single)T)	r%   r&   r'   r(   boolr   r   rB   rR   r   r   r   r   rd      s   !rd   c                   @   s\   e Zd ZdZdZdZeedZdeee	dddd	Z
eed
ddZeeeddddZdS )Matchera  
    This class assigns to each predicted "element" (e.g., a box) a ground-truth
    element. Each predicted element will have exactly zero or one matches; each
    ground-truth element may be assigned to zero or more predicted elements.

    Matching is based on the MxN match_quality_matrix, that characterizes how well
    each (ground-truth, predicted)-pair match. For example, if the elements are
    boxes, the matrix may contain box IoU overlap values.

    The matcher returns a tensor of size N containing the index of the ground-truth
    element m that matches to prediction n. If there is no match, a negative value
    is returned.
    rL   )BELOW_LOW_THRESHOLDBETWEEN_THRESHOLDSFN)high_thresholdlow_thresholdallow_low_quality_matchesr   c                 C   s2   d| _ d| _t||kd || _|| _|| _dS )a  
        Args:
            high_threshold (float): quality values greater than or equal to
                this value are candidate matches.
            low_threshold (float): a lower quality threshold used to stratify
                matches into three levels:
                1) matches >= high_threshold
                2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold)
                3) BELOW_LOW_THRESHOLD matches in [0, low_threshold)
            allow_low_quality_matches (bool): if True, produce additional matches
                for predictions that have only low-quality match candidates. See
                set_low_quality_matches_ for more details.
        rL   rh   z)low_threshold should be <= high_thresholdN)ri   rj   r   rN   rk   rl   rm   )r   rk   rl   rm   r   r   r   r   G  s    zMatcher.__init__match_quality_matrixr   c                 C   s   |  dkr,|jd dkr$tdntd|jdd\}}| jrL| }nd}|| jk }|| jk|| jk @ }| j||< | j	||< | jr|dkrt
dd n| ||| |S )a  
        Args:
            match_quality_matrix (Tensor[float]): an MxN tensor, containing the
            pairwise quality between M ground-truth elements and N predicted elements.

        Returns:
            matches (Tensor[int64]): an N tensor where N[i] is a matched gt in
            [0, M - 1] or a negative value indicating that prediction i could not
            be matched.
        r   zENo ground-truth boxes available for one of the images during trainingzANo proposal boxes available for one of the images during trainingr0   NFzall_matches should not be None)r    shape
ValueErrorrV   rm   clonerl   rk   ri   rj   r   rN   set_low_quality_matches_)r   ro   Zmatched_valsmatchesall_matchesZbelow_low_thresholdZbetween_thresholdsr   r   r   r$   \  s"    




zMatcher.__call__)rt   ru   ro   r   c                 C   sB   |j dd\}}t||dddf k}|d }|| ||< dS )ax  
        Produce additional matches for predictions that have only low-quality matches.
        Specifically, for each ground-truth find the set of predictions that have
        maximum overlap with it (including ties); for each prediction in that set, if
        it is unmatched, then match it to the ground-truth with which it has the highest
        quality value.
        r   r0   N)rV   r   r   )r   rt   ru   ro   Zhighest_quality_foreach_gt_Z gt_pred_pairs_of_highest_qualityZpred_inds_to_updater   r   r   rs     s    	z Matcher.set_low_quality_matches_)F)r%   r&   r'   r(   ri   rj   r   __annotations__r)   rf   r   r   r$   rs   r   r   r   r   rg   0  s   (rg   c                       s8   e Zd Zedd fddZeed fddZ  ZS )
SSDMatcherN)	thresholdr   c                    s   t  j||dd d S )NF)rm   )superr   )r   ry   	__class__r   r   r     s    zSSDMatcher.__init__rn   c                    s>   t  |}|jdd\}}tj|dtj|jd||< |S )Nr   r0   r   rF   )rz   r$   rV   r   ZarangerK   Zint64r   )r   ro   rt   rv   Zhighest_quality_pred_foreach_gtr{   r   r   r$     s      
zSSDMatcher.__call__)r%   r&   r'   r)   r   r   r$   __classcell__r   r   r{   r   rx     s   rx   )modelepsr   c                 C   s"   |   D ]}t|tr||_qdS )a  
    This method overwrites the default eps values of all the
    FrozenBatchNorm2d layers of the model with the provided value.
    This is necessary to address the BC-breaking change introduced
    by the bug-fix at pytorch/vision#2933. The overwrite is applied
    only when the pretrained weights are loaded to maintain compatibility
    with previous versions.

    Args:
        model (nn.Module): The model on which we perform the overwrite.
        eps (float): The new value of eps.
    N)modulesrO   r
   r   )r~   r   moduler   r   r   overwrite_eps  s    
r   )r~   rK   r   c              	   C   s   | j }|   t f t|  j}tjdd|d |d f|d}| |}t|tj	rft
d|fg}dd | D }W 5 Q R X |r|   |S )a  
    This method retrieves the number of output channels of a specific model.

    Args:
        model (nn.Module): The model for which we estimate the out_channels.
            It should return a single Tensor or an OrderedDict[Tensor].
        size (Tuple[int, int]): The size (wxh) of the input.

    Returns:
        out_channels (List[int]): A list of the output channels of the model.
    r   r.   r   r   0c                 S   s   g | ]}| d qS )r   rJ   )r?   xr   r   r   rA     s     z)retrieve_out_channels.<locals>.<listcomp>)Ztrainingevalr   Zno_gradnext
parametersr   zerosrO   r   r   valuesZtrain)r~   rK   Zin_trainingr   Ztmp_imgfeaturesZout_channelsr   r   r   retrieve_out_channels  s    
r   )vr   c                 C   s   | S )Nr   )r   r   r   r   _fake_cast_onnx  s    r   )input	orig_kvalaxisr   c                 C   sZ   t j st|| |S t | | d}t t t j|g|j	d|fd}t
|S )aw  
    ONNX spec requires the k-value to be less than or equal to the number of inputs along
    provided dim. Certain models use the number of elements along a particular axis instead of K
    if K exceeds the number of elements along that axis. Previously, python's min() function was
    used to determine whether to use the provided k-value or the specified dim axis value.

    However in cases where the model is being exported in tracing mode, python min() is
    static causing the model to be traced incorrectly and eventually fail at the topk node.
    In order to avoid this situation, in tracing mode, torch.min() is used instead.

    Args:
        input (Tensor): The orignal input tensor.
        orig_kval (int): The provided k-value.
        axis(int): Axis along which we retreive the input size.

    Returns:
        min_kval (int): Appropriately selected k-value.
    r   r   )r   jit
is_tracingr   rK   Z_shape_as_tensorr1   r3   rZ   r   r   )r   r   r   Zaxis_dim_valZmin_kvalr   r   r   	_topk_min  s
    
$r   )type	box_coderanchors_per_imagematched_gt_boxes_per_imagebbox_regression_per_imagecnfr   c           
      C   s   t | dkd|   | dkr:|||}tj||ddS | dkr||||}|d k	rfd|krf|d nd}tj||d|d	S |||}|d k	rd
|kr|d
 nd}	| dkrt||d|	dS | dkrt||d|	dS t	||d|	dS d S )N)l1	smooth_l1cioudiouZgiouzUnsupported loss: r   sum)	reductionr   betag      ?)r   r   r   gHz>r   )r   r   r   )
r   rN   rB   FZl1_lossZsmooth_l1_lossrR   r   r   r   )
r   r   r   r   r   r   Ztarget_regressionr   Zbbox_per_imager   r   r   r   	_box_loss  s    r   )N)%rc   collectionsr   typingr   r   r   r   r   r   r   Ztorch.nnr	   r   Ztorchvision.opsr
   r   r   r   r   r   Z_script_if_tracingr9   r:   rd   rg   rx   Moduler)   r   r   r   Zunusedr   r   strr   r   r   r   r   <module>   s8   ?/iMr   