U
    (d                     @   s&  d dl mZmZmZmZ d dlZd dlm  mZ	 d dl
Z
d dlmZmZ d dlmZmZ ddlmZ dd Zd	d
 Zdd Zdd Zdd Zdd Zejjdd Zdd Zdd Zdd Zdd Zdd Z ejj!dd  Z"d!d" Z#d#d$ Z$d%d& Z%ejjd'd( Z&d-d)d*Z'G d+d, d,ej(Z)dS ).    )OptionalListDictTupleN)nnTensor)boxes	roi_align   )_utilsc           
      C   s   t j|dd}t j|dd}t| |}t |dkd }|| }| j\}}|||dd d}tj|||f || ddd}	|	|	  }	||	fS )a  
    Computes the loss for Faster R-CNN.

    Args:
        class_logits (Tensor)
        box_regression (Tensor)
        labels (list[BoxList])
        regression_targets (Tensor)

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    r   Zdim   gqq?sum)betaZ	reduction)
torchcatFcross_entropywhereshapereshapesizeZsmooth_l1_lossnumel)
class_logitsbox_regressionlabelsregression_targetsZclassification_lossZsampled_pos_inds_subsetZ
labels_posNnum_classesZbox_loss r    J/tmp/pip-unpacked-wheel-vx7f76es/torchvision/models/detection/roi_heads.pyfastrcnn_loss   s    

r"   c                 C   sd   |   }| jd }dd |D }t|}tj||jd}|||f dddf }|j|dd}|S )a  
    From the results of the CNN, post process the masks
    by taking the mask corresponding to the class with max
    probability (which are of fixed size and directly output
    by the CNN) and return the masks in the mask field of the BoxList.

    Args:
        x (Tensor): the mask logits
        labels (list[BoxList]): bounding boxes that are used as
            reference, one for ech image

    Returns:
        results (list[BoxList]): one BoxList for each image, containing
            the extra field mask
    r   c                 S   s   g | ]}|j d  qS r   r   ).0labelr    r    r!   
<listcomp>I   s     z&maskrcnn_inference.<locals>.<listcomp>deviceNr   )Zsigmoidr   r   r   aranger)   split)xr   	mask_probZ	num_masksboxes_per_imageindexr    r    r!   maskrcnn_inference4   s    

r0   c                 C   s\   | |}tj|dddf |gdd}| dddf  |} t| |||fddddf S )a%  
    Given segmentation masks and the bounding boxes corresponding
    to the location of the masks in the image, this function
    crops and resizes the masks in the position defined by the
    boxes. This prepares the masks for them to be fed to the
    loss computation as the targets.
    Nr
   r   g      ?r   )tor   r   r	   )gt_masksr   matched_idxsMroisr    r    r!   project_masks_on_boxesR   s    	
r6   c                    s   | j d  dd t||D } fddt|||D }tj|dd}tj|dd}| dkrl|  d S t| tj|j d |j	d|f |}|S )z
    Args:
        proposals (list[BoxList])
        mask_logits (Tensor)
        targets (list[BoxList])

    Return:
        mask_loss (Tensor): scalar tensor containing the loss
    r   c                 S   s   g | ]\}}|| qS r    r    )r%   Zgt_labelZidxsr    r    r!   r'   n   s     z!maskrcnn_loss.<locals>.<listcomp>c                    s    g | ]\}}}t ||| qS r    )r6   )r%   mpidiscretization_sizer    r!   r'   o   s    r   r   r(   )
r   zipr   r   r   r   r   Z binary_cross_entropy_with_logitsr*   r)   )mask_logits	proposalsr2   	gt_labelsZmask_matched_idxsr   Zmask_targetsZ	mask_lossr    r:   r!   maskrcnn_lossa   s    


 r@   c                 C   s  |d d df }|d d df }||d d df |d d df   }||d d df |d d df   }|d d d f }|d d d f }|d d d f }|d d d f }| d }| d }||d d df d d d f k}	||d d df d d d f k}
|| | }|   }|| | }|   }|d ||	< |d ||
< |dk|dk@ ||k @ ||k @ }| d dk}||@  }|| | }|| }||fS )Nr   r
         ).r   ).r
   ).rA   )floorlong)	keypointsr5   Zheatmap_sizeoffset_xoffset_yZscale_xZscale_yr,   yZx_boundary_indsZy_boundary_indsZ	valid_locZvisvalidZlin_indheatmapsr    r    r!   keypoints_to_heatmap   s0    $$   rK   c                 C   s  t j| dt jd}|| }	|| }
tj|d d d f t|t|fdddd d df }t j|dt jd}||djdd	}|| }|| | }t j	d
t j
d|jt j
d |	jt j
d }t j	d
t j
d|jt j
d |
jt j
d }||jt j
d }||jt j
d }t j|jt j
d}t |jt j
d|jt j
d|jt j
dgd}|| | d }t |}|jt jd| }|d|jt jdd|jt jddd|jt jd}||fS )Nr
   dtypebicubicFr   modeZalign_cornersr   rA   r   r         ?)r   scalar_tensorr   int64r   interpolateintr   argmaxtensorfloat32r1   onesr   stackr*   Zindex_selectview)mapsZmaps_iroi_map_widthroi_map_heightZwidths_iZ	heights_iZ
offset_x_iZ
offset_y_inum_keypointswidth_correctionheight_correctionroi_mapwposx_inty_intr,   rH   Zxy_preds_i_0Zxy_preds_i_1Zxy_preds_i_2
xy_preds_ibaseindend_scores_ir    r    r!   _onnx_heatmaps_to_keypoints   sZ       
  

  rk   c	                 C   s   t jddt|ft j| jd}	t jdt|ft j| jd}
tt|dD ]}t| | | || || || || || || \}}t |	j	t jd|
dj	t jdfd}	t |
j	t jd|j	t jd
dfd}
qL|	|
fS )Nr   rB   rM   r)   rL   )r   zerosrU   rX   r)   ranger   rk   r   r1   	unsqueeze)r\   r5   widths_ceilheights_ceilwidthsheightsrF   rG   r_   xy_preds
end_scoresr9   rg   rj   r    r    r!    _onnx_heatmaps_to_keypoints_loop   s&           *  rv   c                 C   sf  |dddf }|dddf }|dddf |dddf  }|dddf |dddf  }|j dd}|j dd}| }| }| jd }t rt| |||||||tj|tjd	\}	}
|		ddd|
fS tj
t|d|ftj| jd}	tj
t||ftj| jd}
tt|D ]6}t||  }t||  }|| | }|| | }tj| | dddf ||fd	d
ddddf }|jd }||djdd}|| }tj|| |dd}| d | }| d | }|||  |	|dddf< |||  |	|dddf< d|	|dddf< |tj||jd||f |
|ddf< q|		ddd|
fS )zExtract predicted keypoint locations from heatmaps. Output has shape
    (#rois, 4, #keypoints) with the 4 rows corresponding to (x, y, logit, prob)
    for each keypoint.
    Nr   r
   rA   rB   minrL   rl   rN   FrO   r   r   rC   )Zrounding_moderQ   r(   )clampceilr   torchvision_is_tracingrv   r   rR   rS   Zpermuterm   lenrX   r)   rn   rU   itemr   rT   r   rV   divfloatr*   )r\   r5   rF   rG   rr   rs   rp   rq   r_   rt   ru   r9   r]   r^   r`   ra   rb   rc   rd   re   rf   r,   rH   r    r    r!   heatmaps_to_keypoints   s`    
  
   

*r   c                 C   s
  | j \}}}}||kr*td| d| |}g }	g }
t|||D ]B\}}}|| }t|||\}}|	|d |
|d qBtj|	dd}tj|
ddjtj	d}
t
|
d }
| dkst|
dkr|  d S | || || } t| |
 ||
 }|S )Nz_keypoint_logits height and width (last two elements of shape) should be equal. Instead got H = z	 and W = r   r   r   rL   )r   
ValueErrorr<   rK   appendr[   r   r   r1   Zuint8r   r   r}   r   r   r   )keypoint_logitsr>   gt_keypointsZkeypoint_matched_idxsr   KHWr;   rJ   rI   Zproposals_per_imageZgt_kp_in_imageZmidxZkpZheatmaps_per_imageZvalid_per_imageZkeypoint_targetsZkeypoint_lossr    r    r!   keypointrcnn_loss*  s*    r   c           
      C   sb   g }g }dd |D }| j |dd}t||D ]*\}}t||\}}	|| ||	 q.||fS )Nc                 S   s   g | ]}| d qS r#   )r   )r%   boxr    r    r!   r'   N  s     z*keypointrcnn_inference.<locals>.<listcomp>r   r   )r+   r<   r   r   )
r,   r   Zkp_probs	kp_scoresr.   Zx2ZxxZbbZkp_probscoresr    r    r!   keypointrcnn_inferenceI  s    
r   c                 C   s   | d d df | d d df  d }| d d df | d d df  d }| d d df | d d df  d }| d d df | d d df  d }|j tjd| }|j tjd| }|| }|| }|| }|| }	t||||	fd}
|
S )NrA   r   rQ   rB   r
   rL   )r1   r   rX   rZ   )r   scalew_halfh_halfx_cy_cZ
boxes_exp0Z
boxes_exp1Z
boxes_exp2Z
boxes_exp3	boxes_expr    r    r!   _onnx_expand_boxesY  s    $$$$r   c                 C   s  t  rt| |S | d d df | d d df  d }| d d df | d d df  d }| d d df | d d df  d }| d d df | d d df  d }||9 }||9 }t| }|| |d d df< || |d d df< || |d d df< || |d d df< |S )NrA   r   rQ   rB   r
   )r{   r|   r   r   Z
zeros_like)r   r   r   r   r   r   r   r    r    r!   expand_boxesn  s    
$$$$
r   c                 C   s,   t | d|  t jt | t j S )NrA   )r   rW   r1   rX   )r4   paddingr    r    r!   expand_masks_tracing_scale  s    r   c                 C   sN   | j d }tj r t||}nt|d|  | }t| |fd }||fS )Nr   rA   r   )r   r   Z_CZ_get_tracing_stater   r   r   pad)maskr   r4   r   Zpadded_maskr    r    r!   expand_masks  s    

r   c                 C   s  d}t |d |d  | }t |d |d  | }t|d}t|d}| d} tj| ||fddd} | d d } tj||f| j| jd	}t|d d}t	|d d |}	t|d d}
t	|d d |}| |
|d  ||d  ||d  |	|d  f ||
|||	f< |S )
Nr
   rA   r   rB   )r
   r
   r   r   bilinearFrO   rl   )
rU   maxexpandr   rT   r   rm   rM   r)   rx   )r   r   im_him_wZ	TO_REMOVErc   him_maskx_0x_1y_0y_1r    r    r!   paste_mask_in_image  s    


Dr   c                 C   s  t jdt jd}t jdt jd}|d |d  | }|d |d  | }t t ||f}t t ||f}| dd| d| df} tj	| t
|t
|fddd} | d d } t t |d d|f}t t |d d| |df}	t t |d d|f}
t t |d d| |df}| |
|d  ||d  ||d  |	|d  f }t |
|d}t || |d}t ||jt jd|fdd|d d f }t |d|}t |d||	 }t |||fdd d d |f }|S )	Nr
   rL   rA   r   rB   r   FrO   )r   rY   rS   rm   r   r   r   r   r   rT   rU   ro   rx   r1   rX   )r   r   r   r   ZoneZzerorc   r   r   r   r   r   Zunpaded_im_maskZzeros_y0Zzeros_y1Zconcat_0Zzeros_x0Zzeros_x1r   r    r    r!   _onnx_paste_mask_in_image  s*    ((4,"r   c                 C   sX   t d||}t| dD ]6}t| | d || ||}|d}t ||f}q|S Nr   )r   rm   rn   r   r   ro   r   )masksr   r   r   Z
res_appendr9   Zmask_resr    r    r!   _onnx_paste_masks_in_image_loop  s    
r   c                    s   t | |d\} }t||jtjd}|\ t rft| |tj tjdtjtjdd d d f S  fddt	| |D }t
|dkrtj|ddd d d f }n| dd f}|S )N)r   rL   c                    s"   g | ]\}}t |d  | qS r#   )r   )r%   r7   br   r   r    r!   r'     s     z(paste_masks_in_image.<locals>.<listcomp>r   r   r
   )r   r   r1   r   rS   r{   r|   r   rR   r<   r}   rZ   Z	new_empty)r   r   Z	img_shaper   r   resretr    r   r!   paste_masks_in_image  s"       r   c                       sz   e Zd ZejejejdZd fdd	Zdd Z	dd Z
d	d
 Zdd Zdd Zdd Zdd Zdd ZdddZ  ZS )RoIHeads)	box_coderproposal_matcherfg_bg_samplerNc                    s   t    tj| _tj||dd| _t||| _	|d kr>d}t
|| _|| _|| _|| _|	| _|
| _|| _|| _|| _|| _|| _|| _|| _d S )NF)Zallow_low_quality_matches)      $@r         @r   )super__init__box_opsbox_iouZbox_similarity	det_utilsMatcherr   BalancedPositiveNegativeSamplerr   BoxCoderr   box_roi_poolbox_headbox_predictorscore_thresh
nms_threshdetections_per_imgmask_roi_pool	mask_headmask_predictorkeypoint_roi_poolkeypoint_headkeypoint_predictor)selfr   r   r   Zfg_iou_threshZbg_iou_threshZbatch_size_per_imageZpositive_fractionZbbox_reg_weightsr   r   r   r   r   r   r   r   r   	__class__r    r!   r     s&    
zRoIHeads.__init__c                 C   s.   | j d krdS | jd krdS | jd kr*dS dS NFT)r   r   r   r   r    r    r!   has_mask&  s    


zRoIHeads.has_maskc                 C   s.   | j d krdS | jd krdS | jd kr*dS dS r   )r   r   r   r   r    r    r!   has_keypoint/  s    


zRoIHeads.has_keypointc                 C   s   g }g }t |||D ]\}}}| dkrf|j}	tj|jd ftj|	d}
tj|jd ftj|	d}n`t||}| 	|}|j
dd}
||
 }|jtjd}|| j	jk}d||< || j	jk}d||< ||
 || q||fS )Nr   rl   rw   rL   r   )r<   r   r)   r   rm   r   rS   r   r   r   ry   r1   ZBELOW_LOW_THRESHOLDZBETWEEN_THRESHOLDSr   )r   r>   gt_boxesr?   r3   r   Zproposals_in_imagegt_boxes_in_imageZgt_labels_in_imager)   Zclamped_matched_idxs_in_imageZlabels_in_imageZmatch_quality_matrixZmatched_idxs_in_imageZbg_indsZignore_indsr    r    r!   assign_targets_to_proposals8  s.    
  

z$RoIHeads.assign_targets_to_proposalsc           	      C   sN   |  |\}}g }tt||D ](\}\}}t||B d }|| q |S r   )r   	enumerater<   r   r   r   )	r   r   Zsampled_pos_indsZsampled_neg_indssampled_indsZimg_idxZpos_inds_imgZneg_inds_imgimg_sampled_indsr    r    r!   	subsample[  s    zRoIHeads.subsamplec                 C   s   dd t ||D }|S )Nc                 S   s   g | ]\}}t ||fqS r    )r   r   )r%   ZproposalZgt_boxr    r    r!   r'   f  s     z-RoIHeads.add_gt_proposals.<locals>.<listcomp>)r<   )r   r>   r   r    r    r!   add_gt_proposalsd  s    zRoIHeads.add_gt_proposalsc                 C   sj   |d krt dtdd |D s*t dtdd |D sDt d|  rftdd |D sft dd S )	Ntargets should not be Nonec                 S   s   g | ]}d |kqS r   r    r%   tr    r    r!   r'   n  s     z*RoIHeads.check_targets.<locals>.<listcomp>z0Every element of targets should have a boxes keyc                 S   s   g | ]}d |kqS r   r    r   r    r    r!   r'   p  s     z1Every element of targets should have a labels keyc                 S   s   g | ]}d |kqS r   r    r   r    r    r!   r'   s  s     z0Every element of targets should have a masks key)r   allr   )r   targetsr    r    r!   check_targetsj  s    zRoIHeads.check_targetsc                    s  |  | |d krtd|d j |d j} fdd|D }dd |D }| ||}| |||\}}| |}g }	t|}
t|
D ]r}|| }|| | ||< || | ||< || | ||< || }|	 dkrt
jd |d}|	|||   q| j|	|}||||fS )Nr   r   c                    s   g | ]}|d    qS r   )r1   r   rL   r    r!   r'     s     z4RoIHeads.select_training_samples.<locals>.<listcomp>c                 S   s   g | ]}|d  qS r   r    r   r    r    r!   r'     s     )r
   r   rl   )r   r   rM   r)   r   r   r   r}   rn   r   r   rm   r   r   encode)r   r>   r   r)   r   r?   r3   r   r   Zmatched_gt_boxes
num_imagesimg_idr   r   r   r    rL   r!   select_training_samplesv  s.    



z RoIHeads.select_training_samplesc                 C   s  |j }|jd }dd |D }| j||}t|d}	||d}
|	|d}g }g }g }t|
||D ]F\}}}t	||}t
j||d}|dd|}|d d dd f }|d d dd f }|d d dd f }|dd}|d}|d}t
|| jkd }|| || ||   }}}tj|dd	}|| || ||   }}}t|||| j}|d | j }|| || ||   }}}|| || || qh|||fS )
Nr   c                 S   s   g | ]}|j d  qS r#   r$   )r%   Zboxes_in_imager    r    r!   r'     s     z3RoIHeads.postprocess_detections.<locals>.<listcomp>r   r(   r
   r   g{Gz?)Zmin_size)r)   r   r   decoder   Zsoftmaxr+   r<   r   Zclip_boxes_to_imager   r*   r[   Z	expand_asr   r   r   Zremove_small_boxesZbatched_nmsr   r   r   )r   r   r   r>   image_shapesr)   r   r.   Z
pred_boxesZpred_scoresZpred_boxes_listZpred_scores_listZ	all_boxesZ
all_scoresZ
all_labelsr   r   Zimage_shaper   ZindsZkeepr    r    r!   postprocess_detections  s>    




zRoIHeads.postprocess_detectionsc           ,      C   sf  |dk	r|D ]z}t jt jt jf}|d j|krBtd|d j |d jt jksZtd|  r|d jt jkstd|d j q| j	r| 
||\}}}}	nd}d}	d}| |||}
| |
}
| |
\}}g }i }| j	r&|dkrtd|	dkrtd	t||||	\}}||d
}nL| ||||\}}}t|}t|D ]$}||| || || d qL|  rdd |D }| j	r |dkrtdt|}g }g }t|D ]@}t || dkd }||| |  ||| |  qnd}| jdk	r4| |||}| |}| |}ntdi }| j	r|dksf|dksf|dkrntddd |D }dd |D }t|||||}d|i}n6dd |D }t||}t||D ]\} }!| |!d< q|| | jdk	r^| jdk	r^| j dk	r^dd |D }"| j	rt|}g }"g }|dkr@tdt|D ]@}t || dkd }|"|| |  ||| |  qHnd}| ||"|}#| |#}#|  |#}$i }%| j	r |dks|dkrtddd |D }&t!|$|"|&|}'d|'i}%nT|$dks|"dkrtdt"|$|"\}(})t|(|)|D ]\}*}+}!|*|!d< |+|!d< q6||% ||fS )z
        Args:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
        Nr   z-target boxes must of float type, instead got r   zAtarget labels must of int64 type, instead got {t['labels'].dtype}rE   z1target keypoints must of float type, instead got zlabels cannot be Nonez!regression_targets cannot be None)loss_classifierloss_box_reg)r   r   r   c                 S   s   g | ]}|d  qS r   r    r%   r8   r    r    r!   r'     s     z$RoIHeads.forward.<locals>.<listcomp>z0if in trainning, matched_idxs should not be Noner   z%Expected mask_roi_pool to be not NonezCtargets, pos_matched_idxs, mask_logits cannot be None when trainingc                 S   s   g | ]}|d  qS r   r    r   r    r    r!   r'   /  s     c                 S   s   g | ]}|d  qS r   r    r   r    r    r!   r'   0  s     	loss_maskc                 S   s   g | ]}|d  qS r   r    )r%   rr    r    r!   r'   4  s     r   c                 S   s   g | ]}|d  qS r   r    r   r    r    r!   r'   B  s     zJboth targets and pos_matched_idxs should not be None when in training modec                 S   s   g | ]}|d  qS )rE   r    r   r    r    r!   r'   [  s     loss_keypointzXboth keypoint_logits and keypoint_proposals should not be None when not in training modeZkeypoints_scores)#r   r   doubleZhalfrM   	TypeErrorrS   r   rX   Ztrainingr   r   r   r   r   r"   r   r}   rn   r   r   r   r   r   r   	Exceptionr@   r0   r<   updater   r   r   r   r   ),r   featuresr>   r   r   r   Zfloating_point_typesr3   r   r   Zbox_featuresr   r   resultZlossesr   r   r   r   r   r9   Zmask_proposalsZpos_matched_idxsr   rd   Zmask_featuresr=   r   r2   r?   Zrcnn_loss_maskZmasks_probsr-   r   Zkeypoint_proposalsZkeypoint_featuresr   r   r   Zrcnn_loss_keypointZkeypoints_probsr   Zkeypoint_probZkpsr    r    r!   forward  s    










   

zRoIHeads.forward)NNNNNN)N)__name__
__module____qualname__r   r   r   r   __annotations__r   r   r   r   r   r   r   r   r   r   __classcell__r    r    r   r!   r     s*         3		#	&@ r   )r
   )*typingr   r   r   r   r   Ztorch.nn.functionalr   Z
functionalr   r{   r   Ztorchvision.opsr   r   r	    r   r   r"   r0   r6   r@   rK   rk   ZjitZ_script_if_tracingrv   r   r   r   r   r   Zunusedr   r   r   r   r   r   Moduler   r    r    r    r!   <module>   s8   ( $3
@
$
	
