""" PyTorch UperNet model. Based on OpenMMLab's implementation, found in https://github.com/open-mmlab/mmsegmentation."""

from typing import List, Optional, Tuple, Union

import torch
from torch import nn
from torch.nn import CrossEntropyLoss

from ... import AutoBackbone
from ...modeling_outputs import SemanticSegmenterOutput
from ...modeling_utils import PreTrainedModel
from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, replace_return_docstrings
from ...utils.backbone_utils import BackboneMixin
from .configuration_upernet import UperNetConfig


UPERNET_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "openmmlab/upernet-convnext-tiny",
]

_CONFIG_FOR_DOC = "UperNetConfig"


class UperNetConvModule(nn.Module):
    """
    A convolutional block that bundles conv/norm/activation layers. This block simplifies the usage of convolution
    layers, which are commonly used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU).
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, int]],
        padding: Union[int, Tuple[int, int], str] = 0,
        bias: bool = False,
        dilation: Union[int, Tuple[int, int]] = 1,
    ) -> None:
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            padding=padding,
            bias=bias,
            dilation=dilation,
        )
        self.batch_norm = nn.BatchNorm2d(out_channels)
        self.activation = nn.ReLU()

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        output = self.conv(input)
        output = self.batch_norm(output)
        output = self.activation(output)

        return output


class UperNetPyramidPoolingBlock(nn.Module):
    def __init__(self, pool_scale: int, in_channels: int, channels: int) -> None:
        super().__init__()
        self.layers = [
            nn.AdaptiveAvgPool2d(pool_scale),
            UperNetConvModule(in_channels, channels, kernel_size=1),
        ]
        for i, layer in enumerate(self.layers):
            self.add_module(str(i), layer)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        hidden_state = input
        for layer in self.layers:
            hidden_state = layer(hidden_state)
        return hidden_state


class UperNetPyramidPoolingModule(nn.Module):
    """
    Pyramid Pooling Module (PPM) used in PSPNet.

    Args:
        pool_scales (`Tuple[int]`):
            Pooling scales used in Pooling Pyramid Module.
        in_channels (`int`):
            Input channels.
        channels (`int`):
            Channels after modules, before conv_seg.
        align_corners (`bool`):
            align_corners argument of F.interpolate.
    """

    def __init__(self, pool_scales: Tuple[int, ...], in_channels: int, channels: int, align_corners: bool) -> None:
        super().__init__()
        self.pool_scales = pool_scales
        self.align_corners = align_corners
        self.in_channels = in_channels
        self.channels = channels
        self.blocks = []
        for i, pool_scale in enumerate(pool_scales):
            block = UperNetPyramidPoolingBlock(pool_scale=pool_scale, in_channels=in_channels, channels=channels)
            self.blocks.append(block)
            self.add_module(str(i), block)

    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
        ppm_outs = []
        for ppm in self.blocks:
            ppm_out = ppm(x)
            upsampled_ppm_out = nn.functional.interpolate(
                ppm_out, size=x.size()[2:], mode="bilinear", align_corners=self.align_corners
            )
            ppm_outs.append(upsampled_ppm_out)
        return ppm_outs


class UperNetHead(nn.Module):
    """
    Unified Perceptual Parsing for Scene Understanding. This head is the implementation of
    [UPerNet](https://arxiv.org/abs/1807.10221).
    """

    def __init__(self, config, in_channels):
        super().__init__()

        self.config = config
        self.pool_scales = config.pool_scales  # e.g. (1, 2, 3, 6)
        self.in_channels = in_channels
        self.channels = config.hidden_size
        self.align_corners = False
        self.classifier = nn.Conv2d(self.channels, config.num_labels, kernel_size=1)

        # PSP Module
        self.psp_modules = UperNetPyramidPoolingModule(
            self.pool_scales, self.in_channels[-1], self.channels, align_corners=self.align_corners
        )
        self.bottleneck = UperNetConvModule(
            self.in_channels[-1] + len(self.pool_scales) * self.channels, self.channels, kernel_size=3, padding=1
        )
        # FPN Module
        self.lateral_convs = nn.ModuleList()
        self.fpn_convs = nn.ModuleList()
        for in_channels in self.in_channels[:-1]:  # skip the top layer
            l_conv = UperNetConvModule(in_channels, self.channels, kernel_size=1)
            fpn_conv = UperNetConvModule(self.channels, self.channels, kernel_size=3, padding=1)
            self.lateral_convs.append(l_conv)
            self.fpn_convs.append(fpn_conv)

        self.fpn_bottleneck = UperNetConvModule(
            len(self.in_channels) * self.channels, self.channels, kernel_size=3, padding=1
        )

    def init_weights(self):
        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Conv2d):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()

    def psp_forward(self, inputs):
        x = inputs[-1]
        psp_outs = [x]
        psp_outs.extend(self.psp_modules(x))
        psp_outs = torch.cat(psp_outs, dim=1)
        output = self.bottleneck(psp_outs)

        return output

    def forward(self, encoder_hidden_states: torch.Tensor) -> torch.Tensor:
        # build laterals
        laterals = [lateral_conv(encoder_hidden_states[i]) for i, lateral_conv in enumerate(self.lateral_convs)]

        laterals.append(self.psp_forward(encoder_hidden_states))

        # build top-down path
        used_backbone_levels = len(laterals)
        for i in range(used_backbone_levels - 1, 0, -1):
            prev_shape = laterals[i - 1].shape[2:]
            laterals[i - 1] = laterals[i - 1] + nn.functional.interpolate(
                laterals[i], size=prev_shape, mode="bilinear", align_corners=self.align_corners
            )

        # build outputs
        fpn_outs = [self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels - 1)]
        # append psp feature
        fpn_outs.append(laterals[-1])

        for i in range(used_backbone_levels - 1, 0, -1):
            fpn_outs[i] = nn.functional.interpolate(
                fpn_outs[i], size=fpn_outs[0].shape[2:], mode="bilinear", align_corners=self.align_corners
            )
        fpn_outs = torch.cat(fpn_outs, dim=1)
        output = self.fpn_bottleneck(fpn_outs)
        output = self.classifier(output)

        return output


class UperNetFCNHead(nn.Module):
    """
    Fully Convolution Networks for Semantic Segmentation. This head is the implementation of
    [FCNNet](https://arxiv.org/abs/1411.4038).

    Args:
        config:
            Configuration.
        in_index (int):
            Index of the backbone feature map used as input. Default: 2.
        kernel_size (int):
            The kernel size for convs in the head. Default: 3.
        dilation (int):
            The dilation rate for convs in the head. Default: 1.
    """

    def __init__(
        self, config, in_index: int = 2, kernel_size: int = 3, dilation: Union[int, Tuple[int, int]] = 1
    ) -> None:
        super().__init__()

        self.config = config
        self.in_channels = config.auxiliary_in_channels
        self.channels = config.auxiliary_channels
        self.num_convs = config.auxiliary_num_convs
        self.concat_input = config.auxiliary_concat_input
        self.in_index = in_index

        conv_padding = (kernel_size // 2) * dilation
        convs = []
        convs.append(
            UperNetConvModule(
                self.in_channels, self.channels, kernel_size=kernel_size, padding=conv_padding, dilation=dilation
            )
        )
        for i in range(self.num_convs - 1):
            convs.append(
                UperNetConvModule(
                    self.channels, self.channels, kernel_size=kernel_size, padding=conv_padding, dilation=dilation
                )
            )
        if self.num_convs == 0:
            self.convs = nn.Identity()
        else:
            self.convs = nn.Sequential(*convs)
        if self.concat_input:
            self.conv_cat = UperNetConvModule(
                self.in_channels + self.channels, self.channels, kernel_size=kernel_size, padding=kernel_size // 2
            )

        self.classifier = nn.Conv2d(self.channels, config.num_labels, kernel_size=1)

    def init_weights(self):
        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Conv2d):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()

    def forward(self, encoder_hidden_states: torch.Tensor) -> torch.Tensor:
        # just take the relevant feature maps
        hidden_states = encoder_hidden_states[self.in_index]
        output = self.convs(hidden_states)
        if self.concat_input:
            output = self.conv_cat(torch.cat([hidden_states, output], dim=1))
        output = self.classifier(output)
        return output


class UperNetPreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config_class = UperNetConfig
    main_input_name = "pixel_values"
    supports_gradient_checkpointing = True

    def _init_weights(self, module):
        if isinstance(module, UperNetPreTrainedModel):
            module.backbone.init_weights()
            module.decode_head.init_weights()
            if module.auxiliary_head is not None:
                module.auxiliary_head.init_weights()

    def init_weights(self):
        """Initialize the weights"""
        self.backbone.init_weights()
        self.decode_head.init_weights()
        if self.auxiliary_head is not None:
            self.auxiliary_head.init_weights()

    def _set_gradient_checkpointing(self, module, value=False):
        if isinstance(module, BackboneMixin):
            module.gradient_checkpointing = value


UPERNET_START_DOCSTRING = r"""
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`UperNetConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
"""

UPERNET_INPUTS_DOCSTRING = r"""
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using
            [`AutoImageProcessor`]. See [`SegformerImageProcessor.__call__`] for details.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers in case the backbone has them. See
            `attentions` under returned tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers of the backbone. See `hidden_states` under
            returned tensors for more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""


@add_start_docstrings(
    """UperNet framework leveraging any vision backbone e.g. for ADE20k, CityScapes.""",
    UPERNET_START_DOCSTRING,
)
class UperNetForSemanticSegmentation(UperNetPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.backbone = AutoBackbone.from_config(config.backbone_config)

        # Semantic segmentation head(s)
        self.decode_head = UperNetHead(config, in_channels=self.backbone.channels)
        self.auxiliary_head = UperNetFCNHead(config) if config.use_auxiliary_head else None

        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(UPERNET_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    @replace_return_docstrings(output_type=SemanticSegmenterOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        pixel_values: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        labels: Optional[torch.Tensor] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, SemanticSegmenterOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
            Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1`, a classification loss is computed (Cross-Entropy).

        Returns:

        Examples:
        ```python
        >>> from transformers import AutoImageProcessor, UperNetForSemanticSegmentation
        >>> from PIL import Image
        >>> from huggingface_hub import hf_hub_download

        >>> image_processor = AutoImageProcessor.from_pretrained("openmmlab/upernet-convnext-tiny")
        >>> model = UperNetForSemanticSegmentation.from_pretrained("openmmlab/upernet-convnext-tiny")

        >>> filepath = hf_hub_download(
        ...     repo_id="hf-internal-testing/fixtures_ade20k", filename="ADE_val_00000001.jpg", repo_type="dataset"
        ... )
        >>> image = Image.open(filepath).convert("RGB")

        >>> inputs = image_processor(images=image, return_tensors="pt")

        >>> outputs = model(**inputs)

        >>> logits = outputs.logits  # shape (batch_size, num_labels, height, width)
        >>> list(logits.shape)
        [1, 150, 512, 512]
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

        outputs = self.backbone.forward_with_filtered_kwargs(
            pixel_values, output_hidden_states=output_hidden_states, output_attentions=output_attentions
        )
        features = outputs.feature_maps

        logits = self.decode_head(features)
        logits = nn.functional.interpolate(logits, size=pixel_values.shape[2:], mode="bilinear", align_corners=False)

        auxiliary_logits = None
        if self.auxiliary_head is not None:
            auxiliary_logits = self.auxiliary_head(features)
            auxiliary_logits = nn.functional.interpolate(
                auxiliary_logits, size=pixel_values.shape[2:], mode="bilinear", align_corners=False
            )

        loss = None
        if labels is not None:
            if self.config.num_labels == 1:
                raise ValueError("The number of labels should be greater than one")
            else:
                # compute weighted loss
                loss_fct = CrossEntropyLoss(ignore_index=self.config.loss_ignore_index)
                loss = loss_fct(logits, labels)
                if auxiliary_logits is not None:
                    auxiliary_loss = loss_fct(auxiliary_logits, labels)
                    loss += self.config.auxiliary_loss_weight * auxiliary_loss

        if not return_dict:
            if output_hidden_states:
                output = (logits,) + outputs[1:]
            else:
                output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SemanticSegmenterOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
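

# Usage sketch (not part of the original module): a tiny, randomly initialized model that
# illustrates how the weighted loss above is produced during training. The ConvNeXt backbone
# sizes below are illustrative assumptions; the one real constraint is that
# `auxiliary_in_channels` matches the channel count of the backbone stage read by
# UperNetFCNHead (`in_index=2`, i.e. the third of the four returned stages).
#
#     import torch
#     from transformers import ConvNextConfig, UperNetConfig, UperNetForSemanticSegmentation
#
#     backbone_config = ConvNextConfig(
#         depths=[1, 1, 1, 1],
#         hidden_sizes=[16, 32, 64, 128],
#         out_features=["stage1", "stage2", "stage3", "stage4"],
#     )
#     config = UperNetConfig(
#         backbone_config=backbone_config,
#         hidden_size=64,
#         num_labels=10,
#         auxiliary_in_channels=64,  # channels of "stage3", consumed by the auxiliary FCN head
#     )
#     model = UperNetForSemanticSegmentation(config)
#
#     pixel_values = torch.randn(1, 3, 64, 64)
#     labels = torch.randint(0, 10, (1, 64, 64))
#     outputs = model(pixel_values=pixel_values, labels=labels)
#     # outputs.loss = decode-head CE loss + auxiliary_loss_weight * auxiliary-head CE loss
#     # outputs.logits has shape (1, 10, 64, 64)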