U
    9%e=                     @   sd   d Z ddlmZ ddlmZ eeZdddZG dd deZ	G d	d
 d
eZ
G dd deZdS )z Idefics model configuration   )PretrainedConfig)loggingzEhttps://huggingface.co/HuggingFaceM4/idefics-9b/blob/main/config.jsonzFhttps://huggingface.co/HuggingFaceM4/idefics-80b/blob/main/config.json)zHuggingFaceM4/idefics-9bzHuggingFaceM4/idefics-80bc                       s.   e Zd ZdZdZddiZd fdd	Z  ZS )IdeficsVisionConfiga@
  
    This is the configuration class to store the configuration of a [`IdeficsModel`]. It is used to instantiate an
    Idefics model according to the specified arguments, defining the model architecture. Instantiating a configuration
    with the defaults will yield a similar configuration to that of the Idefics-9B.

    e.g. [HuggingFaceM4/idefics-9b](https://huggingface.co/HuggingFaceM4/idefics-9b)

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        hidden_size (`int`, *optional*, defaults to 768):
            Dimensionality of the encoder layers and the pooler layer. (elsewhere referred to as `hidden_size`)
        image_size (`int`, *optional*, defaults to 224):
            The size (resolution) of each image.
        intermediate_size (`int`, *optional*, defaults to 5120):
            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        patch_size (`int`, *optional*, defaults to 14):
            The size (resolution) of each patch.
        num_hidden_layers (`int`, *optional*, defaults to 32):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 16):
            Number of attention heads for each attention layer in the Transformer encoder.
        image_num_channels (`int`, *optional*, defaults to `3`):
            Number of image channels.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
            `"relu"`, `"selu"` and `"gelu_new"` ``"quick_gelu"` are supported.
        layer_norm_eps (`float`, *optional*, defaults to 1e-5):
            The epsilon used by the layer normalization layers.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        initializer_factor (`float`, *optional*, defaults to 1.0):
            A factor for initializing all weight matrices (should be kept to 1.0, used internally for initialization
            testing).
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
    ideficshidden_size	embed_dim                   r   geluh㈵>        {Gz?      ?c                    sZ   || _ || _|| _|| _|| _|| _|| _|	| _|
| _|| _	|| _
|| _t jf | d S N)r   
image_sizeintermediate_size
patch_sizenum_hidden_layersnum_attention_headsnum_channelslayer_norm_epsattention_dropoutinitializer_rangeinitializer_factor
hidden_actsuper__init__)selfr   r   r   r   r   r   r   r   r   r   r   r   kwargs	__class__ p/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/transformers/models/idefics/configuration_idefics.pyr    P   s    zIdeficsVisionConfig.__init__)r   r	   r
   r   r   r   r   r   r   r   r   r   )__name__
__module____qualname____doc__
model_typeZattribute_mapr    __classcell__r%   r%   r#   r&   r   "   s$   (             r   c                       s&   e Zd ZdZdZd
 fdd		Z  ZS )IdeficsPerceiverConfiga  
    This is the configuration class to store the configuration of a [`IdeficsModel`]. It is used to instantiate an
    Idefics model according to the specified arguments, defining the model architecture. Instantiating a configuration
    with the defaults will yield a similar configuration to that of the Idefics-9B.

    e.g. [HuggingFaceM4/idefics-9b](https://huggingface.co/HuggingFaceM4/idefics-9b)

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        use_resampler (`bool`, *optional*, defaults to `False`):
            Whether or not to use the resampler
        resampler_n_latents (`int`, *optional*, defaults to ):
            Number of latent embeddings to resample ("compress") the input sequence to (usually < 128).
        resampler_depth (`int`, *optional*, defaults to 6):
            Depth of the Perceiver Resampler (Transformer w/ cross attention). Should be shallow (< 3).
        resampler_n_heads (`int`, *optional*, defaults to 16):
            Number of heads in each Transformer block (for multi-headed self-attention).
        resampler_head_dim (`int`, *optional*, defaults to 96):
            Dimensionality of each head projection in the Transformer block.
        qk_layer_norms_perceiver (`bool`, *optional*, defaults to `False`):
            Whether or not to use qk layer norms in perceiver
    r   F@      r   `   c                    s6   || _ || _|| _|| _|| _|| _t jf | d S r   )use_resamplerresampler_n_latentsresampler_depthresampler_n_headsresampler_head_dimqk_layer_norms_perceiverr   r    )r!   r1   r2   r3   r4   r5   r6   r"   r#   r%   r&   r       s    
zIdeficsPerceiverConfig.__init__)Fr.   r/   r   r0   F)r'   r(   r)   r*   r+   r    r,   r%   r%   r#   r&   r-   p   s         r-   c                       sb   e Zd ZdZdZdZddddddd	d
ddd	ddddddddddg ddg dddf fdd	Z  ZS )IdeficsConfiga  
    This is the configuration class to store the configuration of a [`IdeficsModel`]. It is used to instantiate an
    Idefics model according to the specified arguments, defining the model architecture. Instantiating a configuration
    with the defaults will yield a similar configuration to that of the Idefics-9B.

    e.g. [HuggingFaceM4/idefics-9b](https://huggingface.co/HuggingFaceM4/idefics-9b)

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        additional_vocab_size (`int`, *optional`, defaults to 0):
            Additional vocabulary size of the model, typically for the special "<img>" token. Additional vocab tokens
            are always trainable whereas regular vocab tokens can be frozen or not.
        vocab_size (`int`, *optional*, defaults to 32000):
            Vocabulary size of the Idefics model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`~IdeficsModel`]
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 11008):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 32):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the Transformer encoder.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        alpha_initializer (`str`, *optional*, defaults to `"zeros"`):
            Initialization type for the alphas.
        alphas_initializer_range (`float`, *optional*, defaults to 0.0):
            The standard deviation of the truncated_normal_initializer for initializing the alphas in the Gated Cross
            Attention.
        alpha_type (`str`, *optional*, defaults to `"float"`):
            Whether the gating alphas should be vectors or single floats.
        rms_norm_eps (`float`, *optional*, defaults to 1e-6):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        pad_token_id (`int`, *optional*, defaults to 0)
            Padding token id.
        bos_token_id (`int`, *optional*, defaults to 1)
            Beginning of stream token id.
        eos_token_id (`int`, *optional*, defaults to 2)
            End of stream token id.
        tie_word_embeddings(`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings
        cross_layer_interval (`int`, *optional*, default to 1)
            Interval for cross attention (from text to image) layers.
        qk_layer_norms (`bool`, *optional*, defaults to `False`): Whether to add layer norm after q and k
        freeze_text_layers (`bool`, *optional*, defaults to `True`): Whether to freeze text layers
        freeze_text_module_exceptions (`bool`, *optional*, defaults to `[]`):
            Exceptions to freezing text layers when `freeze_text_layers` is `True`
        freeze_lm_head (`bool`, *optional*, defaults to `False`): Whether to freeze lm head
        freeze_vision_layers (`bool`, *optional*, defaults to `True`):  Whether to freeze vision layers
        freeze_vision_module_exceptions (`bool`, *optional*, defaults to `[]`):
            Exceptions to freezing vision layers when `freeze_vision_layers` is `True`
        use_resampler (`bool`, *optional*, defaults to `False`): Whether to use the Resampler
        vision_config (`IdeficsVisionConfig`,  *optional*): Custom vision config or dict
        perceiver_config (`IdeficsPerceiverConfig`,  *optional*): Custom perceiver config or dict

    Example:

    ```python
    >>> from transformers import IdeficsModel, IdeficsConfig

    >>> # Initializing a Idefics idefics-9b style configuration
    >>> configuration = IdeficsConfig()

    >>> # Initializing a model from the idefics-9b style configuration
    >>> model = IdeficsModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```r   Fi }      i   i +  r   r   Zsilur   Zzerosfloatgư>T      Nc                    s  || _ || _|| _|| _|| _|| _|| _|| _|	| _|
| _	|| _
|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|d krt | _n(t|trtf || _nt|tr|| _|d krt | _n(t|trtf || _nt|tr|| _t jf ||||d| d S )N)pad_token_idbos_token_ideos_token_idtie_word_embeddings)
vocab_sizeadditional_vocab_sizer   r   r   r   dropoutr   r   alpha_initializeralphas_initializer_range
alpha_typerms_norm_eps	use_cachecross_layer_intervalqk_layer_normsfreeze_vision_layersfreeze_text_layersfreeze_text_module_exceptionsfreeze_vision_module_exceptionsfreeze_lm_headr1   r-   perceiver_config
isinstancedictr   vision_configr   r    )r!   r@   rA   r   r   r   r   rB   r   r   rC   rD   rE   rF   rG   r<   r=   r>   r?   rH   rI   rK   rL   rN   rJ   rM   r1   rR   rO   r"   r#   r%   r&   r       sT     





zIdeficsConfig.__init__)r'   r(   r)   r*   r+   Zis_compositionr    r,   r%   r%   r#   r&   r7      s@   Or7   N)r*   Zconfiguration_utilsr   utilsr   Z
get_loggerr'   loggerZ%IDEFICS_PRETRAINED_CONFIG_ARCHIVE_MAPr   r-   r7   r%   r%   r%   r&   <module>   s   
N/