Skip to content

Commit

Permalink
Idefics: fix docstring (#35079)
Browse files Browse the repository at this point in the history
nit: fix docstring
  • Loading branch information
zucchini-nlp authored Jan 6, 2025
1 parent 32aa2db commit 9895f7d
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 5 deletions.
5 changes: 3 additions & 2 deletions src/transformers/models/idefics/modeling_idefics.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,7 @@ def forward(


class IdeficsGatedCrossAttentionLayer(nn.Module):
def __init__(self, config: IdeficsConfig):
def __init__(self, config: IdeficsConfig, layer_idx: int = None):
super().__init__()
self.hidden_size = config.hidden_size
self.cross_attn = IdeficsAttention(
Expand All @@ -757,6 +757,7 @@ def __init__(self, config: IdeficsConfig):
dropout=config.dropout,
config=config,
qk_layer_norms=config.qk_layer_norms,
layer_idx=layer_idx,
)
self.mlp = IdeficsMLP(
hidden_size=self.hidden_size,
Expand Down Expand Up @@ -1048,7 +1049,7 @@ def __init__(self, config: IdeficsConfig):
self.cross_layer_interval = config.cross_layer_interval
num_cross_layers = config.num_hidden_layers // self.cross_layer_interval
self.gated_cross_attn_layers = nn.ModuleList(
[IdeficsGatedCrossAttentionLayer(config) for _ in range(num_cross_layers)]
[IdeficsGatedCrossAttentionLayer(config, layer_idx=i) for i in range(num_cross_layers)]
)
self.gradient_checkpointing = False

Expand Down
10 changes: 7 additions & 3 deletions src/transformers/models/idefics/processing_idefics.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from urllib.parse import urlparse

from ...feature_extraction_utils import BatchFeature
from ...image_utils import ImageInput
from ...processing_utils import (
ImagesKwargs,
ProcessingKwargs,
Expand Down Expand Up @@ -203,7 +204,10 @@ class IdeficsProcessor(ProcessorMixin):
An instance of [`IdeficsImageProcessor`]. The image processor is a required input.
tokenizer (`LlamaTokenizerFast`):
An instance of [`LlamaTokenizerFast`]. The tokenizer is a required input.
image_size (`int`, *optional*, defaults to 224): Image size (assuming a square image)
image_size (`int`, *optional*, defaults to 224):
Image size (assuming a square image)
add_end_of_utterance_token (`str`, *optional*):
The string representation of the token that marks the end of an utterance.
"""

attributes = ["image_processor", "tokenizer"]
Expand Down Expand Up @@ -240,7 +244,7 @@ def __init__(self, image_processor, tokenizer=None, image_size=224, add_end_of_u
@deprecate_kwarg(old_name="prompts", version="5.0.0", new_name="text", raise_if_both_names=True)
def __call__(
self,
images=None,
images: Union[ImageInput, List[ImageInput], str, List[str], List[List[str]]] = None,
text: Union[
TextInput,
PreTokenizedInput,
Expand All @@ -257,7 +261,7 @@ def __call__(
the model was trained on and prepares the image pixel values for the model to process.
Args:
images (`Union[PIL.Image, str, List[PIL.Image], List[str]]`):
images (`Union[ImageInput, List[ImageInput], str, List[str], List[List[str]]]`):
Either a single image or a batched list of images. Can be passed in when `text` contains only text prompts,
in order to use the image-text-to-text behavior.
text (`Union[List[TextInput], List[List[TextInput]]]`):
Expand Down

0 comments on commit 9895f7d

Please sign in to comment.