diff --git a/src/transformers/models/idefics3/processing_idefics3.py b/src/transformers/models/idefics3/processing_idefics3.py index a64d62cef012a9..7ca5829e2063d8 100644 --- a/src/transformers/models/idefics3/processing_idefics3.py +++ b/src/transformers/models/idefics3/processing_idefics3.py @@ -325,6 +325,10 @@ def __call__( inputs.update(text_inputs) elif text is not None: + if any(n_images_in_text): + raise ValueError( + f"Found {sum(n_images_in_text)} {self.image_token.content} tokens in the text but no images were passed." + ) text_inputs = self.tokenizer(text=text, **output_kwargs["text_kwargs"]) inputs.update(text_inputs)