huggingface · Factral · Jan 18, 2025 · Jan 18, 2025 · Jan 18, 2025
diff --git a/src/transformers/image_processing_base.py b/src/transformers/image_processing_base.py
@@ -411,6 +411,7 @@ def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs):
         """
         image_processor_dict = image_processor_dict.copy()
         return_unused_kwargs = kwargs.pop("return_unused_kwargs", False)
+        is_timm = kwargs.pop("is_timm", False)
 
         # The `size` parameter is a dict and was previously an int or tuple in feature extractors.
         # We set `size` here directly to the `image_processor_dict` so that it is converted to the appropriate
@@ -420,7 +421,10 @@ def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs):
         if "crop_size" in kwargs and "crop_size" in image_processor_dict:
             image_processor_dict["crop_size"] = kwargs.pop("crop_size")
 
-        image_processor = cls(**image_processor_dict)
+        if is_timm:
+            image_processor = cls(image_processor_dict)
+        else:
+            image_processor = cls(**image_processor_dict)
 
         # Update image_processor with kwargs if needed
         to_remove = []

diff --git a/src/transformers/models/auto/feature_extraction_auto.py b/src/transformers/models/auto/feature_extraction_auto.py
@@ -94,6 +94,7 @@
         ("swinv2", "ViTFeatureExtractor"),
         ("table-transformer", "DetrFeatureExtractor"),
         ("timesformer", "VideoMAEFeatureExtractor"),
+        ("timm_wrapper", "TimmWrapperImageProcessor"),
         ("tvlt", "TvltFeatureExtractor"),
         ("unispeech", "Wav2Vec2FeatureExtractor"),
         ("unispeech-sat", "Wav2Vec2FeatureExtractor"),

diff --git a/src/transformers/models/timm_wrapper/image_processing_timm_wrapper.py b/src/transformers/models/timm_wrapper/image_processing_timm_wrapper.py
@@ -93,6 +93,15 @@ def get_image_processor_dict(
             pretrained_model_name_or_path, image_processor_filename=image_processor_filename, **kwargs
         )
 
+    @classmethod
+    def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs):
+        """
+        Overrides the base class `from_dict` to set `is_timm=True`, so that the base implementation passes the config
+        dict to the constructor as a single positional argument (`cls(image_processor_dict)`) instead of unpacking it.
+        """
+        kwargs.update({"is_timm": True})
+        return super().from_dict(image_processor_dict, **kwargs)
+
     def preprocess(
         self,
         images: ImageInput,