From 0535d7c3eef2859d3971b01cd6fa0d45637e4002 Mon Sep 17 00:00:00 2001 From: jp1924 Date: Thu, 31 Oct 2024 01:10:39 +0000 Subject: [PATCH 1/7] Add: do_convert_rgb --- src/transformers/models/vit/image_processing_vit.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/transformers/models/vit/image_processing_vit.py b/src/transformers/models/vit/image_processing_vit.py index 7c0d8abefd8..58f9ec16a87 100644 --- a/src/transformers/models/vit/image_processing_vit.py +++ b/src/transformers/models/vit/image_processing_vit.py @@ -19,7 +19,7 @@ import numpy as np from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict -from ...image_transforms import resize, to_channel_dimension_format +from ...image_transforms import convert_to_rgb, resize, to_channel_dimension_format from ...image_utils import ( IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, @@ -82,6 +82,7 @@ def __init__( do_normalize: bool = True, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, + do_convert_rgb: bool = None, **kwargs, ) -> None: super().__init__(**kwargs) @@ -95,6 +96,7 @@ def __init__( self.rescale_factor = rescale_factor self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD + self.do_convert_rgb = do_convert_rgb def resize( self, @@ -159,6 +161,7 @@ def preprocess( return_tensors: Optional[Union[str, TensorType]] = None, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, + do_convert_rgb: bool = None, ): """ Preprocess an image or batch of images. @@ -203,6 +206,8 @@ def preprocess( - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. + do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`): + Whether to convert the image to RGB. """ do_resize = do_resize if do_resize is not None else self.do_resize do_rescale = do_rescale if do_rescale is not None else self.do_rescale @@ -211,6 +216,7 @@ def preprocess( rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor image_mean = image_mean if image_mean is not None else self.image_mean image_std = image_std if image_std is not None else self.image_std + do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb size = size if size is not None else self.size size_dict = get_size_dict(size) @@ -233,6 +239,9 @@ def preprocess( resample=resample, ) + if do_convert_rgb: + images = [convert_to_rgb(image) for image in images] + # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] From 052a89c5f27dccb47cd45453ab50af7728c0523f Mon Sep 17 00:00:00 2001 From: jp1924 Date: Thu, 31 Oct 2024 11:03:31 +0900 Subject: [PATCH 2/7] Add: doc string --- src/transformers/models/vit/image_processing_vit.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/transformers/models/vit/image_processing_vit.py b/src/transformers/models/vit/image_processing_vit.py index 58f9ec16a87..13b62cdf881 100644 --- a/src/transformers/models/vit/image_processing_vit.py +++ b/src/transformers/models/vit/image_processing_vit.py @@ -68,6 +68,8 @@ class ViTImageProcessor(BaseImageProcessor): image_std (`float` or `List[float]`, *optional*, defaults to `IMAGENET_STANDARD_STD`): Standard deviation to use if normalizing the image. This is a float or list of floats the length of the number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method. + do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`): + Whether to convert the image to RGB. """ model_input_names = ["pixel_values"] From 1600d26a4244fbe8450ca55cb3e2eadf32d8ff19 Mon Sep 17 00:00:00 2001 From: jp Date: Tue, 5 Nov 2024 17:05:59 +0900 Subject: [PATCH 3/7] Update src/transformers/models/vit/image_processing_vit.py Co-authored-by: Pavel Iakubovskii --- src/transformers/models/vit/image_processing_vit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/vit/image_processing_vit.py b/src/transformers/models/vit/image_processing_vit.py index 13b62cdf881..c2b506dbc0f 100644 --- a/src/transformers/models/vit/image_processing_vit.py +++ b/src/transformers/models/vit/image_processing_vit.py @@ -163,7 +163,7 @@ def preprocess( return_tensors: Optional[Union[str, TensorType]] = None, data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, ): """ Preprocess an image or batch of images. From 71c7fe2b90a31843a4493f70d1b31fa94b59285e Mon Sep 17 00:00:00 2001 From: jp Date: Tue, 5 Nov 2024 17:06:08 +0900 Subject: [PATCH 4/7] Update src/transformers/models/vit/image_processing_vit.py Co-authored-by: Pavel Iakubovskii --- src/transformers/models/vit/image_processing_vit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/vit/image_processing_vit.py b/src/transformers/models/vit/image_processing_vit.py index c2b506dbc0f..a36070db096 100644 --- a/src/transformers/models/vit/image_processing_vit.py +++ b/src/transformers/models/vit/image_processing_vit.py @@ -68,7 +68,7 @@ class ViTImageProcessor(BaseImageProcessor): image_std (`float` or `List[float]`, *optional*, defaults to `IMAGENET_STANDARD_STD`): Standard deviation to use if normalizing the image. This is a float or list of floats the length of the number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method. - do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`): + do_convert_rgb (`bool`, *optional*): Whether to convert the image to RGB. """ From 3fc89df3cdf396c50cc59353fdb0906cf77051e5 Mon Sep 17 00:00:00 2001 From: jp Date: Tue, 5 Nov 2024 17:06:17 +0900 Subject: [PATCH 5/7] Update src/transformers/models/vit/image_processing_vit.py Co-authored-by: Pavel Iakubovskii --- src/transformers/models/vit/image_processing_vit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/vit/image_processing_vit.py b/src/transformers/models/vit/image_processing_vit.py index a36070db096..05bb8bae049 100644 --- a/src/transformers/models/vit/image_processing_vit.py +++ b/src/transformers/models/vit/image_processing_vit.py @@ -84,7 +84,7 @@ def __init__( do_normalize: bool = True, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, **kwargs, ) -> None: super().__init__(**kwargs) From a922a4e80e61570a52f656ea5b407b67be83f979 Mon Sep 17 00:00:00 2001 From: jp1924 Date: Tue, 5 Nov 2024 08:11:33 +0000 Subject: [PATCH 6/7] Add: do_convert_rgb to fast --- src/transformers/models/vit/image_processing_vit_fast.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/transformers/models/vit/image_processing_vit_fast.py b/src/transformers/models/vit/image_processing_vit_fast.py index 21f5a99a3e3..d36ae32c2ea 100644 --- a/src/transformers/models/vit/image_processing_vit_fast.py +++ b/src/transformers/models/vit/image_processing_vit_fast.py @@ -76,6 +76,8 @@ class ViTImageProcessorFast(BaseImageProcessorFast): image_std (`float` or `List[float]`, *optional*, defaults to `IMAGENET_STANDARD_STD`): Standard deviation to use if normalizing the image. This is a float or list of floats the length of the number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method. + do_convert_rgb (`bool`, *optional*): + Whether to convert the image to RGB. """ model_input_names = ["pixel_values"] @@ -101,6 +103,7 @@ def __init__( do_normalize: bool = True, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, + do_convert_rgb: Optional[bool] = None, **kwargs, ) -> None: super().__init__(**kwargs) @@ -114,6 +117,7 @@ def __init__( self.rescale_factor = rescale_factor self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD + self.do_convert_rgb = do_convert_rgb def _build_transforms( self, @@ -199,6 +203,7 @@ def preprocess( return_tensors: Optional[Union[str, TensorType]] = "pt", data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, + do_convert_rgb: Optional[bool] = None, **kwargs, ): """ @@ -237,6 +242,8 @@ def preprocess( - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. + do_convert_rgb (`bool`, *optional*): + Whether to convert the image to RGB. """ do_resize = do_resize if do_resize is not None else self.do_resize do_rescale = do_rescale if do_rescale is not None else self.do_rescale @@ -246,6 +253,7 @@ def preprocess( image_mean = image_mean if image_mean is not None else self.image_mean image_std = image_std if image_std is not None else self.image_std size = size if size is not None else self.size + do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb # Make hashable for cache size = SizeDict(**size) image_mean = tuple(image_mean) if isinstance(image_mean, list) else image_mean From ae573cf91c98039258f943720d2e9d4a42bb9dec Mon Sep 17 00:00:00 2001 From: jp1924 Date: Tue, 12 Nov 2024 00:05:47 +0000 Subject: [PATCH 7/7] Add: convert_to_rgb --- src/transformers/models/vit/image_processing_vit_fast.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/transformers/models/vit/image_processing_vit_fast.py b/src/transformers/models/vit/image_processing_vit_fast.py index d36ae32c2ea..98ecfb3927a 100644 --- a/src/transformers/models/vit/image_processing_vit_fast.py +++ b/src/transformers/models/vit/image_processing_vit_fast.py @@ -20,7 +20,7 @@ from ...image_processing_base import BatchFeature from ...image_processing_utils import get_size_dict from ...image_processing_utils_fast import BaseImageProcessorFast, SizeDict -from ...image_transforms import FusedRescaleNormalize, NumpyToTensor, Rescale +from ...image_transforms import FusedRescaleNormalize, NumpyToTensor, Rescale, convert_to_rgb from ...image_utils import ( IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, @@ -279,6 +279,9 @@ def preprocess( image_type=image_type, ) + if do_convert_rgb: + images = [convert_to_rgb(image) for image in images] + transforms = self.get_transforms( do_resize=do_resize, do_rescale=do_rescale,