From fe008d6ebea1f5770b740991daeefd9322fa434a Mon Sep 17 00:00:00 2001
From: Raushan Turganbay
Date: Fri, 19 Jul 2024 19:21:45 +0500
Subject: [PATCH] Chameleon: not supported with fast load (#32091)

fixes
---
 docs/source/en/_toctree.yml                    |  2 +-
 docs/source/en/model_doc/chameleon.md          | 12 ++++++------
 .../models/chameleon/modeling_chameleon.py     |  1 +
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 65ee07580ca6b4..430670aa4364e6 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -761,7 +761,7 @@
       - local: model_doc/bros
         title: BROS
       - local: model_doc/chameleon
-        title: chameleon
+        title: Chameleon
       - local: model_doc/chinese_clip
         title: Chinese-CLIP
       - local: model_doc/clip
diff --git a/docs/source/en/model_doc/chameleon.md b/docs/source/en/model_doc/chameleon.md
index 9b316c772e1041..0d3fd89e10a478 100644
--- a/docs/source/en/model_doc/chameleon.md
+++ b/docs/source/en/model_doc/chameleon.md
@@ -55,14 +55,14 @@ The original code can be found [here](https://github.com/facebookresearch/chameleon).
 - Chameleon generates in chat format which means that the generated text will always be the "assistant's turn". You can enable a text completion generation by passing `return_for_text_completion=True` when calling the processor.
 
 > [!NOTE]
-> Chameleon implementation in Transformers uses a special image token to indicate where to merge image embeddings. For special image token we didn't add a new one but used one of the reserved tokens: `<reserved08707>`.
+> Chameleon implementation in Transformers uses a special image token to indicate where to merge image embeddings. For special image token we didn't add a new one but used one of the reserved tokens: `<reserved08707>`. You have to add `<image>` to your prompt in the place where the image should be embedded for correct generation.
 
 ## Usage example
 
 ### Single image inference
 
 Chameleon is a gated model so make sure to have access and login to Hugging Face Hub using a token.
-Here's how to load the model and perform inference in half-precision (`torch.float16`):
+Here's how to load the model and perform inference in half-precision (`torch.bfloat16`):
 
 ```python
 from transformers import ChameleonProcessor, ChameleonForConditionalGeneration
@@ -71,7 +71,7 @@ from PIL import Image
 import requests
 
 processor = ChameleonProcessor.from_pretrained("facebook/chameleon-7b")
-model = ChameleonForConditionalGeneration.from_pretrained("facebook/chameleon-7b", torch_dtype=torch.float16, device_map="cuda")
+model = ChameleonForConditionalGeneration.from_pretrained("facebook/chameleon-7b", torch_dtype=torch.bfloat16, device_map="cuda")
 
 # prepare image and text prompt
 url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
@@ -97,7 +97,7 @@ import requests
 
 processor = ChameleonProcessor.from_pretrained("facebook/chameleon-7b")
 
-model = ChameleonForConditionalGeneration.from_pretrained("facebook/chameleon-7b", torch_dtype=torch.float16, device_map="cuda")
+model = ChameleonForConditionalGeneration.from_pretrained("facebook/chameleon-7b", torch_dtype=torch.bfloat16, device_map="cuda")
 
 # Get three different images
 url = "https://www.ilankelman.org/stopsigns/australia.jpg"
@@ -117,7 +117,7 @@ prompts = [
 
 # We can simply feed images in the order they have to be used in the text prompt
 # Each "<image>" token uses one image leaving the next for the subsequent "<image>" tokens
-inputs = processor(text=prompts, images=[image_stop, image_cats, image_snowman], padding=True, return_tensors="pt").to(device="cuda", dtype=torch.float16)
+inputs = processor(text=prompts, images=[image_stop, image_cats, image_snowman], padding=True, return_tensors="pt").to(device="cuda", dtype=torch.bfloat16)
 
 # Generate
 generate_ids = model.generate(**inputs, max_new_tokens=50)
@@ -153,7 +153,7 @@ from transformers import ChameleonForConditionalGeneration
 model_id = "facebook/chameleon-7b"
 model = ChameleonForConditionalGeneration.from_pretrained(
     model_id,
-    torch_dtype=torch.float16,
+    torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
     attn_implementation="flash_attention_2"
 ).to(0)
diff --git a/src/transformers/models/chameleon/modeling_chameleon.py b/src/transformers/models/chameleon/modeling_chameleon.py
index 346479c771bf6f..1eea9b224958b1 100644
--- a/src/transformers/models/chameleon/modeling_chameleon.py
+++ b/src/transformers/models/chameleon/modeling_chameleon.py
@@ -1096,6 +1096,7 @@ class ChameleonPreTrainedModel(PreTrainedModel):
     _supports_quantized_cache = True
     _supports_cache_class = True
     _supports_static_cache = True
+    _supports_param_buffer_assignment = False
 
     def _init_weights(self, module):
         std = self.config.initializer_range
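A note on the modeling change: as far as I can tell, `_supports_param_buffer_assignment = False` opts Chameleon out of the assignment-based "fast load" path, so checkpoint tensors are always copied into pre-allocated parameters rather than assigned directly. Below is a minimal sketch of the two loading modes in plain PyTorch, not the `transformers` loading internals; the toy `nn.Linear` module and the bfloat16 state dict are illustrative assumptions:

```python
import torch
import torch.nn as nn

# Toy stand-ins for a real model and its checkpoint (illustrative only).
source = nn.Linear(4, 4).to(torch.bfloat16)
state = source.state_dict()

# Copy-based load (what Chameleon now always gets): parameter objects are
# kept, and checkpoint values are copied (and cast) into them, so the module
# keeps its original float32 parameters.
copied = nn.Linear(4, 4)
copied.load_state_dict(state)
print(copied.weight.dtype)  # torch.float32

# Assignment-based "fast load" (torch >= 2.1): checkpoint tensors replace the
# parameters outright, skipping the per-tensor copy; dtype and storage now
# come straight from the checkpoint.
assigned = nn.Linear(4, 4)
assigned.load_state_dict(state, assign=True)
print(assigned.weight.dtype)  # torch.bfloat16
```

Because the assignment path reuses the checkpoint tensors as-is, a model whose weights need extra handling at load time can end up in an inconsistent state, which is presumably why this opt-out flag exists; the commit subject simply records that Chameleon is not supported with fast load.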