Support transformers 4.43 #1971

Merged 32 commits into main from support-transformers-4.43 on Aug 5, 2024.
The diff below shows the changes from 8 of the 32 commits.

Commits
9333b58 fix bt bark test (IlyasMoutawwakil, Jul 25, 2024)
4dda6df setup (IlyasMoutawwakil, Jul 25, 2024)
5926bc5 patch clip models for sd (IlyasMoutawwakil, Jul 25, 2024)
c2a5c03 infer ort model dtype property from inputs dtypes (IlyasMoutawwakil, Jul 25, 2024)
b610212 patch all clip variants (IlyasMoutawwakil, Jul 25, 2024)
9923084 device setter (IlyasMoutawwakil, Jul 25, 2024)
0cb6be7 bigger model for now (IlyasMoutawwakil, Jul 25, 2024)
88831a5 fix device attribution (IlyasMoutawwakil, Jul 25, 2024)
a1f838c onnx opset for owlvit and owlv2 (IlyasMoutawwakil, Jul 25, 2024)
b8f5f32 model dtype (IlyasMoutawwakil, Jul 25, 2024)
81d0227 revert (IlyasMoutawwakil, Jul 25, 2024)
82a2879 use model part dtype instead (IlyasMoutawwakil, Jul 25, 2024)
d2a15b5 no need for dtype with diffusion pipelines (IlyasMoutawwakil, Jul 25, 2024)
c761026 revert (IlyasMoutawwakil, Jul 25, 2024)
0eb5dce fix clip text model with projection not outputting hidden states (IlyasMoutawwakil, Jul 25, 2024)
f568bf6 whisper generation (IlyasMoutawwakil, Jul 26, 2024)
92ea60b fix whisper, support cache_position, and using transformers whisper g… (IlyasMoutawwakil, Jul 29, 2024)
170eaba style (IlyasMoutawwakil, Jul 29, 2024)
991b66b create cache position for merged decoder and fix test for non whisper… (IlyasMoutawwakil, Jul 29, 2024)
8f8e6ca typo (IlyasMoutawwakil, Jul 29, 2024)
e5934b3 Merge branch 'main' into support-transformers-4.43 (echarlaix, Jul 30, 2024)
96bdde1 conditioned cache position argument (IlyasMoutawwakil, Jul 30, 2024)
9d09389 update whisper min transformers version (IlyasMoutawwakil, Jul 30, 2024)
056e450 compare whisper ort generation with transformers (IlyasMoutawwakil, Jul 30, 2024)
b3d9181 Merge branch 'support-transformers-4.43' of https://github.com/huggin… (IlyasMoutawwakil, Jul 30, 2024)
825cc6d fix generation length for speech to text model type (IlyasMoutawwakil, Jul 30, 2024)
3fe0cac cache position in whisper only with dynamic axis decoder_sequence_length (IlyasMoutawwakil, Jul 30, 2024)
b3948b9 use minimal prepare_inputs_for_generation in ORTModelForSpeechSeq2Seq (IlyasMoutawwakil, Aug 2, 2024)
2f69a8a remove version restrictions on whisper (IlyasMoutawwakil, Aug 2, 2024)
4cc1065 comment (IlyasMoutawwakil, Aug 2, 2024)
8077ded fix (IlyasMoutawwakil, Aug 2, 2024)
aa9b9d6 simpler (IlyasMoutawwakil, Aug 5, 2024)
37 changes: 28 additions & 9 deletions optimum/exporters/onnx/model_configs.py
@@ -71,6 +71,7 @@
)
from .constants import ONNX_DECODER_MERGED_NAME, ONNX_DECODER_NAME, ONNX_DECODER_WITH_PAST_NAME
from .model_patcher import (
CLIPModelPatcher,
FalconModelPatcher,
MistralModelPatcher,
MusicgenModelPatcher,
@@ -911,10 +912,16 @@ def outputs(self) -> Dict[str, Dict[int, str]]:

return common_outputs

def patch_model_for_export(
self,
model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"],
model_kwargs: Optional[Dict[str, Any]] = None,
) -> "ModelPatcher":
return CLIPModelPatcher(self, model, model_kwargs=model_kwargs)


class CLIPOnnxConfig(TextAndVisionOnnxConfig):
NORMALIZED_CONFIG_CLASS = CLIPNormalizedConfig
DEFAULT_ONNX_OPSET = 14

@property
def inputs(self) -> Dict[str, Dict[int, str]]:
@@ -933,6 +940,13 @@ def outputs(self) -> Dict[str, Dict[int, str]]:
"image_embeds": {0: "image_batch_size"},
}

def patch_model_for_export(
self,
model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"],
model_kwargs: Optional[Dict[str, Any]] = None,
) -> "ModelPatcher":
return CLIPModelPatcher(self, model, model_kwargs=model_kwargs)


class SentenceTransformersCLIPOnnxConfig(CLIPOnnxConfig):
@property
@@ -978,6 +992,13 @@ def outputs(self) -> Dict[str, Dict[int, str]]:

return common_outputs

def patch_model_for_export(
self,
model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"],
model_kwargs: Optional[Dict[str, Any]] = None,
) -> "ModelPatcher":
return CLIPModelPatcher(self, model, model_kwargs=model_kwargs)


class CLIPTextOnnxConfig(CLIPTextWithProjectionOnnxConfig):
@property
@@ -992,14 +1013,12 @@ def outputs(self) -> Dict[str, Dict[int, str]]:

return common_outputs

-    def generate_dummy_inputs(self, framework: str = "pt", **kwargs):
-        dummy_inputs = super().generate_dummy_inputs(framework=framework, **kwargs)
-
-        if framework == "pt":
-            import torch
-
-            dummy_inputs["input_ids"] = dummy_inputs["input_ids"].to(dtype=torch.int32)
-        return dummy_inputs
+    def patch_model_for_export(
+        self,
+        model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"],
+        model_kwargs: Optional[Dict[str, Any]] = None,
+    ) -> "ModelPatcher":
+        return CLIPModelPatcher(self, model, model_kwargs=model_kwargs)


class UNetOnnxConfig(VisionOnnxConfig):
17 changes: 17 additions & 0 deletions optimum/exporters/onnx/model_patcher.py
@@ -1131,3 +1131,20 @@ def __init__(
self._update_causal_mask_original = self._model.model._update_causal_mask
else:
self._update_causal_mask_original = self._model._update_causal_mask


class CLIPModelPatcher(ModelPatcher):
def __enter__(self):
super().__enter__()

if _transformers_version >= version.parse("4.43"):
from transformers.models.clip.modeling_clip import CLIPAttention, CLIPSdpaAttention

self.original_sdpa_forward, CLIPSdpaAttention.forward = CLIPSdpaAttention.forward, CLIPAttention.forward

def __exit__(self, exc_type, exc_value, traceback):
super().__exit__(exc_type, exc_value, traceback)
if _transformers_version >= version.parse("4.43"):
from transformers.models.clip.modeling_clip import CLIPSdpaAttention

CLIPSdpaAttention.forward = self.original_sdpa_forward
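
Note: a minimal sketch of how this patcher is exercised, assuming transformers >= 4.43 (where CLIPSdpaAttention still exists) and the patch_model_for_export overrides added above in model_configs.py. The checkpoint name and the standalone usage are illustrative; the ONNX exporter normally drives this internally.

from transformers import CLIPTextModel
from optimum.exporters.onnx.model_configs import CLIPTextOnnxConfig

# Illustrative checkpoint; any CLIP text model should behave the same way.
model = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32")
onnx_config = CLIPTextOnnxConfig(model.config)

# patch_model_for_export returns the CLIPModelPatcher defined above.
patcher = onnx_config.patch_model_for_export(model)

with patcher:
    # Inside this block, CLIPSdpaAttention.forward is swapped for the eager
    # CLIPAttention.forward, which traces cleanly to ONNX; the swap is
    # undone on exit.
    pass  # e.g. torch.onnx.export(model, ...) would run here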
3 changes: 1 addition & 2 deletions optimum/exporters/utils.py
@@ -96,7 +96,7 @@ def _get_submodels_for_export_diffusion(
pipeline, (StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipeline)
)
is_stable_diffusion_xl = isinstance(
-        pipeline, (StableDiffusionXLImg2ImgPipeline, StableDiffusionXLInpaintPipeline, StableDiffusionXLPipeline)
+        pipeline, (StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline, StableDiffusionXLInpaintPipeline)
)
is_latent_consistency_model = isinstance(
pipeline, (LatentConsistencyModelPipeline, LatentConsistencyModelImg2ImgPipeline)
@@ -150,7 +150,6 @@ def _get_submodels_for_export_diffusion(

text_encoder_2 = getattr(pipeline, "text_encoder_2", None)
if text_encoder_2 is not None:
-        text_encoder_2.config.output_hidden_states = True
models_for_export["text_encoder_2"] = text_encoder_2

return models_for_export
8 changes: 7 additions & 1 deletion optimum/onnxruntime/modeling_diffusion.py
@@ -452,10 +452,14 @@ def to(self, device: Union[torch.device, str, int]):
Returns:
`ORTModel`: the model placed on the requested device.
"""

device, provider_options = parse_device(device)
provider = get_provider_for_device(device)
validate_provider_availability(provider) # raise error if the provider is not available
-        self.device = device

+        if device.type == "cuda" and self.providers[0] == "TensorrtExecutionProvider":
+            return self

self.vae_decoder.session.set_providers([provider], provider_options=[provider_options])
self.text_encoder.session.set_providers([provider], provider_options=[provider_options])
self.unet.session.set_providers([provider], provider_options=[provider_options])
@@ -464,6 +468,8 @@ def to(self, device: Union[torch.device, str, int]):
self.vae_encoder.session.set_providers([provider], provider_options=[provider_options])

self.providers = self.vae_decoder.session.get_providers()
+        self._device = device

return self

@classmethod
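
As a usage sketch (checkpoint name illustrative, and moving to "cuda" assumes a GPU build of onnxruntime): moving a diffusion pipeline now routes every sub-model session to the matching execution provider and, as in ORTModel.to, short-circuits when TensorRT is already the active provider.

from optimum.onnxruntime import ORTStableDiffusionPipeline

# Illustrative tiny test checkpoint; export=True converts the weights to ONNX.
pipe = ORTStableDiffusionPipeline.from_pretrained(
    "hf-internal-testing/tiny-stable-diffusion-torch", export=True
)
pipe = pipe.to("cuda")  # unet, vae and text encoder sessions all switch provider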
23 changes: 18 additions & 5 deletions optimum/onnxruntime/modeling_ort.py
@@ -276,7 +276,19 @@ def __init__(

self._ordered_input_names = get_ordered_input_names(self.input_names.keys(), func=self.forward)

-    # TODO: why do we make device a property since we are only access the value, and do not do any check when setting the value?
+    @property
+    def dtype(self) -> torch.dtype:
+        """
+        `torch.dtype`: The dtype of the model.
+        """
+
+        for dtype in self.input_dtypes.values():
+            torch_dtype = TypeHelper.ort_type_to_torch_type(dtype)
+            if torch_dtype.is_floating_point:
+                return torch_dtype
+
+        return None

@property
def device(self) -> torch.device:
"""
@@ -286,8 +298,8 @@ def device(self) -> torch.device:
return self._device

@device.setter
-    def device(self, value: torch.device):
-        self._device = value
+    def device(self, **kwargs):
+        raise AttributeError("The device attribute is read-only, please use the `to` method to change the device.")

@property
def use_io_binding(self):
@@ -309,13 +321,13 @@ def to(self, device: Union[torch.device, str, int]):
Returns:
`ORTModel`: the model placed on the requested device.
"""

device, provider_options = parse_device(device)

if device.type == "cuda" and self.providers[0] == "TensorrtExecutionProvider":
return self

-        self.device = device
-        provider = get_provider_for_device(self.device)
+        provider = get_provider_for_device(device)
validate_provider_availability(provider) # raise error if the provider is not available

# IOBinding is only supported for CPU and CUDA Execution Providers.
@@ -331,6 +343,7 @@

self.model.set_providers([provider], provider_options=[provider_options])
self.providers = self.model.get_providers()
+        self._device = device

return self

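
A small sketch of the resulting behavior (the checkpoint name is illustrative): dtype is now inferred from the ONNX session's input dtypes, and device can no longer be assigned directly.

import torch
from optimum.onnxruntime import ORTModelForFeatureExtraction

# Illustrative ONNX checkpoint from the Hub.
model = ORTModelForFeatureExtraction.from_pretrained("optimum/all-MiniLM-L6-v2")

print(model.dtype)   # first floating-point input dtype, or None if every input is integer
print(model.device)  # torch.device("cpu") until moved

model = model.to("cpu")  # the supported way to (re)place the model
try:
    model.device = torch.device("cuda")
except AttributeError as err:
    print(err)  # the setter now raises: use .to() instead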
3 changes: 2 additions & 1 deletion optimum/onnxruntime/modeling_seq2seq.py
@@ -1124,12 +1124,13 @@ def to(self, device: Union[torch.device, str, int]):
provider = get_provider_for_device(device)
validate_provider_availability(provider) # raise error if the provider is not available

-        self.device = device
self.encoder.session.set_providers([provider], provider_options=[provider_options])
self.decoder.session.set_providers([provider], provider_options=[provider_options])
if self.decoder_with_past is not None:
self.decoder_with_past.session.set_providers([provider], provider_options=[provider_options])

self.providers = self.encoder.session.get_providers()
+        self._device = device

return self

2 changes: 1 addition & 1 deletion setup.py
@@ -15,7 +15,7 @@
REQUIRED_PKGS = [
"coloredlogs",
"sympy",
"transformers[sentencepiece]>=4.26.0,<4.43.0",
"transformers[sentencepiece]>=4.26.0,<4.44.0",
"torch>=1.11",
"packaging",
"numpy<2.0", # transformers requires numpy<2.0 https://github.com/huggingface/transformers/pull/31569
5 changes: 3 additions & 2 deletions tests/bettertransformer/testing_utils.py
@@ -27,7 +27,7 @@

MODELS_DICT = {
"albert": "hf-internal-testing/tiny-random-AlbertModel",
"bark": "ylacombe/bark-small", # TODO: put a smaller model, this one is 1.7GB...
"bark": "ylacombe/bark-small",
"bart": "hf-internal-testing/tiny-random-bart",
"bert": "hf-internal-testing/tiny-random-BertModel",
"bert-generation": "ybelkada/random-tiny-BertGenerationModel",
@@ -359,7 +359,8 @@ def _test_save_load_invertible(self, model_id, keep_original_model=True):
for name, param in bt_model.named_parameters():
self.assertFalse(param.device.type == "meta", f"Parameter {name} is on the meta device.")

-            bt_model.save_pretrained(tmpdirname)
+            # saving a normal transformers bark model fails because of shared tensors
+            bt_model.save_pretrained(tmpdirname, safe_serialization=hf_model.config.model_type != "bark")

bt_model_from_load = AutoModel.from_pretrained(tmpdirname)
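
For reference, a standalone sketch of the failure mode this works around (the save path is illustrative): bark checkpoints contain tied/shared tensors, which safetensors refuses to serialize, so saving has to opt out of safe serialization for that model type.

from transformers import AutoModel

model = AutoModel.from_pretrained("ylacombe/bark-small")

# safetensors (the default format in recent transformers) rejects shared
# tensors, so bark is saved with the legacy pickle-based format instead.
model.save_pretrained("/tmp/bark-small", safe_serialization=False)
reloaded = AutoModel.from_pretrained("/tmp/bark-small")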
