
Commit dd2a2a2
Merge branch 'main' into rt-detr-onnx
echarlaix committed Dec 19, 2024
2 parents f25f227 + 0c42291 commit dd2a2a2
Showing 10 changed files with 305 additions and 6 deletions.
11 changes: 11 additions & 0 deletions docs/source/exporters/onnx/overview.mdx
@@ -39,6 +39,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra
- Decision Transformer
- Deit
- Detr
- DINOv2
- DistilBert
- Donut-Swin
- Electra
@@ -53,6 +54,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra
- GPT-NeoX
- OPT
- GroupVit
- Hiera
- Hubert
- IBert
- LayoutLM
@@ -64,7 +66,9 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra
- M2-M100
- Marian
- MarkupLM
- MaskFormer
- MBart
- MGP-STR
- Mistral
- MobileBert
- MobileVit
@@ -74,13 +78,16 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra
- MT5
- Musicgen (text-conditional only)
- Nystromformer
- OLMo
- OLMo2
- OWL-ViT
- Pegasus
- Perceiver
- Phi
- Phi3
- Pix2Struct
- PoolFormer
- PVT
- Qwen2(Qwen1.5)
- RegNet
- RemBERT
@@ -92,17 +99,21 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra
- SEW
- SEW-D
- Speech2Text
- SigLIP
- SpeechT5
- Splinter
- SqueezeBert
- Swin
- SwinV2
- T5
- Table Transformer
- TROCR
- UniSpeech
- UniSpeech SAT
- Vision Encoder Decoder
- Vit
- VitMAE
- VitMSN
- Wav2Vec2
- Wav2Vec2 Conformer
- WavLM
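
The newly listed architectures are exported through the usual programmatic entry point. A minimal sketch, assuming a stock DINOv2 checkpoint (the model id, output directory, and task choice are illustrative):

```python
# Hedged sketch: export one of the newly supported architectures to ONNX.
# "facebook/dinov2-base" and the output path are illustrative choices.
from optimum.exporters.onnx import main_export

main_export(
    "facebook/dinov2-base",     # any DINOv2 checkpoint should work here
    output="dinov2_onnx",       # directory that will receive model.onnx
    task="feature-extraction",  # DINOv2 is exported as a feature extractor
)
```

The equivalent `optimum-cli export onnx` command exercises the same code path.
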
143 changes: 143 additions & 0 deletions optimum/exporters/onnx/model_configs.py
@@ -82,6 +82,7 @@
from .model_patcher import (
CLIPModelPatcher,
FalconModelPatcher,
MgpstrModelPatcher,
MistralModelPatcher,
MusicgenModelPatcher,
SAMModelPatcher,
@@ -324,6 +325,15 @@ class LlamaOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
NORMALIZED_CONFIG_CLASS = NormalizedTextConfig


class OlmoOnnxConfig(LlamaOnnxConfig):
ATOL_FOR_VALIDATION = 1e-4
MIN_TRANSFORMERS_VERSION = version.parse("4.40.0")


class Olmo2OnnxConfig(OlmoOnnxConfig):
MIN_TRANSFORMERS_VERSION = version.parse("4.47.0")


class Qwen2OnnxConfig(LlamaOnnxConfig):
MIN_TRANSFORMERS_VERSION = version.parse("4.37.0")

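The `MIN_TRANSFORMERS_VERSION` attributes above gate each export on the transformers release that added the architecture (4.40.0 for OLMo, 4.47.0 for OLMo2). A caller-side sketch of the same guard, shown only to illustrate what the attribute encodes:

```python
# Hedged sketch: the exporter is expected to enforce an equivalent check
# itself; this only illustrates what MIN_TRANSFORMERS_VERSION guards against.
import transformers
from packaging import version

if version.parse(transformers.__version__) < version.parse("4.40.0"):
    raise RuntimeError("ONNX export of OLMo requires transformers >= 4.40.0")
```
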
@@ -837,6 +847,65 @@ class ConvNextV2OnnxConfig(ViTOnnxConfig):
DEFAULT_ONNX_OPSET = 11


class HieraOnnxConfig(ViTOnnxConfig):
DEFAULT_ONNX_OPSET = 11


class PvtOnnxConfig(ViTOnnxConfig):
DEFAULT_ONNX_OPSET = 11


class VitMAEOnnxConfig(ViTOnnxConfig):
# torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::scaled_dot_product_attention' to ONNX opset version 11 is not supported.
# Support for this operator was added in version 14, try exporting with this version.
DEFAULT_ONNX_OPSET = 14


class VitMSNOnnxConfig(ViTOnnxConfig):
# torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::scaled_dot_product_attention' to ONNX opset version 11 is not supported.
# Support for this operator was added in version 14, try exporting with this version.
DEFAULT_ONNX_OPSET = 14


class Dinov2DummyInputGenerator(DummyVisionInputGenerator):
def __init__(
self,
task: str,
normalized_config: NormalizedVisionConfig,
batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
num_channels: int = DEFAULT_DUMMY_SHAPES["num_channels"],
width: int = DEFAULT_DUMMY_SHAPES["width"],
height: int = DEFAULT_DUMMY_SHAPES["height"],
**kwargs,
):
super().__init__(
task=task,
normalized_config=normalized_config,
batch_size=batch_size,
num_channels=num_channels,
width=width,
height=height,
**kwargs,
)

from transformers.onnx.utils import get_preprocessor

preprocessor = get_preprocessor(normalized_config._name_or_path)
if preprocessor is not None and hasattr(preprocessor, "crop_size"):
self.height = preprocessor.crop_size.get("height", self.height)
self.width = preprocessor.crop_size.get("width", self.width)

def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
input_ = super().generate(
input_name=input_name, framework=framework, int_dtype=int_dtype, float_dtype=float_dtype
)
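# pass-through for now: __init__ already adjusted self.height / self.width from
# the preprocessor's crop_size, so the parent generate() produces correctly sized dummies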
return input_


class Dinov2OnnxConfig(ViTOnnxConfig):
DUMMY_INPUT_GENERATOR_CLASSES = (Dinov2DummyInputGenerator,)


class MobileViTOnnxConfig(ViTOnnxConfig):
ATOL_FOR_VALIDATION = 1e-4
DEFAULT_ONNX_OPSET = 11
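
The `Dinov2DummyInputGenerator` above exists because DINOv2 image processors can center-crop to a `crop_size` that differs from the default dummy height and width, and tracing with mismatched spatial dimensions could bake wrong shapes into the graph. A quick way to see the value being picked up (checkpoint id is illustrative):

```python
# Hedged sketch: inspect the crop_size the dummy input generator would read.
from transformers import AutoImageProcessor

processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
if hasattr(processor, "crop_size"):
    print(processor.crop_size)  # e.g. {"height": 224, "width": 224}
```
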
@@ -878,6 +947,10 @@ class SwinOnnxConfig(ViTOnnxConfig):
DEFAULT_ONNX_OPSET = 11


class SwinV2OnnxConfig(SwinOnnxConfig):
pass


class Swin2srOnnxConfig(SwinOnnxConfig):
pass

@@ -913,6 +986,28 @@ class MobileNetV2OnnxConfig(MobileNetV1OnnxConfig):
pass


class MaskFormerOnnxConfig(ViTOnnxConfig):
# torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::einsum' to ONNX opset version 11 is not supported.
# Support for this operator was added in version 12, try exporting with this version.
DEFAULT_ONNX_OPSET = 12

@property
def outputs(self) -> Dict[str, Dict[int, str]]:
if self.task == "image-segmentation":
return {
"class_queries_logits": {0: "batch_size", 1: "num_queries"},
"masks_queries_logits": {0: "batch_size", 1: "num_queries", 2: "height", 3: "width"},
}
else:
return super().outputs

@property
def torch_to_onnx_output_map(self) -> Dict[str, str]:
return {
"transformer_decoder_last_hidden_state": "last_hidden_state",
}


class DonutSwinOnnxConfig(ViTOnnxConfig):
DEFAULT_ONNX_OPSET = 11

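For the `image-segmentation` task, `MaskFormerOnnxConfig` above exposes the two query-level outputs instead of the default hidden state. A hedged onnxruntime sketch of consuming them (file path and input resolution are illustrative):

```python
# Hedged sketch: run an exported MaskFormer model and fetch the two
# segmentation outputs declared by MaskFormerOnnxConfig.outputs.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("maskformer_onnx/model.onnx")
pixel_values = np.random.rand(1, 3, 384, 384).astype(np.float32)

class_logits, mask_logits = session.run(
    ["class_queries_logits", "masks_queries_logits"],
    {"pixel_values": pixel_values},
)
print(class_logits.shape)  # (batch_size, num_queries, num_labels + 1)
print(mask_logits.shape)   # (batch_size, num_queries, height, width)
```
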
@@ -933,6 +1028,21 @@ def torch_to_onnx_input_map(self) -> Dict[str, str]:
return {"x": "pixel_values"}


class MgpstrOnnxConfig(ViTOnnxConfig):
@property
def outputs(self) -> Dict[str, Dict[int, str]]:
return {
"char_logits": {0: "batch_size"},
"bpe_logits": {0: "batch_size"},
"wp_logits": {0: "batch_size"},
}

def patch_model_for_export(
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
) -> "ModelPatcher":
return MgpstrModelPatcher(self, model, model_kwargs=model_kwargs)


class SentenceTransformersTransformerOnnxConfig(TextEncoderOnnxConfig):
NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
DEFAULT_ONNX_OPSET = 14 # Some bottleneck transformers models require a specific ONNX opset to be successfully exported. We put a rather high opset here for the export to work for all architectures.
@@ -1090,6 +1200,39 @@ def patch_model_for_export(
return CLIPModelPatcher(self, model, model_kwargs=model_kwargs)


class SiglipNormalizedConfig(CLIPNormalizedConfig):
pass


class SiglipOnnxConfig(CLIPOnnxConfig):
NORMALIZED_CONFIG_CLASS = SiglipNormalizedConfig
# torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::scaled_dot_product_attention' to ONNX opset version 13 is not supported.
# Support for this operator was added in version 14, try exporting with this version.
DEFAULT_ONNX_OPSET = 14

@property
def inputs(self) -> Dict[str, Dict[int, str]]:
return {
"input_ids": {0: "text_batch_size", 1: "sequence_length"},
"pixel_values": {0: "image_batch_size", 1: "num_channels", 2: "height", 3: "width"},
# NOTE: No attention_mask
}


class SiglipTextWithProjectionOnnxConfig(CLIPTextWithProjectionOnnxConfig):
pass


class SiglipTextOnnxConfig(CLIPTextOnnxConfig):
pass


class SiglipVisionModelOnnxConfig(CLIPVisionModelOnnxConfig):
# torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::scaled_dot_product_attention' to ONNX opset version 11 is not supported.
# Support for this operator was added in version 14, try exporting with this version.
DEFAULT_ONNX_OPSET = 14


class UNetOnnxConfig(VisionOnnxConfig):
ATOL_FOR_VALIDATION = 1e-4
# The ONNX export of a CLIPText architecture, another Stable Diffusion component, needs the Trilu
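
Note the deliberately absent `attention_mask` in `SiglipOnnxConfig.inputs` above: SigLIP was trained on fixed-length, padded text without masking, so the exported graph takes `input_ids` and `pixel_values` only. Text therefore needs `max_length` padding at inference time (checkpoint id is illustrative):

```python
# Hedged sketch: SigLIP text must be padded to a fixed length because the
# ONNX graph has no attention_mask input.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/siglip-base-patch16-224")
inputs = tokenizer(["a photo of a cat"], padding="max_length", return_tensors="np")
print(inputs["input_ids"].shape)  # (1, fixed sequence_length)
```
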
26 changes: 26 additions & 0 deletions optimum/exporters/onnx/model_patcher.py
@@ -509,6 +509,32 @@ def patched_forward(*args, **kwargs):
self.patched_forward = patched_forward


class MgpstrModelPatcher(ModelPatcher):
def __init__(
self,
config: "OnnxConfig",
model: Union["PreTrainedModel", "TFPreTrainedModel"],
model_kwargs: Optional[Dict[str, Any]] = None,
):
super().__init__(config, model, model_kwargs)

@functools.wraps(self.orig_forward)
def patched_forward(*args, **kwargs):
signature = inspect.signature(self.orig_forward)
args, kwargs = override_arguments(args, kwargs, signature, model_kwargs=self.model_kwargs)

# logits is a tuple, so we unpack it and return them as separate outputs
char_logits, bpe_logits, wp_logits = self.orig_forward(*args, **kwargs).logits

return {
"char_logits": char_logits,
"bpe_logits": bpe_logits,
"wp_logits": wp_logits,
}

self.patched_forward = patched_forward


class SAMModelPatcher(ModelPatcher):
def __init__(
self,
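
With the patcher above, the exported MGP-STR graph exposes the three logits tensors as named outputs, so they can be fetched directly at runtime. A hedged onnxruntime sketch (file path and the 32x128 input resolution are illustrative, matching the base MGP-STR checkpoints):

```python
# Hedged sketch: fetch the three named outputs produced by MgpstrModelPatcher.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("mgpstr_onnx/model.onnx")
pixel_values = np.random.rand(1, 3, 32, 128).astype(np.float32)

char_logits, bpe_logits, wp_logits = session.run(
    ["char_logits", "bpe_logits", "wp_logits"],
    {"pixel_values": pixel_values},
)
print(char_logits.shape, bpe_logits.shape, wp_logits.shape)
```
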
(Diff content for the remaining 7 changed files was not loaded.)
