Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ONNX Support for Decision Transformer Model #2038

Merged
merged 18 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/exporters/onnx/overview.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra
- Data2VecVision
- Deberta
- Deberta-v2
- Decision Transformer
- Deit
- Detr
- DistilBert
Expand Down
8 changes: 8 additions & 0 deletions optimum/exporters/onnx/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,14 @@ class OnnxConfig(ExportConfig, ABC):
"end_logits": {0: "batch_size", 1: "sequence_length"},
}
),
"reinforcement-learning": OrderedDict(
{
"return_preds": {0: "batch_size", 1: "sequence_length"},
"action_preds": {0: "batch_size", 1: "sequence_length", 2: "act_dim"},
"state_preds": {0: "batch_size", 1: "sequence_length", 2: "state_dim"},
"last_hidden_state": {0: "batch_size", 1: "sequence_length", 2: "last_hidden_state"},
}
),
"semantic-segmentation": OrderedDict({"logits": {0: "batch_size", 1: "num_labels", 2: "height", 3: "width"}}),
"text2text-generation": OrderedDict({"logits": {0: "batch_size", 1: "decoder_sequence_length"}}),
"text-classification": OrderedDict({"logits": {0: "batch_size"}}),
Expand Down
18 changes: 18 additions & 0 deletions optimum/exporters/onnx/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
DummyAudioInputGenerator,
DummyCodegenDecoderTextInputGenerator,
DummyDecoderTextInputGenerator,
DummyDecisionTransformerInputGenerator,
DummyEncodecInputGenerator,
DummyInputGenerator,
DummyIntGenerator,
Expand Down Expand Up @@ -256,6 +257,23 @@ class ImageGPTOnnxConfig(GPT2OnnxConfig):
pass


class DecisionTransformerOnnxConfig(GPT2OnnxConfig):
    """
    ONNX export configuration for Decision Transformer models.

    Only the dummy input generator and the input specification are overridden here;
    everything else is inherited from `GPT2OnnxConfig`.
    """

    DUMMY_INPUT_GENERATOR_CLASSES = (DummyDecisionTransformerInputGenerator,)

    @property
    def inputs(self) -> Dict[str, Dict[int, str]]:
        """
        Returns:
            `Dict[str, Dict[int, str]]`: A mapping from each model input name to its dynamic axes,
            given as `{axis index: axis name}`.
        """
        dynamic_axes = {}

        # Inputs declared with only the batch and sequence axes as dynamic.
        for input_name in ("timesteps", "returns_to_go", "attention_mask"):
            dynamic_axes[input_name] = {0: "batch_size", 1: "sequence_length"}

        # Inputs that additionally declare a named feature axis.
        dynamic_axes["actions"] = {0: "batch_size", 1: "sequence_length", 2: "act_dim"}
        dynamic_axes["states"] = {0: "batch_size", 1: "sequence_length", 2: "state_dim"}

        return dynamic_axes


class GPTNeoOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
DEFAULT_ONNX_OPSET = 14
NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(num_attention_heads="num_heads")
Expand Down
10 changes: 10 additions & 0 deletions optimum/exporters/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ class TasksManager:
"multiple-choice": "AutoModelForMultipleChoice",
"object-detection": "AutoModelForObjectDetection",
"question-answering": "AutoModelForQuestionAnswering",
"reinforcement-learning": "AutoModel",
IlyasMoutawwakil marked this conversation as resolved.
Show resolved Hide resolved
IlyasMoutawwakil marked this conversation as resolved.
Show resolved Hide resolved
"semantic-segmentation": "AutoModelForSemanticSegmentation",
"text-to-audio": ("AutoModelForTextToSpectrogram", "AutoModelForTextToWaveform"),
"text-generation": "AutoModelForCausalLM",
Expand Down Expand Up @@ -562,6 +563,12 @@ class TasksManager:
onnx="DebertaV2OnnxConfig",
tflite="DebertaV2TFLiteConfig",
),
"decision-transformer": supported_tasks_mapping(
"feature-extraction",
"feature-extraction-with-past",
"reinforcement-learning",
onnx="DecisionTransformerOnnxConfig",
),
"deit": supported_tasks_mapping(
"feature-extraction",
"image-classification",
Expand Down Expand Up @@ -2061,6 +2068,9 @@ def get_model_from_task(
if original_task == "automatic-speech-recognition" or task == "automatic-speech-recognition":
if original_task == "auto" and config.architectures is not None:
model_class_name = config.architectures[0]
elif original_task == "reinforcement-learning" or task == "reinforcement-learning":
if config.architectures is not None:
model_class_name = config.architectures[0]

if library_name == "diffusers":
config = DiffusionPipeline.load_config(model_name_or_path, **kwargs)
Expand Down
1 change: 1 addition & 0 deletions optimum/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
DummyAudioInputGenerator,
DummyBboxInputGenerator,
DummyCodegenDecoderTextInputGenerator,
DummyDecisionTransformerInputGenerator,
DummyDecoderTextInputGenerator,
DummyEncodecInputGenerator,
DummyInputGenerator,
Expand Down
35 changes: 35 additions & 0 deletions optimum/utils/input_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,41 @@ class DummyDecoderTextInputGenerator(DummyTextInputGenerator):
)


class DummyDecisionTransformerInputGenerator(DummyTextInputGenerator):
    """
    Generates dummy decision transformer inputs.
    """

    SUPPORTED_INPUT_NAMES = (
        "actions",
        "timesteps",
        "attention_mask",
        "returns_to_go",
        "states",
    )

    def __init__(self, *args, **kwargs):
        """
        Forwards all arguments to the parent generator, then caches the Decision Transformer
        specific sizes read from the underlying model config.
        """
        super().__init__(*args, **kwargs)
        model_config = self.normalized_config.config
        self.act_dim = model_config.act_dim
        self.state_dim = model_config.state_dim
        self.max_ep_len = model_config.max_ep_len

    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
        """
        Generates a random dummy tensor for the given input name.

        Args:
            input_name (`str`):
                One of `SUPPORTED_INPUT_NAMES`.
            framework (`str`, defaults to `"pt"`):
                Framework identifier forwarded to the random tensor helpers.
            int_dtype (`str`, defaults to `"int64"`):
                Dtype used for the integer `timesteps` tensor.
            float_dtype (`str`, defaults to `"fp32"`):
                Dtype used for every other (floating point) tensor.

        Returns:
            The tensor produced by `random_int_tensor` (for `timesteps`) or by
            `random_float_tensor` (for every other supported input).

        Raises:
            `ValueError`: If `input_name` is not one of `SUPPORTED_INPUT_NAMES`.
        """
        # Fail with a clear message instead of hitting an unbound `shape` variable further down.
        if input_name not in self.SUPPORTED_INPUT_NAMES:
            raise ValueError(
                f"Unsupported input name {input_name!r} for {type(self).__name__}; "
                f"supported input names are {self.SUPPORTED_INPUT_NAMES}."
            )

        if input_name == "timesteps":
            # Timesteps are integers bounded by the maximum episode length.
            shape = [self.batch_size, self.sequence_length]
            return self.random_int_tensor(shape=shape, max_value=self.max_ep_len, framework=framework, dtype=int_dtype)

        if input_name == "states":
            shape = [self.batch_size, self.sequence_length, self.state_dim]
        elif input_name == "actions":
            shape = [self.batch_size, self.sequence_length, self.act_dim]
        elif input_name == "returns_to_go":
            shape = [self.batch_size, self.sequence_length, 1]
        else:
            # Only "attention_mask" remains at this point.
            # NOTE(review): the mask is generated as a float tensor in [-2, 2], as before;
            # confirm whether an integer 0/1 mask would be more appropriate for export.
            shape = [self.batch_size, self.sequence_length]

        return self.random_float_tensor(shape, min_value=-2.0, max_value=2.0, framework=framework, dtype=float_dtype)


class DummySeq2SeqDecoderTextInputGenerator(DummyDecoderTextInputGenerator):
SUPPORTED_INPUT_NAMES = (
"decoder_input_ids",
Expand Down
10 changes: 10 additions & 0 deletions optimum/utils/normalized_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,15 @@ class NormalizedTextConfig(NormalizedConfig):
EOS_TOKEN_ID = "eos_token_id"


class NormalizedDecisionTransformerConfig(NormalizedConfig):
    """
    Normalized attribute names for Decision Transformer model configurations.

    Reference: https://huggingface.co/docs/transformers/model_doc/decision_transformer
    """

    # Each constant holds the lowercase attribute name it stands for.
    HIDDEN_SIZE = "hidden_size"
    MAX_EP_LEN = "max_ep_len"

    # Environment-specific dimensions.
    STATE_DIM = "state_dim"
    ACT_DIM = "act_dim"


class NormalizedTextConfigWithGQA(NormalizedTextConfig):
NUM_KEY_VALUE_HEADS = "num_key_value_heads"

Expand Down Expand Up @@ -236,6 +245,7 @@ class NormalizedConfigManager:
"cvt": NormalizedVisionConfig,
"deberta": NormalizedTextConfig,
"deberta-v2": NormalizedTextConfig,
"decision-transformer": NormalizedDecisionTransformerConfig,
"deit": NormalizedVisionConfig,
"distilbert": NormalizedTextConfig.with_args(num_attention_heads="n_heads", hidden_size="dim"),
"donut-swin": NormalizedVisionConfig,
Expand Down
1 change: 1 addition & 0 deletions tests/exporters/exporters_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
"data2vec-audio": "hf-internal-testing/tiny-random-Data2VecAudioModel",
"deberta": "hf-internal-testing/tiny-random-DebertaModel",
"deberta-v2": "hf-internal-testing/tiny-random-DebertaV2Model",
"decision-transformer": "edbeeching/decision-transformer-gym-hopper-medium",
IlyasMoutawwakil marked this conversation as resolved.
Show resolved Hide resolved
"deit": "hf-internal-testing/tiny-random-DeiTModel",
"donut": "fxmarty/tiny-doc-qa-vision-encoder-decoder",
"donut-swin": "hf-internal-testing/tiny-random-DonutSwinModel",
Expand Down
Loading