Workaround (WA) for ShapeOf issue

huggingface · Jun 27, 2024 · 0684b0a · 0684b0a
1 parent 494c5e9
commit 0684b0a
Show file tree

Hide file tree

Showing 5 changed files with 18 additions and 85 deletions.
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
@@ -22,9 +22,10 @@
 from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizerBase
 
 from optimum.exporters import TasksManager
+from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED
 from optimum.exporters.onnx.base import OnnxConfig
 from optimum.exporters.openvino.convert import export_from_model
-from optimum.intel.utils.import_utils import is_openvino_tokenizers_available
+from optimum.intel.utils.import_utils import is_openvino_tokenizers_available, is_transformers_version
 from optimum.utils.save_utils import maybe_load_preprocessors
 
 
@@ -240,8 +241,8 @@ def main_export(
                 f"Asked to export a {model_type} model for the task {task}{autodetected_message}, but the Optimum OpenVINO exporter only supports the tasks {', '.join(model_tasks.keys())} for {model_type}. Please use a supported task. Please open an issue at https://github.com/huggingface/optimum/issues if you would like the task {task} to be supported in the ONNX export for {model_type}."
             )
 
-        # if is_transformers_version(">=", "4.36") and model_type in SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED:
-        #     loading_kwargs["attn_implementation"] = "eager"
+        if is_transformers_version(">=", "4.36") and model_type in SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED:
+            loading_kwargs["attn_implementation"] = "eager"
         # there are some difference between remote and in library representation of past key values for some models,
         # for avoiding confusion we disable remote code for them
         if (

diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
@@ -30,9 +30,7 @@
     M2M100OnnxConfig,
     MPTOnnxConfig,
     PhiOnnxConfig,
-    Pix2StructOnnxConfig,
     T5OnnxConfig,
-    TrOCROnnxConfig,
     UNetOnnxConfig,
     VaeDecoderOnnxConfig,
     VaeEncoderOnnxConfig,
@@ -853,7 +851,7 @@ def _create_dummy_input_generator_classes(self, **kwargs) -> List[DummyInputGene
         forces the other generators to use the same batch size, meaning they will all produce inputs of the same batch
         size. Override this method for custom behavior.
         """
-        if getattr(self, "stateful"):
+        if getattr(self, "stateful", False):
             if "encoder_sequence_length" not in kwargs:
                 sequence_len = kwargs.get("sequence_length", DEFAULT_DUMMY_SHAPES["sequence_length"])
                 kwargs["encoder_sequence_length"] = sequence_len + 2
@@ -1039,74 +1037,3 @@ class MarianOpenVINOConfig(M2M100OpenVINOConfig):
 )
 class PegasusOpenVINOConfig(M2M100OpenVINOConfig):
     pass
-
-
-@register_in_tasks_manager(
-    "pix2struct",
-    *[
-        "image-to-text",
-        "image-to-text-with-past",
-    ],
-    library_name="transformers",
-)
-class Pix2StructOpenVINOConfig(Pix2StructOnnxConfig):
-    def _create_dummy_input_generator_classes(self, **kwargs) -> List["DummyInputGenerator"]:
-        dummy_inputs_generators = []
-        dummy_inputs_generators.append(self.DUMMY_INPUT_GENERATOR_CLASSES[0](self.task, self._normalized_config))
-
-        if self._preprocessors is None or len(self._preprocessors) != 2:
-            raise ValueError(
-                f"Preprocessors for pix2struct need to be available for the ONNX export to infer input static shapes. Got: {self._preprocessors}"
-            )
-
-        encoder_sequence_length = self._preprocessors[1].image_processor.max_patches
-        if getattr(self, "stateful", False):
-            encoder_sequence_length += 2
-        # A hack for DummyPix2StructInputGenerator to gain access to the preprocessors.
-        # TODO: we should probably pass preprocessors to all dummy input generators.
-        kwargs["preprocessors"] = self._preprocessors
-        for cls_ in self.DUMMY_INPUT_GENERATOR_CLASSES[1:]:
-            dummy_inputs_generators.append(
-                cls_(self.task, self._normalized_config, encoder_sequence_length=encoder_sequence_length, **kwargs)
-            )
-
-        return dummy_inputs_generators
-
-
-@register_in_tasks_manager(
-    "trocr",
-    *[
-        "feature-extraction",
-        "feature-extraction-with-past",
-        "image-to-text",
-        "image-to-text-with-past",
-    ],
-    library_name="transformers",
-)
-class TrOCROpenVINOConfig(TrOCROnnxConfig):
-    def _create_dummy_input_generator_classes(self, **kwargs) -> List["DummyInputGenerator"]:
-        dummy_text_input_generator = self.DUMMY_INPUT_GENERATOR_CLASSES[0](
-            self.task, self._normalized_config, **kwargs
-        )
-        dummy_decoder_text_input_generator = self.DUMMY_INPUT_GENERATOR_CLASSES[1](
-            self.task,
-            self._normalized_config,
-            **kwargs,
-        )
-        encoder_sequence_length = dummy_text_input_generator.sequence_length
-
-        if getattr(self, "stateful", False):
-            encoder_sequence_length += 2
-        dummy_seq2seq_past_key_values_generator = self.DUMMY_INPUT_GENERATOR_CLASSES[2](
-            self.task,
-            self._normalized_config,
-            encoder_sequence_length=encoder_sequence_length,
-            **kwargs,
-        )
-        dummy_inputs_generators = [
-            dummy_text_input_generator,
-            dummy_decoder_text_input_generator,
-            dummy_seq2seq_past_key_values_generator,
-        ]
-
-        return dummy_inputs_generators
diff --git a/optimum/exporters/openvino/stateful.py b/optimum/exporters/openvino/stateful.py
@@ -189,10 +189,7 @@ def ensure_export_task_support_stateful(task: str, is_encoder_decoder: bool = Fa
 
     _ENCODER_DECODER_TASKS_WITH_PAST = (
         "automatic-speech-recognition",
-        "document-question-answering",
-        "image-to-text",
         "text2text-generation",
-        "visual-question-answering",
     )
 
     is_stateful = task.endswith("-with-past") and task.replace("-with-past", "") in _ENCODER_DECODER_TASKS_WITH_PAST
@@ -223,14 +220,18 @@ def get_read_value_ops(model: ov.Model):
     return [op for op in model.get_ops() if op.get_type_name() == "ReadValue"]
 
 
+def get_shape_of_ops(model: ov.Model):
+    return [op for op in model.get_ops() if op.get_type_name() == "ShapeOf"]
+
+
 def get_consumer_nodes(node):
     consumer_inputs = set().union(*[output.get_target_inputs() for output in node.outputs()])
     return set(input.get_node() for input in consumer_inputs)
 
 
 def find_output_nodes_of_dependent_subgraph(model: ov.Model, sources: list):
     # Search for nodes in the model graph that depend on nodes in `starts` list but independent of other model Parameter's/ReadValue's
-    other_inputs = set(model.get_parameters() + get_read_value_ops(model)) - set(sources)
+    other_inputs = set(model.get_parameters() + get_read_value_ops(model) + get_shape_of_ops(model)) - set(sources)
     other_nodes = find_dependent_nodes(model, other_inputs)
     source_dependent_nodes = find_dependent_nodes(model, sources)
     # TODO: Use symbols on dimensions to filter out ShapeOf subexpressions that do not bring new symbols in the subgraph

diff --git a/optimum/intel/openvino/modeling_seq2seq.py b/optimum/intel/openvino/modeling_seq2seq.py
@@ -592,7 +592,7 @@ def forward(
         if "beam_idx" in self.input_names:
             batch_size = input_ids.shape[0]
             inputs["beam_idx"] = (
-                self.next_beam_idx if self.next_beam_idx is not None else np.arange(batch_size, dtype=int)
+                self.next_beam_idx if self.next_beam_idx is not None else np.arange(batch_size, dtype=np.int32)
             )
         # Run inference
         self.request.start_async(inputs, share_inputs=True)
@@ -753,7 +753,9 @@ def _reshape(self, model: openvino.runtime.Model, batch_size: int, sequence_leng
             if is_decoder:
                 if inputs.get_any_name().startswith("past_key_values"):
                     shapes[inputs][2] = -1
-                elif not inputs.get_any_name().startswith("encoder"):
+                elif not inputs.get_any_name().startswith("encoder") and not inputs.get_any_name().startswith(
+                    "beam_idx"
+                ):
                     shapes[inputs][1] = -1
         model.reshape(shapes)
         return model
@@ -836,7 +838,9 @@ def _reshape(self, model: openvino.runtime.Model, batch_size: int, sequence_leng
             if is_decoder:
                 if inputs.get_any_name().startswith("past_key_values"):
                     shapes[inputs][2] = -1
-                elif not inputs.get_any_name().startswith("encoder"):
+                elif not inputs.get_any_name().startswith("encoder") and not inputs.get_any_name().startswith(
+                    "beam_idx"
+                ):
                     shapes[inputs][1] = -1
         model.reshape(shapes)
         return model

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
@@ -1194,7 +1194,7 @@ def test_compare_to_transformers(self, model_arch):
         self.assertIsInstance(ov_model.encoder, OVEncoder)
         self.assertIsInstance(ov_model.decoder, OVDecoder)
         self.assertTrue(ov_model.decoder.stateful)
-        self.assertIsInstance(ov_model.decoder_with_past, None)
+        self.assertTrue(ov_model.decoder_with_past is None)
         self.assertIsInstance(ov_model.config, PretrainedConfig)
 
         transformers_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)