diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
index 6f739e254..75dab9366 100644
--- a/optimum/intel/openvino/quantization.py
+++ b/optimum/intel/openvino/quantization.py
@@ -44,7 +44,6 @@
 from transformers.pytorch_utils import Conv1D
 from transformers.utils import is_accelerate_available
 
-from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed
 from optimum.exporters.tasks import TasksManager
 from optimum.quantization_base import OptimumQuantizer
 
@@ -524,6 +523,8 @@ def _quantize_torchmodel(
 
         quantization_config = ov_config.quantization_config
         if isinstance(quantization_config, OVWeightQuantizationConfig):
+            from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed
+
             if stateful:
                 # patch model before weight compression
                 model = patch_model_with_bettertransformer(model)
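
The diff moves the check_dummy_inputs_are_allowed import from module scope into the only branch that uses it, so importing the quantization module no longer pulls in the ONNX export dependencies at import time. A minimal sketch of this deferred-import pattern, where the module name optional_extra and the function validate_inputs are hypothetical placeholders (not part of optimum-intel):

# Minimal sketch of the deferred-import pattern applied in the diff above.
# "optional_extra" and "validate_inputs" stand in for an optional dependency;
# they are not real optimum-intel names.

def quantize(model, compress_weights: bool = False):
    if compress_weights:
        # The optional dependency is resolved only when this branch actually
        # runs, so importing this module succeeds even when the extra is
        # not installed.
        from optional_extra import validate_inputs  # hypothetical

        validate_inputs(model)
    # ... remaining quantization steps that need no optional packages ...
    return model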