diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index 5b1268f3c64..2da3f5bea6b 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -1291,7 +1291,10 @@ def inputs(self) -> Dict[str, Dict[int, str]]:
 
 
 class WhisperOnnxConfig(AudioToTextOnnxConfig):
-    NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig
+    NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig.with_args(
+        encoder_num_layers="encoder_layers",
+        decoder_num_layers="decoder_layers",
+    )
     ATOL_FOR_VALIDATION = 1e-3
 
     @property
diff --git a/optimum/gptq/quantizer.py b/optimum/gptq/quantizer.py
index 103015642c6..1704f92b3ef 100644
--- a/optimum/gptq/quantizer.py
+++ b/optimum/gptq/quantizer.py
@@ -245,10 +245,18 @@ def _replace_by_quant_layers(self, module: nn.Module, names: List[str], name: st
                     out_features = layer.weight.shape[1]
                 if not (self.desc_act) or self.group_size == -1:
                     new_layer = QuantLinear(
-                        self.bits, self.group_size, in_features, out_features, True, use_cuda_fp16=self.use_cuda_fp16, weight_dtype=layer.weight.dtype
+                        self.bits,
+                        self.group_size,
+                        in_features,
+                        out_features,
+                        True,
+                        use_cuda_fp16=self.use_cuda_fp16,
+                        weight_dtype=layer.weight.dtype,
                     )
                 else:
-                    new_layer = QuantLinear(self.bits, self.group_size, in_features, out_features, True, weight_dtype=layer.weight.dtype)
+                    new_layer = QuantLinear(
+                        self.bits, self.group_size, in_features, out_features, True, weight_dtype=layer.weight.dtype
+                    )
                 new_layer.device = device
                 setattr(module, attr, new_layer.to(device))
         for name1, child in module.named_children():
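
Note on the first hunk (context, not part of the diff): Whisper's config stores its layer counts under `encoder_layers` and `decoder_layers`, so a bare `NormalizedSeq2SeqConfig` cannot resolve the normalized `encoder_num_layers` / `decoder_num_layers` lookups; the mappings passed to `with_args` supply that redirection. Below is a minimal sketch of the name-mapping idea using hypothetical stand-ins (`ToyNormalizedConfig`, `FakeWhisperConfig`) — it is illustrative only, not Optimum's actual implementation:

    # Hypothetical sketch -- not Optimum's real NormalizedConfig. It only
    # illustrates why the with_args(...) mappings above are needed: lookups
    # of normalized names are redirected to the model config's own attribute
    # names (Whisper uses encoder_layers / decoder_layers).
    import functools


    class ToyNormalizedConfig:
        def __init__(self, config, **name_overrides):
            self.config = config
            # e.g. encoder_num_layers="encoder_layers" stores the mapping
            # under the uppercase key ENCODER_NUM_LAYERS.
            for normalized_name, real_name in name_overrides.items():
                setattr(self, normalized_name.upper(), real_name)

        @classmethod
        def with_args(cls, **name_overrides):
            # Factory preloaded with the name overrides, so it can be
            # instantiated later with just the model config.
            return functools.partial(cls, **name_overrides)

        def __getattr__(self, name):
            # Called only for names not found normally: resolve the
            # normalized name through its uppercase mapping to the real
            # config attribute.
            real_name = object.__getattribute__(self, name.upper())
            return getattr(self.config, real_name)


    class FakeWhisperConfig:
        encoder_layers = 12  # Whisper's actual attribute names for layer counts
        decoder_layers = 12


    factory = ToyNormalizedConfig.with_args(
        encoder_num_layers="encoder_layers",
        decoder_num_layers="decoder_layers",
    )
    normalized = factory(FakeWhisperConfig())
    print(normalized.encoder_num_layers)  # 12, read from encoder_layers
    print(normalized.decoder_num_layers)  # 12, read from decoder_layers

The second hunk changes no behavior: the two `QuantLinear(...)` calls are only re-wrapped across multiple lines to satisfy line-length formatting.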