diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index 5b1268f3c64..2da3f5bea6b 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -1291,7 +1291,10 @@ def inputs(self) -> Dict[str, Dict[int, str]]:
 
 
 class WhisperOnnxConfig(AudioToTextOnnxConfig):
-    NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig
+    NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig.with_args(
+        encoder_num_layers="encoder_layers",
+        decoder_num_layers="decoder_layers",
+    )
     ATOL_FOR_VALIDATION = 1e-3
 
     @property
diff --git a/optimum/gptq/quantizer.py b/optimum/gptq/quantizer.py
index 103015642c6..1704f92b3ef 100644
--- a/optimum/gptq/quantizer.py
+++ b/optimum/gptq/quantizer.py
@@ -245,10 +245,18 @@ def _replace_by_quant_layers(self, module: nn.Module, names: List[str], name: st
                     out_features = layer.weight.shape[1]
                 if not (self.desc_act) or self.group_size == -1:
                     new_layer = QuantLinear(
-                        self.bits, self.group_size, in_features, out_features, True, use_cuda_fp16=self.use_cuda_fp16, weight_dtype=layer.weight.dtype
+                        self.bits,
+                        self.group_size,
+                        in_features,
+                        out_features,
+                        True,
+                        use_cuda_fp16=self.use_cuda_fp16,
+                        weight_dtype=layer.weight.dtype,
                     )
                 else:
-                    new_layer = QuantLinear(self.bits, self.group_size, in_features, out_features, True, weight_dtype=layer.weight.dtype)
+                    new_layer = QuantLinear(
+                        self.bits, self.group_size, in_features, out_features, True, weight_dtype=layer.weight.dtype
+                    )
                 new_layer.device = device
                 setattr(module, attr, new_layer.to(device))
         for name1, child in module.named_children():
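
Note on the first hunk (context, not part of the diff): Whisper's config stores its layer counts under `encoder_layers` and `decoder_layers`, so a bare `NormalizedSeq2SeqConfig` cannot resolve the normalized `encoder_num_layers` / `decoder_num_layers` lookups; the mappings passed to `with_args` supply that redirection. Below is a minimal sketch of the name-mapping idea using hypothetical stand-ins (`ToyNormalizedConfig`, `FakeWhisperConfig`) — it is illustrative only, not Optimum's actual implementation:

    # Hypothetical sketch -- not Optimum's real NormalizedConfig. It only
    # illustrates why the with_args(...) mappings above are needed: lookups
    # of normalized names are redirected to the model config's own attribute
    # names (Whisper uses encoder_layers / decoder_layers).
    import functools


    class ToyNormalizedConfig:
        def __init__(self, config, **name_overrides):
            self.config = config
            # e.g. encoder_num_layers="encoder_layers" stores the mapping
            # under the uppercase key ENCODER_NUM_LAYERS.
            for normalized_name, real_name in name_overrides.items():
                setattr(self, normalized_name.upper(), real_name)

        @classmethod
        def with_args(cls, **name_overrides):
            # Factory preloaded with the name overrides, so it can be
            # instantiated later with just the model config.
            return functools.partial(cls, **name_overrides)

        def __getattr__(self, name):
            # Called only for names not found normally: resolve the
            # normalized name through its uppercase mapping to the real
            # config attribute.
            real_name = object.__getattribute__(self, name.upper())
            return getattr(self.config, real_name)


    class FakeWhisperConfig:
        encoder_layers = 12  # Whisper's actual attribute names for layer counts
        decoder_layers = 12


    factory = ToyNormalizedConfig.with_args(
        encoder_num_layers="encoder_layers",
        decoder_num_layers="decoder_layers",
    )
    normalized = factory(FakeWhisperConfig())
    print(normalized.encoder_num_layers)  # 12, read from encoder_layers
    print(normalized.decoder_num_layers)  # 12, read from decoder_layers

The second hunk changes no behavior: the two `QuantLinear(...)` calls are only re-wrapped across multiple lines to satisfy line-length formatting.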