From 7312b7ae44d088cc1b4ce4943ff9a1b19cabc0c2 Mon Sep 17 00:00:00 2001
From: jiqing-feng
Date: Tue, 17 Dec 2024 06:59:40 -0500
Subject: [PATCH] fix all auto-gptq tests

Signed-off-by: jiqing-feng
---
 optimum/gptq/quantizer.py       | 11 +++++++++--
 optimum/gptq/utils.py           |  2 +-
 optimum/utils/testing_utils.py  |  6 +++---
 tests/gptq/test_quantization.py |  5 ++++-
 4 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/optimum/gptq/quantizer.py b/optimum/gptq/quantizer.py
index 976f6418b3b..6a845bb23db 100644
--- a/optimum/gptq/quantizer.py
+++ b/optimum/gptq/quantizer.py
@@ -32,7 +32,14 @@
 from ..version import __version__ as optimum_version
 from .constants import GPTQ_CONFIG
 from .data import get_dataset, prepare_dataset
-from .utils import get_block_name_with_pattern, get_device, get_layers, get_preceding_modules, get_seqlen
+from .utils import (
+    get_block_name_with_pattern,
+    get_device,
+    get_layers,
+    get_preceding_modules,
+    get_seqlen,
+    nested_move_to,
+)
 
 
 if is_accelerate_available():
@@ -53,7 +60,7 @@
     from gptqmodel import exllama_set_max_input_length
     from gptqmodel.quantization import GPTQ
     from gptqmodel.utils.importer import hf_select_quant_linear
-    from gptqmodel.utils.model import hf_convert_gptq_v1_to_v2_format, hf_convert_gptq_v2_to_v1_format, nested_move_to
+    from gptqmodel.utils.model import hf_convert_gptq_v1_to_v2_format, hf_convert_gptq_v2_to_v1_format
     from gptqmodel.utils.model import hf_gptqmodel_post_init as gptq_post_init
     from gptqmodel.version import __version__ as gptqmodel_version
 
diff --git a/optimum/gptq/utils.py b/optimum/gptq/utils.py
index 2b842253a42..732ecbd66b9 100644
--- a/optimum/gptq/utils.py
+++ b/optimum/gptq/utils.py
@@ -115,7 +115,7 @@ def get_seqlen(model: nn.Module):
     return 2048
 
 
-def move_to(obj: torch.Tensor | nn.Module, device: torch.device):
+def move_to(obj: torch.Tensor, device: torch.device):
     if get_device(obj) != device:
         obj = obj.to(device)
     return obj
diff --git a/optimum/utils/testing_utils.py b/optimum/utils/testing_utils.py
index 5bf57f4d41b..7a5e27860a4 100644
--- a/optimum/utils/testing_utils.py
+++ b/optimum/utils/testing_utils.py
@@ -65,9 +65,9 @@ def require_gptq(test_case):
     """
     Decorator marking a test that requires gptqmodel or auto-gptq. These tests are skipped when gptqmodel and auto-gptq are not installed.
     """
-    return unittest.skipUnless(is_auto_gptq_available() or is_gptqmodel_available(), "test requires auto-gptq")(
-        test_case
-    )
+    return unittest.skipUnless(
+        is_auto_gptq_available() or is_gptqmodel_available(), "test requires gptqmodel or auto-gptq"
+    )(test_case)
 
 
 def require_torch_gpu(test_case):
diff --git a/tests/gptq/test_quantization.py b/tests/gptq/test_quantization.py
index b6b50fb617d..0cf2416bb17 100644
--- a/tests/gptq/test_quantization.py
+++ b/tests/gptq/test_quantization.py
@@ -152,6 +152,9 @@ def test_serialization(self):
         """
         Test the serialization of the model and the loading of the quantized weights
        """
+        # AutoGPTQ does not support CPU
+        if self.device_map_for_quantization == "cpu" and not is_gptqmodel_available():
+            return
         with tempfile.TemporaryDirectory() as tmpdirname:
             self.quantizer.save(self.quantized_model, tmpdirname)
@@ -309,7 +312,7 @@ def test_exllama_serialization(self):
                 device_map={"": self.device_for_inference},
             )
             self.check_quantized_layers_type(
-                quantized_model_from_saved, "exllama" if is_gptqmodel_available else "exllamav2"
+                quantized_model_from_saved, "exllama" if is_gptqmodel_available() else "exllamav2"
             )
 
     # transformers and auto-gptq compatibility
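
The one-character fix in test_exllama_serialization is easy to miss: the original conditional used the function object is_gptqmodel_available rather than calling it, and a function object is always truthy, so the branch always expected "exllama" regardless of which backend was installed. Below is a minimal, self-contained Python sketch of that pitfall; the is_gptqmodel_available defined here is a stand-in for illustration, not the real optimum helper.

# Stand-in availability check; assume gptqmodel is NOT installed in this scenario.
def is_gptqmodel_available() -> bool:
    return False


# Bug: referencing the function object is always truthy, so the result is ignored.
kernel = "exllama" if is_gptqmodel_available else "exllamav2"
assert kernel == "exllama"  # picked even though gptqmodel is unavailable

# Fix: call the function so the actual availability drives the choice.
kernel = "exllama" if is_gptqmodel_available() else "exllamav2"
assert kernel == "exllamav2"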