Commit e1ae504

bug-fix - remove unnecessary call to apply quant lifecycle

Benjamin committed Aug 22, 2024
1 parent 2ecf711 commit e1ae504

Showing 2 changed files with 1 addition and 7 deletions.

src/transformers/quantizers/quantizer_compressed_tensors.py (0 additions, 6 deletions)
@@ -76,12 +76,6 @@ def _is_decompressed_key(key: str) -> bool:
 
         return [key for key in missing_keys if not _is_decompressed_key(key)]
 
-    def _process_model_before_weight_loading(self, model, **kwargs):
-        if self.quantization_config.quantization_config is not None:
-            from compressed_tensors.quantization import apply_quantization_config
-
-            apply_quantization_config(model, self.quantization_config.quantization_config)
-
     def _process_model_after_weight_loading(self, model, resolved_archive_file, **kwargs):
         self.compressor.decompress(model_path=resolved_archive_file, model=model)
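
For context on the deletion: per the commit message, the extra apply_quantization_config call before weight loading was unnecessary, presumably because the compressor's decompress path (kept in _process_model_after_weight_loading) already covers the quantization lifecycle. Below is a minimal sketch of that remaining flow outside transformers, assuming compressed-tensors' public ModelCompressor API; the checkpoint path is a placeholder and this is an illustration, not the library's exact code path.

# A minimal sketch, assuming compressed-tensors' ModelCompressor API;
# "path/to/compressed-checkpoint" is a placeholder, not a real model id.
from compressed_tensors.compressors import ModelCompressor
from transformers import AutoConfig, AutoModelForCausalLM

model_path = "path/to/compressed-checkpoint"  # placeholder

# Rebuild an uninitialized model skeleton for the checkpoint's architecture.
config = AutoConfig.from_pretrained(model_path)
model = AutoModelForCausalLM.from_config(config)

# Assumption: from_pretrained reads the checkpoint's compression metadata
# (returning None for uncompressed checkpoints); decompress() then expands
# the stored weights into `model`, mirroring the diff's
# _process_model_after_weight_loading.
compressor = ModelCompressor.from_pretrained(model_path)
if compressor is not None:
    compressor.decompress(model_path=model_path, model=model)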

Second changed file (1 addition, 1 deletion):
@@ -52,7 +52,7 @@ def test_llama_8b_fp8(self):

     def _test_quantized_model(self, model_name: str):
         """Carry out generation"""
-        quantized_model = AutoModelForCausalLM.from_pretrained(model_name)
+        quantized_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         device = quantized_model.device
         self.assertIsNotNone(
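
The one-line test fix adds device_map="auto" so that accelerate places the quantized model's weights across the available GPUs and CPU instead of loading everything onto a single device. A short usage sketch of the same call; the model id is a placeholder, and accelerate must be installed for device_map="auto" to work.

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "org/compressed-model"  # placeholder checkpoint id

# device_map="auto" lets accelerate shard layers across GPUs/CPU.
quantized_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# quantized_model.device reports where the first weights landed; inputs
# follow it, matching the pattern in the test above.
inputs = tokenizer("Paris is", return_tensors="pt").to(quantized_model.device)
output = quantized_model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output[0], skip_special_tokens=True))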
