Commit e1ae504

bug-fix - remove unnecessary call to apply quant lifecycle

Benjamin committed Aug 22, 2024
1 parent 2ecf711 commit e1ae504

Showing 2 changed files with 1 addition and 7 deletions.

src/transformers/quantizers/quantizer_compressed_tensors.py (0 additions, 6 deletions)
@@ -76,12 +76,6 @@ def _is_decompressed_key(key: str) -> bool:
 
         return [key for key in missing_keys if not _is_decompressed_key(key)]
 
-    def _process_model_before_weight_loading(self, model, **kwargs):
-        if self.quantization_config.quantization_config is not None:
-            from compressed_tensors.quantization import apply_quantization_config
-
-            apply_quantization_config(model, self.quantization_config.quantization_config)
-
     def _process_model_after_weight_loading(self, model, resolved_archive_file, **kwargs):
         self.compressor.decompress(model_path=resolved_archive_file, model=model)
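
For context on the deletion: per the commit message, the extra apply_quantization_config call before weight loading was unnecessary, presumably because the compressor's decompress path (kept in _process_model_after_weight_loading) already covers the quantization lifecycle. Below is a minimal sketch of that remaining flow outside transformers, assuming compressed-tensors' public ModelCompressor API; the checkpoint path is a placeholder and this is an illustration, not the library's exact code path.

# A minimal sketch, assuming compressed-tensors' ModelCompressor API;
# "path/to/compressed-checkpoint" is a placeholder, not a real model id.
from compressed_tensors.compressors import ModelCompressor
from transformers import AutoConfig, AutoModelForCausalLM

model_path = "path/to/compressed-checkpoint"  # placeholder

# Rebuild an uninitialized model skeleton for the checkpoint's architecture.
config = AutoConfig.from_pretrained(model_path)
model = AutoModelForCausalLM.from_config(config)

# Assumption: from_pretrained reads the checkpoint's compression metadata
# (returning None for uncompressed checkpoints); decompress() then expands
# the stored weights into `model`, mirroring the diff's
# _process_model_after_weight_loading.
compressor = ModelCompressor.from_pretrained(model_path)
if compressor is not None:
    compressor.decompress(model_path=model_path, model=model)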

Second changed file (1 addition, 1 deletion):
@@ -52,7 +52,7 @@ def test_llama_8b_fp8(self):

     def _test_quantized_model(self, model_name: str):
         """Carry out generation"""
-        quantized_model = AutoModelForCausalLM.from_pretrained(model_name)
+        quantized_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         device = quantized_model.device
         self.assertIsNotNone(
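
The one-line test fix adds device_map="auto" so that accelerate places the quantized model's weights across the available GPUs and CPU instead of loading everything onto a single device. A short usage sketch of the same call; the model id is a placeholder, and accelerate must be installed for device_map="auto" to work.

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "org/compressed-model"  # placeholder checkpoint id

# device_map="auto" lets accelerate shard layers across GPUs/CPU.
quantized_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# quantized_model.device reports where the first weights landed; inputs
# follow it, matching the pattern in the test above.
inputs = tokenizer("Paris is", return_tensors="pt").to(quantized_model.device)
output = quantized_model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output[0], skip_special_tokens=True))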
