Skip to content

Commit

Permalink
Fix warning message for fp32_cpu_offloading in bitsandbytes configs (#…
Browse files Browse the repository at this point in the history
…34079)

* change cpu offload warning for fp8 quantization

* change cpu offload warning for fp4 quantization

* change cpu offload variable name for fp8 and fp4 quantization
  • Loading branch information
amosyou authored Oct 17, 2024
1 parent 6d2b203 commit aa3e35a
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
6 changes: 3 additions & 3 deletions src/transformers/quantizers/quantizer_bnb_4bit.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def validate_environment(self, *args, **kwargs):
raise ValueError(
"Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the "
"quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules "
"in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to "
"in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to "
"`from_pretrained`. Check "
"https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu "
"for more details. "
Expand Down Expand Up @@ -285,7 +285,7 @@ def _process_model_before_weight_loading(
):
from ..integrations import get_keys_to_not_convert, replace_with_bnb_linear

load_in_8bit_fp32_cpu_offload = self.quantization_config.llm_int8_enable_fp32_cpu_offload
llm_int8_enable_fp32_cpu_offload = self.quantization_config.llm_int8_enable_fp32_cpu_offload

# We keep some modules such as the lm_head in their original dtype for numerical stability reasons
if self.quantization_config.llm_int8_skip_modules is None:
Expand All @@ -302,7 +302,7 @@ def _process_model_before_weight_loading(
if isinstance(device_map, dict) and len(device_map.keys()) > 1:
keys_on_cpu = [key for key, value in device_map.items() if value in ["disk", "cpu"]]

if len(keys_on_cpu) > 0 and not load_in_8bit_fp32_cpu_offload:
if len(keys_on_cpu) > 0 and not llm_int8_enable_fp32_cpu_offload:
raise ValueError(
"If you want to offload some keys to `cpu` or `disk`, you need to set "
"`llm_int8_enable_fp32_cpu_offload=True`. Note that these modules will not be "
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/quantizers/quantizer_bnb_8bit.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def validate_environment(self, *args, **kwargs):
raise ValueError(
"Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the "
"quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules "
"in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to "
"in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to "
"`from_pretrained`. Check "
"https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu "
"for more details. "
Expand Down Expand Up @@ -250,7 +250,7 @@ def _process_model_before_weight_loading(
):
from ..integrations import get_keys_to_not_convert, replace_with_bnb_linear

load_in_8bit_fp32_cpu_offload = self.quantization_config.llm_int8_enable_fp32_cpu_offload
llm_int8_enable_fp32_cpu_offload = self.quantization_config.llm_int8_enable_fp32_cpu_offload

# We keep some modules such as the lm_head in their original dtype for numerical stability reasons
if self.quantization_config.llm_int8_skip_modules is None:
Expand All @@ -267,7 +267,7 @@ def _process_model_before_weight_loading(
if isinstance(device_map, dict) and len(device_map.keys()) > 1:
keys_on_cpu = [key for key, value in device_map.items() if value in ["disk", "cpu"]]

if len(keys_on_cpu) > 0 and not load_in_8bit_fp32_cpu_offload:
if len(keys_on_cpu) > 0 and not llm_int8_enable_fp32_cpu_offload:
raise ValueError(
"If you want to offload some keys to `cpu` or `disk`, you need to set "
"`llm_int8_enable_fp32_cpu_offload=True`. Note that these modules will not be "
Expand Down

0 comments on commit aa3e35a

Please sign in to comment.