From b9bb02406a11b1d6973e394bdea17afbfba2f8ba Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Tue, 3 Dec 2024 00:02:38 -0500 Subject: [PATCH] fix so inference can be run against quantized models without adapters (#1834) * fix so inference can be run against quantized models without adapters * Update error msg [skip e2e] Co-authored-by: NanoCode012 --------- Co-authored-by: NanoCode012 --- src/axolotl/cli/inference.py | 2 +- src/axolotl/utils/config/models/input/v0_4_1/__init__.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/axolotl/cli/inference.py b/src/axolotl/cli/inference.py index adc991456d..b738e5c222 100644 --- a/src/axolotl/cli/inference.py +++ b/src/axolotl/cli/inference.py @@ -19,7 +19,7 @@ def do_cli(config: Path = Path("examples/"), gradio=False, **kwargs): # pylint: disable=duplicate-code print_axolotl_text_art() - parsed_cfg = load_cfg(config, **kwargs) + parsed_cfg = load_cfg(config, inference=True, **kwargs) parsed_cfg.sample_packing = False parser = transformers.HfArgumentParser((TrainerCliArgs)) parsed_cli_args, _ = parser.parse_args_into_dataclasses( diff --git a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py index 0f01a7cadc..c9170b7a84 100644 --- a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py +++ b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py @@ -323,11 +323,13 @@ class LoraConfig(BaseModel): @model_validator(mode="before") @classmethod def validate_adapter(cls, data): - if not data.get("adapter") and ( - data.get("load_in_8bit") or data.get("load_in_4bit") + if ( + not data.get("adapter") + and not data.get("inference") + and (data.get("load_in_8bit") or data.get("load_in_4bit")) ): raise ValueError( - "load_in_8bit and load_in_4bit are not supported without setting an adapter." + "load_in_8bit and load_in_4bit are not supported without setting an adapter for training." "If you want to full finetune, please turn off load_in_8bit and load_in_4bit." ) return data