Skip to content

Commit

Permalink
fix so inference can be run against quantized models without adapters (#1834)
Browse files Browse the repository at this point in the history

* fix so inference can be run against quantized models without adapters

* Update error msg [skip e2e]

Co-authored-by: NanoCode012 <[email protected]>

---------

Co-authored-by: NanoCode012 <[email protected]>
  • Loading branch information
winglian and NanoCode012 authored Dec 3, 2024
1 parent ff4794c commit b9bb024
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/axolotl/cli/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
def do_cli(config: Path = Path("examples/"), gradio=False, **kwargs):
# pylint: disable=duplicate-code
print_axolotl_text_art()
parsed_cfg = load_cfg(config, **kwargs)
parsed_cfg = load_cfg(config, inference=True, **kwargs)
parsed_cfg.sample_packing = False
parser = transformers.HfArgumentParser((TrainerCliArgs))
parsed_cli_args, _ = parser.parse_args_into_dataclasses(
Expand Down
8 changes: 5 additions & 3 deletions src/axolotl/utils/config/models/input/v0_4_1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,11 +323,13 @@ class LoraConfig(BaseModel):
@model_validator(mode="before")
@classmethod
def validate_adapter(cls, data):
    """Reject quantized loading (8-bit/4-bit) when no adapter is configured for training.

    Runs before field validation on the raw config mapping. Quantized base
    weights are frozen, so training without a LoRA/QLoRA adapter would be a
    no-op; full finetuning requires full-precision weights. Inference-only
    runs (``inference=True``) are exempt, since no weights are updated.

    Args:
        data: raw config mapping (pre-validation) with keys such as
            ``adapter``, ``inference``, ``load_in_8bit``, ``load_in_4bit``.

    Returns:
        The unmodified ``data`` mapping.

    Raises:
        ValueError: if 8-bit or 4-bit loading is requested for a training
            run that has no adapter configured.
    """
    if (
        not data.get("adapter")
        and not data.get("inference")
        and (data.get("load_in_8bit") or data.get("load_in_4bit"))
    ):
        raise ValueError(
            # NOTE: space added between sentences — the original literals
            # concatenated to "…for training.If you want…".
            "load_in_8bit and load_in_4bit are not supported without setting an adapter for training. "
            "If you want to full finetune, please turn off load_in_8bit and load_in_4bit."
        )
    return data
Expand Down

0 comments on commit b9bb024

Please sign in to comment.