Skip to content

Commit

Permalink
fix so inference can be run against quantized models without adapters (#1834)
Browse files Browse the repository at this point in the history

* fix so inference can be run against quantized models without adapters

* Update error msg [skip e2e]

Co-authored-by: NanoCode012 <[email protected]>

---------

Co-authored-by: NanoCode012 <[email protected]>
  • Loading branch information
winglian and NanoCode012 authored Dec 3, 2024
1 parent ff4794c commit b9bb024
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/axolotl/cli/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
def do_cli(config: Path = Path("examples/"), gradio=False, **kwargs):
# pylint: disable=duplicate-code
print_axolotl_text_art()
parsed_cfg = load_cfg(config, **kwargs)
parsed_cfg = load_cfg(config, inference=True, **kwargs)
parsed_cfg.sample_packing = False
parser = transformers.HfArgumentParser((TrainerCliArgs))
parsed_cli_args, _ = parser.parse_args_into_dataclasses(
Expand Down
8 changes: 5 additions & 3 deletions src/axolotl/utils/config/models/input/v0_4_1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,11 +323,13 @@ class LoraConfig(BaseModel):
@model_validator(mode="before")
@classmethod
def validate_adapter(cls, data):
    """Reject quantized loading (8-bit/4-bit) when no adapter is configured for training.

    Runs before field validation on the raw config mapping. Quantized base
    weights are frozen, so training without a LoRA/QLoRA adapter would be a
    no-op; full finetuning requires full-precision weights. Inference-only
    runs (``inference=True``) are exempt, since no weights are updated.

    Args:
        data: raw config mapping (pre-validation) with keys such as
            ``adapter``, ``inference``, ``load_in_8bit``, ``load_in_4bit``.

    Returns:
        The unmodified ``data`` mapping.

    Raises:
        ValueError: if 8-bit or 4-bit loading is requested for a training
            run that has no adapter configured.
    """
    if (
        not data.get("adapter")
        and not data.get("inference")
        and (data.get("load_in_8bit") or data.get("load_in_4bit"))
    ):
        raise ValueError(
            # NOTE: space added between sentences — the original literals
            # concatenated to "…for training.If you want…".
            "load_in_8bit and load_in_4bit are not supported without setting an adapter for training. "
            "If you want to full finetune, please turn off load_in_8bit and load_in_4bit."
        )
    return data
Expand Down

0 comments on commit b9bb024

Please sign in to comment.