consolidate as peft_model_dir
winglian committed Sep 19, 2023
1 parent 283e37e commit 71b0fcb
Showing 33 changed files with 50 additions and 49 deletions.
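
In short, this commit replaces the adapter-specific `lora_model_dir` / `ia3_model_dir` options with a single `peft_model_dir` key across the README, the example configs, and the loaders. Below is a minimal sketch of the resulting lookup pattern; `resolve_adapter_dir` and the `allow_legacy_lora_key` flag are illustrative names, not Axolotl's API, and per this diff only the llama-adapter loader keeps the old `lora_model_dir` as a fallback.

```python
# Illustrative sketch (not Axolotl's actual code) of the option consolidation:
# one `peft_model_dir` key now points at a pretrained LoRA/QLoRA/IA3 adapter.
from types import SimpleNamespace  # stand-in for the config object


def resolve_adapter_dir(cfg, allow_legacy_lora_key=False):
    """Return the pretrained-adapter directory, if one is configured."""
    if allow_legacy_lora_key:
        # the llama-adapter loader in this diff still accepts the old key
        return cfg.peft_model_dir or cfg.lora_model_dir
    return cfg.peft_model_dir


cfg = SimpleNamespace(adapter="lora", peft_model_dir="./lora-out", lora_model_dir=None)
print(resolve_adapter_dir(cfg))  # -> ./lora-out
```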
8 changes: 4 additions & 4 deletions README.md
@@ -94,7 +94,7 @@ accelerate launch -m axolotl.cli.train examples/openllama-3b/lora.yml

# inference
accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
--lora_model_dir="./lora-out"
--peft_model_dir="./lora-out"
```

## Installation
@@ -501,7 +501,7 @@ total_num_tokens:
adapter: lora
# if you already have a lora model trained that you want to load, put that here
# lora hyperparameters
lora_model_dir:
peft_model_dir:
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
@@ -738,7 +738,7 @@ Pass the appropriate flag to the train command:

- Pretrained LORA:
```bash
python -m axolotl.cli.inference examples/your_config.yml --lora_model_dir="./lora-output-dir"
python -m axolotl.cli.inference examples/your_config.yml --peft_model_dir="./lora-output-dir"
```
- Full weights finetune:
```bash
@@ -755,7 +755,7 @@ Pass the appropriate flag to the train command:
Add below flag to train command above

```bash
python3 -m axolotl.cli.merge_lora examples/your_config.yml --lora_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False
python3 -m axolotl.cli.merge_lora examples/your_config.yml --peft_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False
```
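
The merge command above folds the trained adapter back into the base weights. As a rough, hedged sketch of what that amounts to using the PEFT/Transformers APIs directly (the model name and paths are placeholders, not necessarily what `merge_lora` does internally):

```python
# Rough sketch of merging a PEFT adapter into base weights with the PEFT /
# Transformers APIs directly; model name and paths are placeholders.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained(
    "openlm-research/open_llama_3b",  # placeholder base model
    torch_dtype=torch.float16,
)
peft_model = PeftModel.from_pretrained(base, "./completed-model")  # --peft_model_dir
merged = peft_model.merge_and_unload()  # fold the LoRA deltas into the base weights
merged.save_pretrained("./merged-model")
```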

If you run out of CUDA memory, you can try to merge in system RAM with
2 changes: 1 addition & 1 deletion examples/cerebras/btlm-ft.yml
@@ -18,7 +18,7 @@ dataset_prepared_path: last_prepared_run
val_set_size: 0.01

adapter:
lora_model_dir:
peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:
sample_packing: false
2 changes: 1 addition & 1 deletion examples/cerebras/qlora.yml
@@ -10,7 +10,7 @@ datasets:
dataset_prepared_path: last_run_prepared
val_set_size: 0.01
adapter: qlora
lora_model_dir:
peft_model_dir:
sequence_len: 2048
max_packed_sequence_len: 2048
lora_r: 16
2 changes: 1 addition & 1 deletion examples/code-llama/13b/lora.yml
@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true

adapter: lora
lora_model_dir:
peft_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
2 changes: 1 addition & 1 deletion examples/code-llama/13b/qlora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./qlora-out

adapter: qlora
lora_model_dir:
peft_model_dir:

sequence_len: 4096
sample_packing: true
2 changes: 1 addition & 1 deletion examples/code-llama/34b/lora.yml
@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true

adapter: lora
lora_model_dir:
peft_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
2 changes: 1 addition & 1 deletion examples/code-llama/34b/qlora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./qlora-out

adapter: qlora
lora_model_dir:
peft_model_dir:

sequence_len: 4096
sample_packing: true
2 changes: 1 addition & 1 deletion examples/code-llama/7b/lora.yml
@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true

adapter: lora
lora_model_dir:
peft_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
2 changes: 1 addition & 1 deletion examples/code-llama/7b/qlora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./qlora-out

adapter: qlora
lora_model_dir:
peft_model_dir:

sequence_len: 4096
sample_packing: true
2 changes: 1 addition & 1 deletion examples/falcon/config-7b-lora.yml
@@ -14,7 +14,7 @@ datasets:
dataset_prepared_path: last_run_prepared
val_set_size: 0.01
adapter: lora
lora_model_dir:
peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 16
2 changes: 1 addition & 1 deletion examples/falcon/config-7b-qlora.yml
@@ -21,7 +21,7 @@ dataset_prepared_path: last_run_prepared
val_set_size: 0.01
# enable QLoRA
adapter: qlora
lora_model_dir:
peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:

2 changes: 1 addition & 1 deletion examples/falcon/config-7b.yml
@@ -14,7 +14,7 @@ datasets:
dataset_prepared_path: last_run_prepared
val_set_size: 0.01
adapter:
lora_model_dir:
peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 64
2 changes: 1 addition & 1 deletion examples/gptj/qlora.yml
@@ -10,7 +10,7 @@ datasets:
dataset_prepared_path: last_run_prepared
val_set_size: 0.01
adapter: qlora
lora_model_dir:
peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 8
2 changes: 1 addition & 1 deletion examples/jeopardy-bot/config.yml
@@ -9,7 +9,7 @@ datasets:
dataset_prepared_path: last_run_prepared
val_set_size: 0.02
adapter:
lora_model_dir:
peft_model_dir:
sequence_len: 512
max_packed_sequence_len:
lora_r:
2 changes: 1 addition & 1 deletion examples/llama-2/gptq-lora.yml
@@ -18,7 +18,7 @@ datasets:
dataset_prepared_path: last_run_prepared
val_set_size: 0.01
adapter: lora
lora_model_dir:
peft_model_dir:
sequence_len: 4096
sample_packing:
lora_r: 8
2 changes: 1 addition & 1 deletion examples/llama-2/ia3.yml
@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true

adapter: ia3
ia3_model_dir:
peft_model_dir:
ia3_target_modules:
- k_proj
- v_proj
2 changes: 1 addition & 1 deletion examples/llama-2/lora.yml
@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true

adapter: lora
lora_model_dir:
peft_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
2 changes: 1 addition & 1 deletion examples/llama-2/qlora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./qlora-out

adapter: qlora
lora_model_dir:
peft_model_dir:

sequence_len: 4096
sample_packing: true
2 changes: 1 addition & 1 deletion examples/llama-2/relora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./relora-out

adapter: qlora
lora_model_dir:
peft_model_dir:

sequence_len: 4096
sample_packing: true
2 changes: 1 addition & 1 deletion examples/llama-2/tiny-llama.yml
@@ -20,7 +20,7 @@ sequence_len: 4096
sample_packing: true

adapter: lora
lora_model_dir:
peft_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
2 changes: 1 addition & 1 deletion examples/mpt-7b/config.yml
@@ -9,7 +9,7 @@ datasets:
dataset_prepared_path: last_run_prepared
val_set_size: 0.02
adapter:
lora_model_dir:
peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 8
2 changes: 1 addition & 1 deletion examples/openllama-3b/config.yml
@@ -12,7 +12,7 @@ datasets:
dataset_prepared_path: last_run_prepared
val_set_size: 0.02
adapter:
lora_model_dir:
peft_model_dir:
sequence_len: 256
max_packed_sequence_len:
lora_r:
2 changes: 1 addition & 1 deletion examples/openllama-3b/lora.yml
@@ -12,7 +12,7 @@ datasets:
dataset_prepared_path: last_run_prepared
val_set_size: 0.02
adapter: lora
lora_model_dir:
peft_model_dir:
sequence_len: 256
max_packed_sequence_len:
lora_r: 8
2 changes: 1 addition & 1 deletion examples/openllama-3b/qlora.yml
@@ -12,7 +12,7 @@ datasets:
dataset_prepared_path: last_run_prepared
val_set_size: 0.01
adapter: qlora
lora_model_dir:
peft_model_dir:
sequence_len: 2048
max_packed_sequence_len: 2048
lora_r: 8
2 changes: 1 addition & 1 deletion examples/phi/phi-ft.yml
@@ -22,7 +22,7 @@ sample_packing: true
pad_to_sequence_len:

adapter:
lora_model_dir:
peft_model_dir:
lora_r:
lora_alpha:
lora_dropout:
2 changes: 1 addition & 1 deletion examples/phi/phi-qlora.yml
@@ -22,7 +22,7 @@ sample_packing: false # not CURRENTLY compatible with LoRAs
pad_to_sequence_len:

adapter: qlora
lora_model_dir:
peft_model_dir:
lora_r: 64
lora_alpha: 32
lora_dropout: 0.05
2 changes: 1 addition & 1 deletion examples/pythia-12b/config.yml
@@ -13,7 +13,7 @@ datasets:
dataset_prepared_path: last_run_prepared
val_set_size: 0.05
adapter:
lora_model_dir:
peft_model_dir:
sequence_len: 2048
max_packed_sequence_len: 2048
lora_r: 64
2 changes: 1 addition & 1 deletion examples/pythia/lora.yml
@@ -7,7 +7,7 @@ datasets:
dataset_prepared_path: last_run_prepared
val_set_size: 0.05
adapter: lora
lora_model_dir:
peft_model_dir:
sequence_len: 512
lora_r: 16
lora_alpha: 32
2 changes: 1 addition & 1 deletion examples/redpajama/config-3b.yml
@@ -10,7 +10,7 @@ datasets:
dataset_prepared_path: last_run_prepared
val_set_size: 0.02
adapter:
lora_model_dir:
peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 8
2 changes: 1 addition & 1 deletion examples/replit-3b/config-lora.yml
@@ -8,7 +8,7 @@ datasets:
dataset_prepared_path: last_run_prepared
val_set_size: 0.05
adapter: lora
lora_model_dir:
peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 8
2 changes: 1 addition & 1 deletion examples/xgen-7b/xgen-7b-8k-qlora.yml
@@ -20,7 +20,7 @@ dataset_prepared_path: last_run_prepared
val_set_size: 0.01
# enable QLoRA
adapter: qlora
lora_model_dir:
peft_model_dir:
sequence_len: 8192
max_packed_sequence_len:

5 changes: 4 additions & 1 deletion src/axolotl/utils/config.py
@@ -152,7 +152,10 @@ def validate_config(cfg):
raise ValueError("Require cfg.load_in_4bit to be True for qlora")

if not cfg.load_in_8bit and cfg.adapter == "lora":
LOG.warning("We recommend setting `load_in_8bit: true` for LORA finetuning")
LOG.warning("We recommend setting `load_in_8bit: true` for LoRA finetuning")

if not cfg.load_in_8bit and cfg.adapter == "ia3":
LOG.warning("We recommend setting `load_in_8bit: true` for IA3 finetuning")

if cfg.relora_steps:
if cfg.adapter not in ("lora", "qlora"):
26 changes: 12 additions & 14 deletions src/axolotl/utils/models.py
@@ -366,23 +366,21 @@ def load_model(
if hasattr(module, "weight"):
module.to(torch.float32)

needs_fa2_dtype = cfg.adapter or cfg.fsdp
if (
(cfg.adapter == "lora" and cfg.load_in_8bit)
or (cfg.adapter == "qlora" and cfg.load_in_4bit)
or (cfg.adapter == "ia3" and cfg.load_in_8bit)
):
require_peft: bool = False
if cfg.adapter in ["lora", "qlora", "ia3"]:
require_peft = True

if require_peft:
LOG.info("converting PEFT model w/ prepare_model_for_kbit_training")
if cfg.gradient_checkpointing:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(
model, use_gradient_checkpointing=cfg.gradient_checkpointing
)
needs_fa2_dtype = True

# LlamaRMSNorm layers are in fp32 after kbit_training or full finetune, so we need to
# convert them back to fp16/bf16 for flash-attn compatibility.
if needs_fa2_dtype or (cfg.flash_attention and cfg.is_llama_derived_model):
if require_peft or cfg.fsdp or (cfg.flash_attention and cfg.is_llama_derived_model):
LOG.info("converting modules to %s for flash attention", cfg.torch_dtype)
for name, module in model.named_modules():
if "norm" in name:
@@ -452,11 +450,11 @@ def load_llama_adapter(model, cfg):
task_type="CAUSAL_LM",
)

if cfg.lora_model_dir:
if cfg.peft_model_dir or cfg.lora_model_dir:
LOG.debug("Loading pretained PEFT - llama_adapter")
model = PeftModel.from_pretrained(
model,
cfg.lora_model_dir,
cfg.peft_model_dir or cfg.lora_model_dir,
torch_dtype=torch.float16,
)
else:
@@ -504,11 +502,11 @@ def load_lora(model, cfg, inference=False):
task_type="CAUSAL_LM",
)

if cfg.lora_model_dir:
if cfg.peft_model_dir:
LOG.debug("Loading pretained PEFT - LoRA")
model = PeftModel.from_pretrained(
model,
cfg.lora_model_dir,
cfg.peft_model_dir,
is_trainable=(not inference),
)
else:
@@ -537,11 +535,11 @@ def load_ia3(model, cfg, inference=False):
**ia3_config_kwargs,
)

if cfg.ia3_model_dir:
if cfg.peft_model_dir:
LOG.debug("Loading pretained PEFT - IA3")
model = PeftModel.from_pretrained(
model,
cfg.ia3_model_dir,
cfg.peft_model_dir,
is_trainable=(not inference),
)
else:
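
For orientation, the `models.py` hunks above converge on a single `cfg.peft_model_dir` check when resuming from a pretrained LoRA/QLoRA/IA3 adapter. A hedged sketch of that flow using the same PEFT calls that appear in the diff follows; the function name and bare `cfg` attributes are illustrative, not Axolotl's API.

```python
# Hedged sketch of the post-change loading flow: one cfg.peft_model_dir decides
# whether to resume from a pretrained adapter or create a fresh one.
from peft import (
    LoraConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)


def attach_lora(model, cfg, inference=False):
    # mirror prepare_model_for_kbit_training as used in load_model above
    model = prepare_model_for_kbit_training(
        model, use_gradient_checkpointing=cfg.gradient_checkpointing
    )
    if cfg.peft_model_dir:
        # resume training (or run inference) from an existing adapter directory
        return PeftModel.from_pretrained(
            model, cfg.peft_model_dir, is_trainable=not inference
        )
    # otherwise attach a freshly initialized LoRA adapter
    lora_config = LoraConfig(
        r=cfg.lora_r,
        lora_alpha=cfg.lora_alpha,
        lora_dropout=cfg.lora_dropout,
        task_type="CAUSAL_LM",
    )
    return get_peft_model(model, lora_config)
```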