-
-
Notifications
You must be signed in to change notification settings - Fork 894
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
memoize dataset length for eval sample packing (#1974)
* wip on multimodal sample packing support
* wip on multimodal packing support
* llama-1b-yml
* setup logging for test
* yml
* yml
* yml
* fix for `__len__` for eval sample packing
* reverted irrelevant changes
* reformatted, reverted log message
* reverted unnecessary changes
* added e2e multigpu testing for eval sample packing
* formatting
* fixed e2e test_eval params
* fix test_eval e2e multigpu
* fix test_eval e2e multigpu
* Update tests/e2e/multigpu/test_eval.py

  Co-authored-by: Wing Lian <[email protected]>
* Update tests/e2e/multigpu/test_eval.py

  Co-authored-by: Wing Lian <[email protected]>

---------

Co-authored-by: Wing Lian <[email protected]>
- Loading branch information
1 parent
54673fd
commit f62e237
Showing 3 changed files with 239 additions and 6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
# QLoRA fine-tuning config with sample packing enabled for both training
# and evaluation. Keys with an empty value parse as null (unset).

# Base model and quantized loading
base_model: meta-llama/Llama-3.2-1B

load_in_8bit: false
load_in_4bit: true
strict: false

# Dataset, validation split and output location
datasets:
  - path: teknium/GPT4-LLM-Cleaned
    type: alpaca
dataset_prepared_path: last_run_prepared
val_set_size: 0.1
output_dir: ./outputs/qlora-out

# Adapter type
adapter: qlora
lora_model_dir:

# Sequence length and packing (train and eval)
sequence_len: 2048
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true

# LoRA hyperparameters and target modules
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_linear: true
lora_fan_in_fan_out:
lora_target_modules:
  - gate_proj
  - down_proj
  - up_proj
  - q_proj
  - v_proj
  - k_proj
  - o_proj

# Weights & Biases settings (all unset)
wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

# Batch sizing, epochs, optimizer and learning-rate schedule
gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 1
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

# Loss masking, batching and numeric precision
train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false

# Checkpointing, resume, logging and attention implementation
gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

# Loss watchdog
loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3

# Warmup, evaluation/save cadence and distributed-training options
warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_max_new_tokens: 128
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
  pad_token: "<|end_of_text|>"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
""" | ||
E2E tests for multigpu eval | ||
""" | ||
import logging | ||
import os | ||
import unittest | ||
from pathlib import Path | ||
|
||
import yaml | ||
from accelerate.test_utils import execute_subprocess_async | ||
|
||
from axolotl.utils.dict import DictDefault | ||
|
||
from ..utils import with_temp_dir | ||
|
||
# Module-level logger, named "axolotl.tests.e2e.multigpu". | ||
LOG = logging.getLogger("axolotl.tests.e2e.multigpu") | ||
# Set at import time, so it is already in os.environ when the tests run. | ||
# NOTE(review): presumably this turns off Weights & Biases reporting in the launched runs - confirm. | ||
os.environ["WANDB_DISABLED"] = "true" | ||
|
||
# Directory four levels above this file. | ||
# NOTE(review): presumably the repository root - verify against this file's location. | ||
AXOLOTL_ROOT = Path(__file__).parent.parent.parent.parent | ||
|
||
|
||
class TestMultiGPUEval(unittest.TestCase):
    """
    Test case for MultiGPU Eval Sample Packing

    Both tests launch the same short QLoRA training run through
    ``accelerate launch`` with two processes. They differ only in the value
    of ``eval_sample_packing``, so the shared config and the launch sequence
    live in private helpers.
    """

    @staticmethod
    def _build_cfg(temp_dir, eval_sample_packing):
        """
        Build the training config shared by every test in this class.

        Args:
            temp_dir: directory used as the run's ``output_dir``.
            eval_sample_packing: value stored under ``eval_sample_packing``;
                this is the only setting that varies between tests.

        Returns:
            A ``DictDefault`` holding the full config.
        """
        # pylint: disable=duplicate-code
        return DictDefault(
            {
                "base_model": "JackFram/llama-68m",
                "load_in_8bit": False,
                "load_in_4bit": True,
                "strict": False,
                "sequence_len": 2048,
                "adapter": "qlora",
                "sample_packing": True,
                "eval_sample_packing": eval_sample_packing,
                "pad_to_sequence_len": True,
                "lora_r": 8,
                "lora_alpha": 16,
                "lora_dropout": 0.05,
                "lora_target_linear": True,
                "lora_modules_to_save": ["embed_tokens", "lm_head"],
                "val_set_size": 0.1,
                "special_tokens": {"pad_token": "<|end_of_text|>"},
                "datasets": [
                    {
                        "path": "teknium/GPT4-LLM-Cleaned",
                        "type": "alpaca",
                    },
                ],
                "num_epochs": 1,
                "max_steps": 5,
                "micro_batch_size": 2,
                "gradient_accumulation_steps": 4,
                "output_dir": temp_dir,
                "learning_rate": 0.00001,
                "optimizer": "adamw_8bit",
                "lr_scheduler": "cosine",
                "flash_attention": True,
                "loss_watchdog_threshold": 5.0,
                "loss_watchdog_patience": 3,
                "bf16": "auto",
                "warmup_steps": 1,
                "evals_per_epoch": 2,
                "eval_max_new_tokens": 128,
                "saves_per_epoch": 1,
                "logging_steps": 1,
                "weight_decay": 0.0,
            }
        )

    @staticmethod
    def _launch_training(cfg, temp_dir):
        """
        Write ``cfg`` to ``<temp_dir>/config.yaml`` and train on two processes.

        Args:
            cfg: config object exposing ``to_dict()``.
            temp_dir: directory that receives ``config.yaml``; created if
                it does not exist yet.
        """
        config_path = Path(temp_dir) / "config.yaml"

        # write cfg to yaml file
        Path(temp_dir).mkdir(parents=True, exist_ok=True)
        with open(config_path, "w", encoding="utf-8") as fout:
            fout.write(yaml.dump(cfg.to_dict(), Dumper=yaml.Dumper))

        # The test has no assertions of its own; it relies on the launcher
        # call to report a failed run.
        execute_subprocess_async(
            [
                "accelerate",
                "launch",
                "--num-processes",
                "2",
                "-m",
                "axolotl.cli.train",
                str(config_path),
            ]
        )

    @with_temp_dir
    def test_eval_sample_packing(self, temp_dir):
        """Run training with ``eval_sample_packing`` enabled."""
        cfg = self._build_cfg(temp_dir, eval_sample_packing=True)
        self._launch_training(cfg, temp_dir)

    @with_temp_dir
    def test_eval(self, temp_dir):
        """Run training with ``eval_sample_packing`` disabled."""
        cfg = self._build_cfg(temp_dir, eval_sample_packing=False)
        self._launch_training(cfg, temp_dir)