diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index ada1fd0c48..09c26c2a67 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -24,6 +24,7 @@ jobs:
       - name: Install dependencies
         run: |
           pip3 install -e .
+          pip3 install flash-attn
           pip3 install -r requirements-tests.txt
 
       - name: Run e2e tests
diff --git a/src/axolotl/utils/bench.py b/src/axolotl/utils/bench.py
index 30f0985e75..b460b2ba7c 100644
--- a/src/axolotl/utils/bench.py
+++ b/src/axolotl/utils/bench.py
@@ -2,6 +2,7 @@
 
 import pynvml
 import torch
+from pynvml.nvml import NVMLError
 
 
 def gpu_memory_usage(device=0):
@@ -20,11 +21,13 @@ def gpu_memory_usage_smi(device=0):
         device = device.index
     if isinstance(device, str) and device.startswith("cuda:"):
         device = int(device[5:])
-
-    pynvml.nvmlInit()
-    handle = pynvml.nvmlDeviceGetHandleByIndex(device)
-    info = pynvml.nvmlDeviceGetMemoryInfo(handle)
-    return info.used / 1024.0**3
+    try:
+        pynvml.nvmlInit()
+        handle = pynvml.nvmlDeviceGetHandleByIndex(device)
+        info = pynvml.nvmlDeviceGetMemoryInfo(handle)
+        return info.used / 1024.0**3
+    except NVMLError:
+        return 0.0
 
 
 def log_gpu_memory_usage(log, msg, device):
diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py
index 90ed409b9c..a31f34b73e 100644
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -29,7 +29,7 @@ def get_device():
         cfg.device_map = "auto"
     else:
         if cfg.device.startswith("cuda"):
-            cfg.device_map = {"": cfg.local_rank}
+            cfg.device_map = {"": torch.cuda.current_device()}
         else:
             cfg.device_map = {"": cfg.device}
 
diff --git a/tests/e2e/test_lora_llama.py b/tests/e2e/test_lora_llama.py
index 7873b7ec20..905c3711fd 100644
--- a/tests/e2e/test_lora_llama.py
+++ b/tests/e2e/test_lora_llama.py
@@ -78,3 +78,45 @@ def test_lora(self):
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
 
         train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
+
+    def test_lora_packing(self):
+        cfg = DictDefault(
+            {
+                "base_model": "JackFram/llama-68m",
+                "base_model_config": "JackFram/llama-68m",
+                "tokenizer_type": "LlamaTokenizer",
+                "sequence_len": 1024,
+                "sample_packing": True,
+                "flash_attention": True,
+                "load_in_8bit": True,
+                "adapter": "lora",
+                "lora_r": 32,
+                "lora_alpha": 64,
+                "lora_dropout": 0.05,
+                "lora_target_linear": True,
+                "val_set_size": 0.1,
+                "special_tokens": {
+                    "unk_token": "<unk>",
+                    "bos_token": "<s>",
+                    "eos_token": "</s>",
+                },
+                "datasets": [
+                    {
+                        "path": "mhenrichsen/alpaca_2k_test",
+                        "type": "alpaca",
+                    },
+                ],
+                "num_epochs": 2,
+                "micro_batch_size": 8,
+                "gradient_accumulation_steps": 1,
+                "output_dir": tempfile.mkdtemp(),
+                "learning_rate": 0.00001,
+                "optimizer": "adamw_torch",
+                "lr_scheduler": "cosine",
+            }
+        )
+        normalize_config(cfg)
+        cli_args = TrainerCliArgs()
+        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
+
+        train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)