From 992e742cdc79383926db3fbdac6660d39b97bd29 Mon Sep 17 00:00:00 2001 From: Bryan Thornbury Date: Mon, 4 Dec 2023 06:29:21 -0800 Subject: [PATCH] Support device_map=sequential & max_memory config parameters (#903) * Support device_map sequential (and others). Support max_memory in cfg. * Update documentation in README accordingly. * Update README.md --------- Co-authored-by: Wing Lian --- README.md | 6 ++++++ src/axolotl/utils/config.py | 2 +- src/axolotl/utils/models.py | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 92f7c8b20a..6146988bd1 100644 --- a/README.md +++ b/README.md @@ -612,6 +612,12 @@ eval_sample_packing: sample_packing_eff_est: total_num_tokens: +# Passed through to transformers when loading the model, if launched without accelerate +# Use `sequential` when training with model parallelism to limit memory +device_map: +# Defines the max memory usage per GPU on the system. Passed through to transformers when loading the model. +max_memory: + # If you want to use 'lora' or 'qlora' or leave blank to train all parameters in original model adapter: lora # If you already have a lora model trained that you want to load, put that here. 
diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py index 6ae49514ad..74da669289 100644 --- a/src/axolotl/utils/config.py +++ b/src/axolotl/utils/config.py @@ -27,7 +27,7 @@ def get_device(): cfg.device = get_device() if cfg.world_size == 1: - cfg.device_map = "auto" + cfg.device_map = cfg.device_map or "auto" else: if cfg.device.startswith("cuda"): cfg.device_map = {"": torch.cuda.current_device()} diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index acc6f41fa6..3037901761 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -216,6 +216,7 @@ def load_model( model_kwargs = {} model_kwargs["device_map"] = cfg.device_map + model_kwargs["max_memory"] = cfg.max_memory model_kwargs["torch_dtype"] = cfg.torch_dtype if cfg.model_revision: