From eb300b6c573c509ed5b2618ae076ff8a677521ff Mon Sep 17 00:00:00 2001 From: Maxime <672982+maximegmd@users.noreply.github.com> Date: Sun, 4 Feb 2024 14:20:40 +0100 Subject: [PATCH 1/7] add mps support --- examples/tiny-llama/lora-mps.yml | 65 ++++++++++++++++++++++++++++++++ setup.py | 15 +++++--- src/axolotl/monkeypatch/utils.py | 4 +- src/axolotl/utils/bench.py | 10 ++++- src/axolotl/utils/models.py | 6 ++- 5 files changed, 91 insertions(+), 9 deletions(-) create mode 100644 examples/tiny-llama/lora-mps.yml diff --git a/examples/tiny-llama/lora-mps.yml b/examples/tiny-llama/lora-mps.yml new file mode 100644 index 0000000000..e744638ba4 --- /dev/null +++ b/examples/tiny-llama/lora-mps.yml @@ -0,0 +1,65 @@ +base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer +is_llama_derived_model: true + +load_in_8bit: true +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0 +output_dir: ./lora-out + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true +eval_sample_packing: false + +adapter: lora +lora_model_dir: +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_torch +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: false +tf32: true + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: false + +warmup_steps: 10 +evals_per_epoch: 0 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/setup.py b/setup.py index 6f816ce4a6..39d705711d 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,8 @@ """setup.py for axolotl""" from importlib.metadata import PackageNotFoundError, version - +from packaging.version import Version, parse +import platform from setuptools import find_packages, setup @@ -26,11 +27,15 @@ def parse_requirements(): _install_requires.append(line) try: - torch_version = version("torch") - _install_requires.append(f"torch=={torch_version}") - if torch_version.startswith("2.1."): + if "Darwin" in platform.system(): _install_requires.pop(_install_requires.index("xformers==0.0.22")) - _install_requires.append("xformers>=0.0.23") + else: + torch_version = parse(version("torch")) + _install_requires.append(f"torch=={torch_version}") + + if torch_version >= Version("2.1"): + _install_requires.pop(_install_requires.index("xformers==0.0.22")) + _install_requires.append("xformers>=0.0.23") except PackageNotFoundError: pass diff --git a/src/axolotl/monkeypatch/utils.py b/src/axolotl/monkeypatch/utils.py index 63141635ab..e43c58650a 100644 --- a/src/axolotl/monkeypatch/utils.py +++ b/src/axolotl/monkeypatch/utils.py @@ -186,8 +186,8 @@ def mask_2d_to_4d( # Create a binary mask from the original mask where zeros remain zeros and all other values are set to one binary_mask = torch.where( mask != 0, - torch.tensor(1).to(dtype), - torch.tensor(0).to(dtype), + torch.tensor(1, device=mask.device).to(dtype), + torch.tensor(0, device=mask.device).to(dtype), ) # Create a block-diagonal mask. diff --git a/src/axolotl/utils/bench.py b/src/axolotl/utils/bench.py index 8f33665c69..8e3bc72641 100644 --- a/src/axolotl/utils/bench.py +++ b/src/axolotl/utils/bench.py @@ -46,6 +46,11 @@ def gpu_memory_usage_all(device=0): smi = gpu_memory_usage_smi(device) return usage, reserved - usage, max(0, smi - reserved) +def mps_memory_usage_all(): + usage = torch.mps.current_allocated_memory() / 1024.0**3 + reserved = torch.mps.driver_allocated_memory() / 1024.0**3 + return usage, reserved - usage, 0 + @check_cuda_device(0.0) def gpu_memory_usage_smi(device=0): @@ -63,7 +68,10 @@ def gpu_memory_usage_smi(device=0): def log_gpu_memory_usage(log, msg, device): - usage, cache, misc = gpu_memory_usage_all(device) + if torch.backends.mps.is_available(): + usage, cache, misc = mps_memory_usage_all() + else: + usage, cache, misc = gpu_memory_usage_all(device) extras = [] if cache > 0: extras.append(f"+{cache:.03f}GB cache") diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index 52a81ea2c0..6efad38434 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -429,6 +429,10 @@ def load_model( model_kwargs["device_map"] = device_map model_kwargs["torch_dtype"] = cfg.torch_dtype + + if torch.backends.mps.is_available(): + model_kwargs["device_map"] = "mps:0" + # TODO can we put the reference model on it's own gpu? I think we have to move logits around to calculate loss # if cfg.rl: # if torch.cuda.device_count() > 1: @@ -668,7 +672,7 @@ def load_model( ): model.config.eos_token_id = tokenizer.eos_token_id - if hasattr(model, "device") and model.device.type == "cuda": + if hasattr(model, "device") and (model.device.type == "cuda" or model.device.type == "mps"): log_gpu_memory_usage(LOG, "after model load", model.device) # make sure these are fp32 per Ramesh et al. (2021) From 67c70d19543228f5cfc0718e7e4166441b040656 Mon Sep 17 00:00:00 2001 From: Maxime <672982+maximegmd@users.noreply.github.com> Date: Sun, 4 Feb 2024 18:27:38 +0100 Subject: [PATCH 2/7] linter stuff --- setup.py | 3 ++- src/axolotl/utils/bench.py | 1 + src/axolotl/utils/models.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 39d705711d..f6a530dfae 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,9 @@ """setup.py for axolotl""" +import platform from importlib.metadata import PackageNotFoundError, version + from packaging.version import Version, parse -import platform from setuptools import find_packages, setup diff --git a/src/axolotl/utils/bench.py b/src/axolotl/utils/bench.py index 8e3bc72641..c039e790a1 100644 --- a/src/axolotl/utils/bench.py +++ b/src/axolotl/utils/bench.py @@ -46,6 +46,7 @@ def gpu_memory_usage_all(device=0): smi = gpu_memory_usage_smi(device) return usage, reserved - usage, max(0, smi - reserved) + def mps_memory_usage_all(): usage = torch.mps.current_allocated_memory() / 1024.0**3 reserved = torch.mps.driver_allocated_memory() / 1024.0**3 diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index 6efad38434..1df6228ab5 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -672,7 +672,7 @@ def load_model( ): model.config.eos_token_id = tokenizer.eos_token_id - if hasattr(model, "device") and (model.device.type == "cuda" or model.device.type == "mps"): + if hasattr(model, "device") and model.device.type in ("cuda", "mps"): log_gpu_memory_usage(LOG, "after model load", model.device) # make sure these are fp32 per Ramesh et al. (2021) From 4609e3b166ff0ce8e926f39d541aa7ef76592ec4 Mon Sep 17 00:00:00 2001 From: Maxime <672982+maximegmd@users.noreply.github.com> Date: Tue, 6 Feb 2024 16:38:00 +0100 Subject: [PATCH 3/7] CI fixes --- .github/workflows/tests.yml | 1 + docker/Dockerfile | 2 ++ 2 files changed, 3 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2d99695241..b14d080ba2 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -48,6 +48,7 @@ jobs: - name: Install dependencies run: | + pip3 install packaging pip3 install -U -e . pip3 install -r requirements-tests.txt diff --git a/docker/Dockerfile b/docker/Dockerfile index efc40ab061..f8b97771d6 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -18,6 +18,8 @@ RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git WORKDIR /workspace/axolotl +RUN pip install packaging + # If AXOLOTL_EXTRAS is set, append it in brackets RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS]; \ From 980e7aa44d667b9cbbfe01b9743edb00d0ac447b Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Wed, 7 Feb 2024 09:43:31 -0500 Subject: [PATCH 4/7] install packaging for various tests --- .github/workflows/tests.yml | 1 + docker/Dockerfile | 3 ++- docker/Dockerfile-tests | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b14d080ba2..68ca8534ce 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -48,6 +48,7 @@ jobs: - name: Install dependencies run: | + pip3 install --upgrade pip pip3 install packaging pip3 install -U -e . pip3 install -r requirements-tests.txt diff --git a/docker/Dockerfile b/docker/Dockerfile index f8b97771d6..f59033c7b1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -18,7 +18,8 @@ RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git WORKDIR /workspace/axolotl -RUN pip install packaging +RUN pip install --upgrade pip && \ + pip install packaging # If AXOLOTL_EXTRAS is set, append it in brackets RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ diff --git a/docker/Dockerfile-tests b/docker/Dockerfile-tests index 2ec94f8684..585f68ba39 100644 --- a/docker/Dockerfile-tests +++ b/docker/Dockerfile-tests @@ -22,6 +22,9 @@ WORKDIR /workspace/axolotl RUN git fetch origin +$GITHUB_REF && \ git checkout FETCH_HEAD +RUN pip install --upgrade pip && \ + pip install packaging \ + # If AXOLOTL_EXTRAS is set, append it in brackets RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS]; \ From 73e72db00e6ab94b1b147281ff494c8e62d350d3 Mon Sep 17 00:00:00 2001 From: Maxime <672982+maximegmd@users.noreply.github.com> Date: Wed, 7 Feb 2024 17:21:27 +0100 Subject: [PATCH 5/7] Update setup.py --- setup.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index f6a530dfae..d4a39b76ea 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,9 @@ """setup.py for axolotl""" import platform +import re from importlib.metadata import PackageNotFoundError, version -from packaging.version import Version, parse from setuptools import find_packages, setup @@ -31,10 +31,20 @@ def parse_requirements(): if "Darwin" in platform.system(): _install_requires.pop(_install_requires.index("xformers==0.0.22")) else: - torch_version = parse(version("torch")) + torch_version = version("torch") _install_requires.append(f"torch=={torch_version}") - if torch_version >= Version("2.1"): + version_match = re.match(r"^(\d+)\.(\d+)(?:\.(\d+))?", torch_version) + if version_match: + major, minor, patch = version_match.groups() + major, minor = int(major), int(minor) + patch = ( + int(patch) if patch is not None else 0 + ) # Default patch to 0 if not present + else: + raise ValueError("Invalid version format") + + if (major, minor) >= (2, 1): _install_requires.pop(_install_requires.index("xformers==0.0.22")) _install_requires.append("xformers>=0.0.23") except PackageNotFoundError: From 521d4db16848cbd43fd47980f34955c5c205274e Mon Sep 17 00:00:00 2001 From: Maxime <672982+maximegmd@users.noreply.github.com> Date: Wed, 7 Feb 2024 17:22:37 +0100 Subject: [PATCH 6/7] Revert "install packaging for various tests" This reverts commit 980e7aa44d667b9cbbfe01b9743edb00d0ac447b. --- .github/workflows/tests.yml | 1 - docker/Dockerfile | 3 +-- docker/Dockerfile-tests | 3 --- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 68ca8534ce..b14d080ba2 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -48,7 +48,6 @@ jobs: - name: Install dependencies run: | - pip3 install --upgrade pip pip3 install packaging pip3 install -U -e . pip3 install -r requirements-tests.txt diff --git a/docker/Dockerfile b/docker/Dockerfile index f59033c7b1..f8b97771d6 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -18,8 +18,7 @@ RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git WORKDIR /workspace/axolotl -RUN pip install --upgrade pip && \ - pip install packaging +RUN pip install packaging # If AXOLOTL_EXTRAS is set, append it in brackets RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ diff --git a/docker/Dockerfile-tests b/docker/Dockerfile-tests index 585f68ba39..2ec94f8684 100644 --- a/docker/Dockerfile-tests +++ b/docker/Dockerfile-tests @@ -22,9 +22,6 @@ WORKDIR /workspace/axolotl RUN git fetch origin +$GITHUB_REF && \ git checkout FETCH_HEAD -RUN pip install --upgrade pip && \ - pip install packaging \ - # If AXOLOTL_EXTRAS is set, append it in brackets RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS]; \ From e25fe924718f97fd426bd40b434f7f3a303ed98d Mon Sep 17 00:00:00 2001 From: Maxime <672982+maximegmd@users.noreply.github.com> Date: Wed, 7 Feb 2024 17:22:43 +0100 Subject: [PATCH 7/7] Revert "CI fixes" This reverts commit 4609e3b166ff0ce8e926f39d541aa7ef76592ec4. --- .github/workflows/tests.yml | 1 - docker/Dockerfile | 2 -- 2 files changed, 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b14d080ba2..2d99695241 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -48,7 +48,6 @@ jobs: - name: Install dependencies run: | - pip3 install packaging pip3 install -U -e . pip3 install -r requirements-tests.txt diff --git a/docker/Dockerfile b/docker/Dockerfile index f8b97771d6..efc40ab061 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -18,8 +18,6 @@ RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git WORKDIR /workspace/axolotl -RUN pip install packaging - # If AXOLOTL_EXTRAS is set, append it in brackets RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS]; \