From 386c79c6bb0bb2966352444feda978fb270766d7 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Fri, 29 Dec 2023 17:38:10 +0900 Subject: [PATCH 1/9] fix: warn user to install mamba_ssm package --- src/axolotl/models/mamba/__init__.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/axolotl/models/mamba/__init__.py b/src/axolotl/models/mamba/__init__.py index 247c1d184b..6bea5b6b13 100644 --- a/src/axolotl/models/mamba/__init__.py +++ b/src/axolotl/models/mamba/__init__.py @@ -2,6 +2,16 @@ Modeling module for Mamba models """ +import importlib + + +def check_mamba_ssm_installed(): + mamba_ssm_spec = importlib.util.find_spec("mamba_ssm") + if mamba_ssm_spec is None: + raise ImportError( + "MambaLMHeadModel requires mamba_ssm. Please install it with `pip install -e .[mamba-ssm,flash-attn]`" + ) + def fix_mamba_attn_for_loss(): from mamba_ssm.models import mixer_seq_simple @@ -10,3 +20,6 @@ def fix_mamba_attn_for_loss(): mixer_seq_simple.MambaLMHeadModel = MambaLMHeadModelFixed return mixer_seq_simple.MambaLMHeadModel # pylint: disable=invalid-name + + +check_mamba_ssm_installed() From 573ccb390b53932b96cc00333e136127fbead737 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Fri, 29 Dec 2023 19:58:45 +0900 Subject: [PATCH 2/9] fix: move lib check into patch function --- src/axolotl/models/mamba/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/axolotl/models/mamba/__init__.py b/src/axolotl/models/mamba/__init__.py index 6bea5b6b13..13fe49bcbe 100644 --- a/src/axolotl/models/mamba/__init__.py +++ b/src/axolotl/models/mamba/__init__.py @@ -14,12 +14,11 @@ def check_mamba_ssm_installed(): def fix_mamba_attn_for_loss(): + check_mamba_ssm_installed() + from mamba_ssm.models import mixer_seq_simple from .modeling_mamba import MambaLMHeadModel as MambaLMHeadModelFixed mixer_seq_simple.MambaLMHeadModel = MambaLMHeadModelFixed return mixer_seq_simple.MambaLMHeadModel # pylint: disable=invalid-name - - -check_mamba_ssm_installed() From 32ddbfb0b9f901f4f8a413cfae7c98003202cd8b Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Sat, 30 Dec 2023 16:25:10 +0900 Subject: [PATCH 3/9] chore: move mamba to requirements and remove from setup --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 33ddd395df..e05cbe3f20 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,6 +34,8 @@ fschat==0.2.34 gradio==3.50.2 tensorboard +mamba-ssm==1.0.1 + # remote filesystems s3fs gcsfs From 4f84721541c48441d0e5a0bb8317508caac29b29 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Fri, 5 Jan 2024 16:10:55 +0900 Subject: [PATCH 4/9] fix: add packaging --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index e05cbe3f20..934be2b3d7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,6 +35,7 @@ gradio==3.50.2 tensorboard mamba-ssm==1.0.1 +packaging==23.2 # remote filesystems s3fs From cffbbc4df147558497f3a6887867e329ebabcf99 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Fri, 5 Jan 2024 18:48:21 +0900 Subject: [PATCH 5/9] fix: move packaging first --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 934be2b3d7..6c97a95e46 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,9 +33,9 @@ art fschat==0.2.34 gradio==3.50.2 tensorboard +packaging==23.2 mamba-ssm==1.0.1 -packaging==23.2 # remote filesystems s3fs From b4eed6b97835b043afef55ba8fdb0ae8429082de Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Fri, 5 Jan 2024 22:24:43 +0900 Subject: [PATCH 6/9] fix: remove duplicate --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6c97a95e46..96b55f7355 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ -packaging +packaging==23.2 peft==0.7.0 transformers @ git+https://github.com/huggingface/transformers.git@3cefac1d974db5e2825a0cb2b842883a628be7a0 tokenizers==0.15.0 @@ -33,7 +33,6 @@ art fschat==0.2.34 gradio==3.50.2 tensorboard -packaging==23.2 mamba-ssm==1.0.1 From 5e8e0aa119af53e9f0c372fa278e3be55bbb2cb1 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Tue, 9 Jan 2024 10:06:18 -0500 Subject: [PATCH 7/9] mamba fixes --- docker/Dockerfile | 4 ++-- requirements.txt | 2 +- setup.py | 1 + src/axolotl/models/mamba/__init__.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index f8e0528562..efc40ab061 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -20,9 +20,9 @@ WORKDIR /workspace/axolotl # If AXOLOTL_EXTRAS is set, append it in brackets RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ - pip install -e .[deepspeed,flash-attn,$AXOLOTL_EXTRAS]; \ + pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS]; \ else \ - pip install -e .[deepspeed,flash-attn]; \ + pip install -e .[deepspeed,flash-attn,mamba-ssm]; \ fi # So we can test the Docker image diff --git a/requirements.txt b/requirements.txt index 96b55f7355..b2595de504 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,7 +34,7 @@ fschat==0.2.34 gradio==3.50.2 tensorboard -mamba-ssm==1.0.1 +mamba-ssm==1.1.1 # remote filesystems s3fs diff --git a/setup.py b/setup.py index 874f126089..c174540e15 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ def parse_requirements(): "flash-attn" not in line and "flash-attention" not in line and "deepspeed" not in line + and "mamba-ssm" not in line and line and line[0] != "#" ): diff --git a/src/axolotl/models/mamba/__init__.py b/src/axolotl/models/mamba/__init__.py index 13fe49bcbe..fee88e3a43 100644 --- a/src/axolotl/models/mamba/__init__.py +++ b/src/axolotl/models/mamba/__init__.py @@ -9,7 +9,7 @@ def check_mamba_ssm_installed(): mamba_ssm_spec = importlib.util.find_spec("mamba_ssm") if mamba_ssm_spec is None: raise ImportError( - "MambaLMHeadModel requires mamba_ssm. Please install it with `pip install -e .[mamba-ssm,flash-attn]`" + "MambaLMHeadModel requires mamba_ssm. Please install it with `pip install -e .[mamba-ssm]`" ) From a0682834baefeb6587ce9e1451176d9405fa72d5 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Tue, 9 Jan 2024 11:09:38 -0500 Subject: [PATCH 8/9] refactor extras check for pylint check --- setup.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index c174540e15..04a51550d6 100644 --- a/setup.py +++ b/setup.py @@ -11,18 +11,14 @@ def parse_requirements(): with open("./requirements.txt", encoding="utf-8") as requirements_file: lines = [r.strip() for r in requirements_file.readlines()] for line in lines: + is_extras = ( + "flash-attn" in line or "deepspeed" in line or "mamba-ssm" in line + ) if line.startswith("--extra-index-url"): # Handle custom index URLs _, url = line.split() _dependency_links.append(url) - elif ( - "flash-attn" not in line - and "flash-attention" not in line - and "deepspeed" not in line - and "mamba-ssm" not in line - and line - and line[0] != "#" - ): + elif not is_extras and line and line[0] != "#": # Handle standard packages _install_requires.append(line) From f2a975d5e363ab49442cc39e0b044a365b34e1d3 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Wed, 10 Jan 2024 01:17:20 -0500 Subject: [PATCH 9/9] forgot flash-attention in check --- setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 04a51550d6..235018dcc5 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,10 @@ def parse_requirements(): lines = [r.strip() for r in requirements_file.readlines()] for line in lines: is_extras = ( - "flash-attn" in line or "deepspeed" in line or "mamba-ssm" in line + "flash-attn" in line + or "flash-attention" in line + or "deepspeed" in line + or "mamba-ssm" in line ) if line.startswith("--extra-index-url"): # Handle custom index URLs