diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index cbefb966a4..9a8d992759 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,6 +18,7 @@ jobs: python_version: "3.10" pytorch: 2.1.2 axolotl_extras: + axolotl_args: "--extra-index-url https://download.pytorch.org/whl/cu118" is_latest: true - cuda: 121 cuda_version: 12.1.0 @@ -54,6 +55,7 @@ jobs: BASE_TAG=${{ github.ref_name }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }} CUDA=${{ matrix.cuda }} PYTORCH_VERSION=${{ matrix.pytorch }} + AXOLOTL_ARGS=${{ matrix.axolotl_args }} file: ./docker/Dockerfile push: ${{ github.event_name != 'pull_request' }} tags: | diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 802dbf0917..41eae1071a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -70,6 +70,7 @@ jobs: cuda_version: 11.8.0 python_version: "3.10" pytorch: 2.1.2 + axolotl_args: "--extra-index-url https://download.pytorch.org/whl/cu118" - cuda: 121 cuda_version: 12.1.0 python_version: "3.10" @@ -87,11 +88,13 @@ jobs: # Set up build arguments BASE_TAG="main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" CUDA="${{ matrix.cuda }}" + AXOLOTL_ARGS="${{ matrix.axolotl_args }}" PYTORCH_VERSION="${{ matrix.pytorch }}" # Build the Docker image docker build . 
\ --file ./docker/Dockerfile-tests \ --build-arg BASE_TAG=$BASE_TAG \ + --build-arg AXOLOTL_ARGS="$AXOLOTL_ARGS" \ --build-arg CUDA=$CUDA \ --build-arg GITHUB_REF=$GITHUB_REF \ --build-arg PYTORCH_VERSION=$PYTORCH_VERSION \ diff --git a/docker/Dockerfile b/docker/Dockerfile index efc40ab061..9154fcda3c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -3,6 +3,7 @@ FROM winglian/axolotl-base:$BASE_TAG ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX" ARG AXOLOTL_EXTRAS="" +ARG AXOLOTL_ARGS="" ARG CUDA="118" ENV BNB_CUDA_VERSION=$CUDA ARG PYTORCH_VERSION="2.0.1" @@ -20,9 +21,9 @@ WORKDIR /workspace/axolotl # If AXOLOTL_EXTRAS is set, append it in brackets RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ - pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS]; \ + pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ else \ - pip install -e .[deepspeed,flash-attn,mamba-ssm]; \ + pip install -e .[deepspeed,flash-attn,mamba-ssm] $AXOLOTL_ARGS; \ fi # So we can test the Docker image diff --git a/docker/Dockerfile-tests b/docker/Dockerfile-tests index 2ec94f8684..e7df99b472 100644 --- a/docker/Dockerfile-tests +++ b/docker/Dockerfile-tests @@ -3,6 +3,7 @@ FROM winglian/axolotl-base:$BASE_TAG ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX" ARG AXOLOTL_EXTRAS="" +ARG AXOLOTL_ARGS="" ARG CUDA="118" ENV BNB_CUDA_VERSION=$CUDA ARG PYTORCH_VERSION="2.0.1" @@ -24,9 +25,9 @@ RUN git fetch origin +$GITHUB_REF && \ # If AXOLOTL_EXTRAS is set, append it in brackets RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ - pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS]; \ + pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ else \ - pip install -e .[deepspeed,flash-attn,mamba-ssm]; \ + pip install -e .[deepspeed,flash-attn,mamba-ssm] $AXOLOTL_ARGS; \ fi # So we can test the Docker image diff --git a/requirements.txt b/requirements.txt index a5986fa4ff..6532d39991 100644 --- a/requirements.txt +++ 
def is_xformers_swiglu_available() -> bool:
    """Report whether xformers provides a ``swiglu_packedw`` operator.

    The check looks the operator up through
    ``xformers.ops.common.get_xformers_operator`` and calls it with no
    arguments as a probe.

    Returns:
        ``False`` when xformers (or ``get_xformers_operator``) cannot be
        imported, or when the probe raises a ``RuntimeError`` whose message
        contains ``"No such operator xformers::swiglu_packedw "``.
        ``True`` when the probe succeeds or raises any other
        ``RuntimeError``.

    Exceptions other than ``ImportError`` (during import) and
    ``RuntimeError`` (during the probe) are not handled and propagate.
    """
    try:
        from xformers.ops.common import get_xformers_operator
    except ImportError:
        # Missing or incompatible xformers install: as an availability
        # guard, answer "not available" rather than crash the caller.
        return False

    try:
        get_xformers_operator("swiglu_packedw")()
    except RuntimeError as exc:
        # Only the "No such operator" message means the kernel is absent
        # (the trailing space is part of the matched text). Any other
        # RuntimeError is treated as "operator exists" - presumably the
        # real operator rejecting the empty argument list; TODO confirm.
        return "No such operator xformers::swiglu_packedw " not in str(exc)
    return True
class SaveAxolotlConfigtoMlflowCallback(TrainerCallback):
    # pylint: disable=duplicate-code
    """Trainer callback that logs the axolotl config file as an MLflow artifact.

    At the start of training, on the main process only, the config file at
    ``axolotl_config_path`` is copied to a temporary ``axolotl_config_*.yml``
    file, which is passed to ``mlflow.log_artifact``. The temporary copy is
    removed afterwards.
    """

    def __init__(self, axolotl_config_path):
        """Remember the path of the config file to log.

        Args:
            axolotl_config_path: Path of the axolotl config file; it is only
                read later, in ``on_train_begin``.
        """
        self.axolotl_config_path = axolotl_config_path

    def on_train_begin(
        self,
        args: "AxolotlTrainingArguments",  # pylint: disable=unused-argument
        state: TrainerState,  # pylint: disable=unused-argument
        control: TrainerControl,
        **kwargs,  # pylint: disable=unused-argument
    ):
        """Log a copy of the config file to MLflow, then return ``control``.

        ``FileNotFoundError`` and ``ConnectionError`` are logged as warnings
        and do not interrupt training; any other exception propagates.

        Returns:
            The ``control`` object that was passed in, unmodified.
        """
        import os  # pylint: disable=import-outside-toplevel

        if not is_main_process():
            return control

        temp_path = None
        try:
            # delete=False so the file outlives the handle and can be
            # written by copyfile and read by mlflow through its name.
            with NamedTemporaryFile(
                mode="w", delete=False, suffix=".yml", prefix="axolotl_config_"
            ) as temp_file:
                temp_path = temp_file.name
            copyfile(self.axolotl_config_path, temp_path)
            # artifact_path="" - presumably the root of the run's artifact
            # store; confirm against the MLflow documentation.
            mlflow.log_artifact(temp_path, artifact_path="")
            LOG.info("The Axolotl config has been saved to the MLflow artifacts.")
        except (FileNotFoundError, ConnectionError) as err:
            LOG.warning(f"Error while saving Axolotl config to MLflow: {err}")
        finally:
            # The temporary copy is not needed once log_artifact has
            # returned; remove it so runs do not accumulate stray files.
            if temp_path is not None:
                try:
                    os.unlink(temp_path)
                except OSError:
                    # Cleanup is best-effort; never fail training over it.
                    pass
        return control