Commit 6001db6: merge

dakinggg committed Dec 14, 2023
2 parents 242bbe4 + 5fdcc43
Showing 156 changed files with 76,382 additions and 1,502 deletions.
8 changes: 8 additions & 0 deletions .github/CODEOWNERS
@@ -0,0 +1,8 @@
# Require admin approval to modify all files in the root of the repository
# This includes setup.py, the README, and the CODEOWNERS file itself!
/* @mosaicml/composer-team-admins

# Require admin approval to change the CI build configuration
# All CI Changes should be reviewed for security
/.ci/ @mosaicml/composer-team-admins
/.github/ @mosaicml/composer-team-admins
33 changes: 24 additions & 9 deletions .github/mcp/mcp_pytest.py
@@ -6,8 +6,8 @@
import argparse
import time

from mcli.sdk import (RunConfig, RunStatus, create_run, follow_run_logs,
wait_for_run_status)
from mcli import (RunConfig, RunStatus, create_run, follow_run_logs,
wait_for_run_status)

if __name__ == '__main__':

@@ -54,6 +54,9 @@
type=int,
default=1800,
help='Timeout for run (in seconds)')
parser.add_argument('--deps_group',
type=str,
help='Dependency group to install')
args = parser.parse_args()

name = args.name
@@ -89,7 +92,7 @@
clear_tmp_path_flag = '-o tmp_path_retention_policy=none'
command += f'''
pip install --upgrade --user .[all]
pip install --upgrade --user .[{args.deps_group}]
export COMMON_ARGS="-v --durations=20 -m '{args.pytest_markers}' {clear_tmp_path_flag}"
@@ -104,13 +107,25 @@

config = RunConfig(
name=name,
cluster=args.cluster,
gpu_type=args.gpu_type,
gpu_num=args.gpu_num,
compute={
'cluster': args.cluster,
'gpu_type': args.gpu_type,
'gpus': args.gpu_num
},
image=args.image,
integrations=[git_integration],
command=command,
scheduling={'max_duration': args.timeout / 60 / 60},
env_variables=[
{
'key': 'MOSAICML_PLATFORM',
'value': 'False',
},
{
'key': 'PYTHONUNBUFFERED',
'value': '1',
},
],
)

# Create run
@@ -127,7 +142,7 @@
print(line, end='')

print('[GHA] Run completed. Waiting for run to finish...')
run = wait_for_run_status(run, status='completed')
run = wait_for_run_status(run, status=RunStatus.COMPLETED)

# Fail if command exited with non-zero exit code or timed out
assert run.status == RunStatus.COMPLETED
# Fail if command exited with non-zero exit code or timed out (didn't reach COMPLETED)
assert run.status == RunStatus.COMPLETED, f'Run did not complete: {run.status} ({run.reason})'
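
Taken together, this file's changes move the script from the `mcli.sdk` import path to the top-level `mcli` package, fold the cluster/GPU arguments into a nested `compute` mapping, add two environment variables, and compare run status against `RunStatus.COMPLETED` instead of the string `'completed'`. A minimal sketch of the resulting flow, with illustrative values in place of the script's argparse inputs (the git integration from the surrounding script is omitted here):

```python
from mcli import (RunConfig, RunStatus, create_run, follow_run_logs,
                  wait_for_run_status)

# All values below are illustrative; the real script fills them from argparse.
config = RunConfig(
    name='mcp-pytest-example',
    compute={
        'cluster': 'example-cluster',   # hypothetical cluster name
        'gpu_type': 'a100_40gb',        # hypothetical GPU type
        'gpus': 8,
    },
    image='mosaicml/llm-foundry:2.1.0_cu121_flash2-latest',
    command="pip install --upgrade --user .[all] && pytest -m 'gpu'",
    scheduling={'max_duration': 1800 / 60 / 60},  # seconds -> hours, as in the script
    env_variables=[
        {'key': 'MOSAICML_PLATFORM', 'value': 'False'},
        {'key': 'PYTHONUNBUFFERED', 'value': '1'},
    ],
)

run = create_run(config)
for line in follow_run_logs(run):
    print(line, end='')

run = wait_for_run_status(run, status=RunStatus.COMPLETED)
assert run.status == RunStatus.COMPLETED, f'Run did not complete: {run.status}'
```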
27 changes: 12 additions & 15 deletions .github/workflows/docker.yaml
@@ -17,19 +17,18 @@ jobs:
strategy:
matrix:
include:
- name: '1.13.1_cu117'
base_image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
dep_groups: '[gpu]'
- name: '2.0.1_cu118'
base_image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
dep_groups: '[gpu]'
- name: '2.1.0_cu121'
base_image: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04
dep_groups: '[gpu]'
- name: '2.1.0_cu121_flash2'
base_image: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04
dep_groups: '[gpu-flash2]'

- name: '2.1.0_cu121_aws'
base_image: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04-aws
dep_groups: '[gpu]'
- name: '2.1.0_cu121_flash2_aws'
base_image: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04-aws
dep_groups: '[gpu-flash2]'
steps:
- name: Maximize Build Space on Worker
uses: easimon/maximize-build-space@v4
@@ -64,19 +63,17 @@ jobs:
GIT_SHA=$(echo ${{ github.sha }} | cut -c1-7)
echo "IMAGE_TAG=${GIT_SHA}" >> ${GITHUB_ENV}
if [ "${{ github.event_name }}" == "push" ]; then
echo "Triggered by push event."
PROD_REPO="mosaicml/llm-foundry"
IMAGE_TAG="${PROD_REPO}:${{matrix.name}}-${GIT_SHA},${PROD_REPO}:${{matrix.name}}-latest"
IMAGE_CACHE="${PROD_REPO}:${{matrix.name}}-buildcache"
elif [ "${{ github.event_name }}" == "pull_request" ]; then
if [ "${{ github.event_name }}" == "pull_request" ]; then
echo "Triggered by pull_request event."
STAGING_REPO="mosaicml/ci-staging"
IMAGE_TAG="${STAGING_REPO}:${{matrix.name}}-${GIT_SHA}"
IMAGE_CACHE="${STAGING_REPO}:${{matrix.name}}-buildcache"
else
echo "Triggered by unknown event: ${{ github.event_name }}"
exit 1
# Triggered by push or workflow_dispatch event
echo "Triggered by ${{ github.event_name }} event, releasing to prod"
PROD_REPO="mosaicml/llm-foundry"
IMAGE_TAG="${PROD_REPO}:${{matrix.name}}-${GIT_SHA},${PROD_REPO}:${{matrix.name}}-latest"
IMAGE_CACHE="${PROD_REPO}:${{matrix.name}}-buildcache"
fi
echo "IMAGE_TAG=${IMAGE_TAG}" >> ${GITHUB_ENV}
8 changes: 0 additions & 8 deletions .github/workflows/pr-cpu.yaml
@@ -19,14 +19,6 @@ jobs:
strategy:
matrix:
include:
- name: 'cpu-latest'
container: mosaicml/pytorch:latest_cpu # mosaicml/pytorch:1.13.1_cpu-python3.10-ubuntu20.04
markers: 'not gpu'
pytest_command: 'coverage run -m pytest'
- name: 'cpu-2.0.1'
container: mosaicml/pytorch:2.0.1_cpu-python3.10-ubuntu20.04
markers: 'not gpu'
pytest_command: 'coverage run -m pytest'
- name: 'cpu-2.1.0'
container: mosaicml/pytorch:2.1.0_cpu-python3.10-ubuntu20.04
markers: 'not gpu'
12 changes: 3 additions & 9 deletions .github/workflows/pr-gpu.yaml
@@ -18,24 +18,17 @@
uses: ./.github/workflows/pytest-gpu.yaml
strategy:
matrix:
# TODO: After the PR with the flash attention 2 images goes in, add the new unit test suite
include:
- name: 'gpu-latest'
container: mosaicml/pytorch:latest # mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
markers: 'gpu'
pytest_command: 'coverage run -m pytest'
- name: 'gpu-2.0.1'
container: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
markers: 'gpu'
pytest_command: 'coverage run -m pytest'
- name: 'gpu-2.1.0'
container: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04
markers: 'gpu'
pytest_command: 'coverage run -m pytest'
deps_group: 'all'
- name: 'gpu-2.1.0-flash2'
container: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
markers: 'gpu'
pytest_command: 'coverage run -m pytest'
deps_group: 'all-flash2'
name: ${{ matrix.name }}
if: github.repository_owner == 'mosaicml'
with:
@@ -45,5 +38,6 @@ jobs:
pytest-command: ${{ matrix.pytest_command }}
pytest-markers: ${{ matrix.markers }}
python-version: 3.9
deps-group: ${{ matrix.deps_group }}
secrets:
mcloud-api-key: ${{ secrets.MCLOUD_API_KEY }}
6 changes: 5 additions & 1 deletion .github/workflows/pytest-gpu.yaml
@@ -22,6 +22,9 @@ on:
required: false
type: string
default: 3.9
deps-group:
required: true
type: string
secrets:
mcloud-api-key:
required: true
@@ -77,4 +80,5 @@ jobs:
--image '${{ inputs.container }}' \
--pytest_markers '${{ inputs.pytest-markers }}' \
--pytest_command '${{ inputs.pytest-command }}' \
--timeout ${{ inputs.mcloud-timeout }} ${REF_ARGS}
--timeout ${{ inputs.mcloud-timeout }} ${REF_ARGS} \
--deps_group ${{ inputs.deps-group }}
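
The new required `deps-group` input is threaded through to `mcp_pytest.py` as `--deps_group`, which selects the pip extras group installed before the tests run (see the mcp_pytest.py hunk above). A toy sketch of that hand-off, using an example value from the pr-gpu matrix:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--deps_group', type=str, help='Dependency group to install')

# In CI the value comes from the workflow matrix, e.g. 'all' or 'all-flash2'.
args = parser.parse_args(['--deps_group', 'all-flash2'])

command = f'pip install --upgrade --user .[{args.deps_group}]'
print(command)  # pip install --upgrade --user .[all-flash2]
```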
5 changes: 5 additions & 0 deletions .gitignore
@@ -150,3 +150,8 @@ dmypy.json

# notebooks
notebooks/

# artifacts from training
**/*.pt
**/mlruns/*
**/tokenizer-save-dir-*/**
64 changes: 30 additions & 34 deletions README.md
@@ -26,7 +26,7 @@

# LLM Foundry

This repository contains code for training, finetuning, evaluating, and deploying LLMs for inference with [Composer](https://github.com/mosaicml/composer) and the [MosaicML platform](https://forms.mosaicml.com/demo?utm_source=github.com&utm_medium=referral&utm_campaign=llm-foundry). Designed to be easy-to-use, efficient _and_ flexible, this codebase is designed to enable rapid experimentation with the latest techniques.
This repository contains code for training, finetuning, evaluating, and deploying LLMs for inference with [Composer](https://github.com/mosaicml/composer) and the [MosaicML platform](https://forms.mosaicml.com/demo?utm_source=github.com&utm_medium=referral&utm_campaign=llm-foundry). Designed to be easy-to-use, efficient _and_ flexible, this codebase enables rapid experimentation with the latest techniques.

You'll find in this repo:
* `llmfoundry/` - source code for models, datasets, callbacks, utilities, etc.
@@ -45,15 +45,17 @@ You'll find in this repo:
Mosaic Pretrained Transformers (MPT) are GPT-style models with some special features -- Flash Attention for efficiency, ALiBi for context length extrapolation, and stability improvements to mitigate loss spikes. As part of MosaicML's Foundation series, we have open-sourced several MPT models:


| Model | Context Length | Download | Demo | Commercial use? |
|--------------------|----------------|----------------------------------------------------|------------------------------------------------------------------|-----------------|
| MPT-30B | 8192 | https://huggingface.co/mosaicml/mpt-30b | | Yes |
| MPT-30B-Instruct | 8192 | https://huggingface.co/mosaicml/mpt-30b-instruct | | Yes |
| MPT-30B-Chat | 8192 | https://huggingface.co/mosaicml/mpt-30b-chat | [Demo](https://huggingface.co/spaces/mosaicml/mpt-30b-chat) | No |
| MPT-7B | 2048 | https://huggingface.co/mosaicml/mpt-7b | | Yes |
| MPT-7B-Instruct | 2048 | https://huggingface.co/mosaicml/mpt-7b-instruct | | Yes |
| MPT-7B-Chat | 2048 | https://huggingface.co/mosaicml/mpt-7b-chat | [Demo](https://huggingface.co/spaces/mosaicml/mpt-7b-chat) | No |
| MPT-7B-StoryWriter | 65536 | https://huggingface.co/mosaicml/mpt-7b-storywriter | | Yes |
| Model | Context Length | Download | Commercial use? |
| ------------------ | -------------- | -------------------------------------------------- | --------------- |
| MPT-30B | 8192 | https://huggingface.co/mosaicml/mpt-30b | Yes |
| MPT-30B-Instruct | 8192 | https://huggingface.co/mosaicml/mpt-30b-instruct | Yes |
| MPT-30B-Chat | 8192 | https://huggingface.co/mosaicml/mpt-30b-chat | No |
| MPT-7b-8k | 8192 | https://huggingface.co/mosaicml/mpt-7b-8k | Yes |
| MPT-7b-8k-Chat | 8192 | https://huggingface.co/mosaicml/mpt-7b-8k-chat | No |
| MPT-7B | 2048 | https://huggingface.co/mosaicml/mpt-7b | Yes |
| MPT-7B-Instruct | 2048 | https://huggingface.co/mosaicml/mpt-7b-instruct | Yes |
| MPT-7B-Chat | 2048 | https://huggingface.co/mosaicml/mpt-7b-chat | No |
| MPT-7B-StoryWriter | 65536 | https://huggingface.co/mosaicml/mpt-7b-storywriter | Yes |
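
As background on the ALiBi feature mentioned above: ALiBi replaces learned positional embeddings with a per-head linear penalty on attention scores, which is what allows context length extrapolation (e.g. StoryWriter's 65536). A rough, self-contained sketch of the general technique, not LLM Foundry's actual implementation:

```python
import torch

def alibi_bias(n_heads: int, seq_len: int) -> torch.Tensor:
    """Per-head linear distance penalty, shape (n_heads, seq_len, seq_len)."""
    # Geometric slopes 2^-1, 2^-2, ...; this simple form assumes a
    # power-of-two head count, as in the ALiBi paper.
    slopes = torch.tensor([2.0 ** (-8.0 * (h + 1) / n_heads) for h in range(n_heads)])
    pos = torch.arange(seq_len)
    # distance[i, j] = j - i, clamped so only past positions are penalized
    distance = (pos[None, :] - pos[:, None]).clamp(max=0)
    return slopes[:, None, None] * distance[None, :, :]

# The bias is added to raw attention scores before the softmax.
bias = alibi_bias(n_heads=8, seq_len=4)
```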

To try out these models locally, [follow the instructions](https://github.com/mosaicml/llm-foundry/tree/main/scripts/inference#interactive-generation-with-modelgenerate) in `scripts/inference/README.md` to prompt HF models using our [hf_generate.py](https://github.com/mosaicml/llm-foundry/blob/main/scripts/inference/hf_generate.py) or [hf_chat.py](https://github.com/mosaicml/llm-foundry/blob/main/scripts/inference/hf_chat.py) scripts.

@@ -75,6 +77,8 @@ Tutorial videos from the community:
Something missing? Contribute with a PR!

# Latest News
* [Blog: Announcing MPT-7B-8K: 8K Context Length for Document Understanding](https://www.mosaicml.com/blog/long-context-mpt-7b-8k)
* [Blog: Training LLMs with AMD MI250 GPUs and MosaicML](https://www.mosaicml.com/blog/amd-mi250)
* [Blog: MPT-30B: Raising the bar for open-source foundation models](https://www.mosaicml.com/blog/mpt-30b)
* [Blog: Introducing MPT-7B](https://www.mosaicml.com/blog/mpt-7b)
* [Blog: Benchmarking LLMs on H100](https://www.mosaicml.com/blog/coreweave-nvidia-h100-part-1)
@@ -85,21 +89,14 @@ Something missing? Contribute with a PR!


# Hardware and Software Requirements
This codebase has been tested with PyTorch 1.13.1 and PyTorch 2.0.1 on systems with NVIDIA A100s and H100s.
This codebase has been tested with PyTorch 2.1 with NVIDIA A100s and H100s.
This codebase may also work on systems with other devices, such as consumer NVIDIA cards and AMD cards, but we are not actively testing these systems.
If you have success/failure using LLM Foundry on other systems, please let us know in a Github issue and we will update the support matrix!

| Device | Torch Version | Cuda Version | Status |
|---------------------------|------------------|--------------|-------------------------------|
| A100-40GB/80GB | 1.13.1 | 11.7 | :white_check_mark: Supported |
| A100-40GB/80GB | 2.0.1 | 11.7, 11.8 | :white_check_mark: Supported |
| A100-40GB/80GB | 2.1.0 | 11.8, 12.1 | :white_check_mark: Supported |
| H100-80GB | 1.13.1 | 11.7 | :x: Not Supported |
| H100-80GB | 2.0.1 | 11.8 | :white_check_mark: Supported |
| H100-80GB | 2.1.0 | 12.1 | :white_check_mark: Supported |
| A10-24GB | 1.13.1 | 11.7 | :construction: In Progress |
| A10-24GB | 2.0.1 | 11.7, 11.8 | :construction: In Progress |
| MI250 | 2.0.1 | ROCm 5.4 | :construction: In Progress |
| Device | Torch Version | Cuda Version | Status |
| -------------- | ------------- | ------------ | ---------------------------- |
| A100-40GB/80GB | 2.1.0 | 12.1 | :white_check_mark: Supported |
| H100-80GB | 2.1.0 | 12.1 | :white_check_mark: Supported |

## MosaicML Docker Images
We highly recommend using our prebuilt Docker images. You can find them here: https://hub.docker.com/orgs/mosaicml/repositories.
@@ -111,22 +108,21 @@ You can select a specific commit hash such as `mosaicml/llm-foundry:1.13.1_cu117

**Please Note:** The `mosaicml/llm-foundry` images do not come with the `llm-foundry` package preinstalled, just the dependencies. You will still need to `pip install llm-foundry` either from PyPi or from source.

| Docker Image | Torch Version | Cuda Version | LLM Foundry dependencies installed? |
|-------------------------------------------------------------|----------------|--------------|-------------------------------------|
| `mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04` | 1.13.1 | 11.7 | No |
| `mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04` | 2.0.1 | 11.8 | No |
| `mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04` | 2.1.0 | 12.1 | No |
| `mosaicml/llm-foundry:1.13.1_cu117-latest` | 1.13.1 | 11.7 | Yes |
| `mosaicml/llm-foundry:2.0.1_cu118-latest` | 2.0.1 | 11.8 | Yes |
| `mosaicml/llm-foundry:2.1.0_cu121-latest` | 2.1.0 | 12.1 | Yes (flash attention v1) |
| `mosaicml/llm-foundry:2.1.0_cu121_flash2-latest` | 2.1.0 | 12.1 | Yes (flash attention v2) |
| Docker Image | Torch Version | Cuda Version | LLM Foundry dependencies installed? |
| ------------------------------------------------------ | ------------- | ----------------- | ----------------------------------- |
| `mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04` | 2.1.0 | 12.1 (Infiniband) | No |
| `mosaicml/llm-foundry:2.1.0_cu121-latest` | 2.1.0 | 12.1 (Infiniband) | Yes (flash attention v1) |
| `mosaicml/llm-foundry:2.1.0_cu121_flash2-latest` | 2.1.0 | 12.1 (Infiniband) | Yes (flash attention v2) |
| `mosaicml/llm-foundry:2.1.0_cu121_aws-latest` | 2.1.0 | 12.1 (EFA) | Yes (flash attention v1) |
| `mosaicml/llm-foundry:2.1.0_cu121_flash2_aws-latest` | 2.1.0 | 12.1 (EFA) | Yes (flash attention v2) |


# Installation

This assumes you already have PyTorch and CMake installed.
This assumes you already have PyTorch, CMake, and packaging installed. If not, you can install them with `pip install cmake packaging torch`.

To get started, clone the repo and set up your environment. Instructions to do so differ slightly depending on whether you're using Docker.

### With Docker (recommended)

We *strongly* recommend working with LLM Foundry inside a Docker container (see our recommended Docker image above). If you are doing so, follow these steps to clone the repo and install the requirements.
@@ -188,7 +184,7 @@ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.o

Notes:
1. `attn_impl: triton` does not work.
1. We don't yet have a docker img where everything works perfectly. You might need to up/downgrade some packages (in our case, we needed to downgrade to `numpy==1.23.5`) before everything works without issue.
1. We don't yet have a Docker image where everything works perfectly. You might need to up/downgrade some packages (in our case, we needed to downgrade to `numpy==1.23.5`) before everything works without issue.

# Quickstart

Expand Down Expand Up @@ -242,7 +238,7 @@ python inference/hf_generate.py \
"Here's a quick recipe for baking chocolate chip cookies: Start by"
```

Note: the `composer` command used above to train the model refers to [Composer](https://github.com/mosaicml/composer) library's distributed launcher.
Note: the `composer` command used above to train the model refers to the [Composer](https://github.com/mosaicml/composer) library's distributed launcher.

If you have a write-enabled [HuggingFace auth token](https://huggingface.co/docs/hub/security-tokens), you can optionally upload your model to the Hub! Just export your token like this:

2 changes: 1 addition & 1 deletion llmfoundry/__init__.py
@@ -75,4 +75,4 @@
'TiktokenTokenizerWrapper',
]

__version__ = '0.3.0'
__version__ = '0.4.0'
2 changes: 1 addition & 1 deletion llmfoundry/callbacks/eval_gauntlet_callback.py
@@ -59,7 +59,7 @@ class EvalGauntlet(Callback):
logged under in the logger after eval
categories (dict): This contains the list of categories, as well as the subtasks within them, the
random baseline accuracy of each subtask, and the number of fewshot examples
used for the task. See `llmfoundry/scripts/eval/yamls/eval_gauntlet.yaml` to see the structure.
used for the task. See `llmfoundry/scripts/eval/yamls/eval_gauntlet_v0.2.yaml` to see the structure.
weighting (Weighting): The weighting scheme used to balance different tasks within each category.
Either assign them all equal weight, assign them weight proportional
to the dataset size, or assign them weight proportional to the log2 of the dataset size.
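
For intuition, the three weighting schemes described above differ as follows (a toy illustration with made-up subtask sizes, not the callback's API):

```python
import math

# Hypothetical subtask dataset sizes
sizes = {'task_a': 100, 'task_b': 1000}

equal_weights = {t: 1.0 for t in sizes}
size_weights = {t: float(n) for t, n in sizes.items()}
log_size_weights = {t: math.log2(n) for t, n in sizes.items()}
# task_b counts 10x task_a under size weighting, but only ~1.5x under log2.
```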