From 2d9889aad2d1dfa7bf2cda2e5fa8f461b68180cb Mon Sep 17 00:00:00 2001
From: Daniel King
Date: Thu, 27 Jun 2024 14:40:32 -0700
Subject: [PATCH] 2.3.1

---
 .github/workflows/docker.yaml         |  6 ------
 .github/workflows/pr-cpu.yaml         |  4 ----
 .github/workflows/pr-gpu.yaml         | 15 ---------------
 README.md                             | 12 ++++++------
 mcli/mcli-1b-eval.yaml                |  2 +-
 mcli/mcli-1b-max-seq-len-8k.yaml      |  2 +-
 mcli/mcli-1b.yaml                     |  2 +-
 mcli/mcli-benchmark-mpt.yaml          |  2 +-
 mcli/mcli-convert-composer-to-hf.yaml |  2 +-
 mcli/mcli-hf-eval.yaml                |  2 +-
 mcli/mcli-hf-generate.yaml            |  2 +-
 mcli/mcli-llama2-finetune.yaml        |  2 +-
 mcli/mcli-openai-eval.yaml            |  2 +-
 mcli/mcli-pretokenize-oci-upload.yaml |  2 +-
 14 files changed, 16 insertions(+), 41 deletions(-)

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 89aa917809..e4e6f83551 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -17,12 +17,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - name: "2.3.0_cu121_flash2"
-            base_image: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04
-            dep_groups: "[gpu-flash2]"
-          - name: "2.3.0_cu121_flash2_aws"
-            base_image: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04-aws
-            dep_groups: "[gpu-flash2]"
           - name: "2.3.1_cu121"
             base_image: mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04
             dep_groups: "[gpu]"
diff --git a/.github/workflows/pr-cpu.yaml b/.github/workflows/pr-cpu.yaml
index c2dc71f13d..2dd1c0edab 100644
--- a/.github/workflows/pr-cpu.yaml
+++ b/.github/workflows/pr-cpu.yaml
@@ -19,10 +19,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - name: "cpu-2.3.0"
-            container: mosaicml/pytorch:2.3.0_cpu-python3.11-ubuntu20.04
-            markers: "not gpu"
-            pytest_command: "coverage run -m pytest"
           - name: "cpu-2.3.1"
             container: mosaicml/pytorch:2.3.1_cpu-python3.11-ubuntu20.04
             markers: "not gpu"
diff --git a/.github/workflows/pr-gpu.yaml b/.github/workflows/pr-gpu.yaml
index 5fe8cad483..c5638e403d 100644
--- a/.github/workflows/pr-gpu.yaml
+++ b/.github/workflows/pr-gpu.yaml
@@ -19,11 +19,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - name: "gpu-2.3.0-1"
-            container: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04
-            markers: "gpu"
-            pytest_command: "coverage run -m pytest"
-            pip_deps: "[all]"
           - name: "gpu-2.3.1-1"
             container: mosaicml/llm-foundry:2.3.1_cu121-latest
             markers: "gpu"
@@ -49,11 +44,6 @@
       fail-fast: false
       matrix:
         include:
-          - name: "gpu-2.3.0-2"
-            container: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04
-            markers: "gpu"
-            pytest_command: "coverage run -m pytest"
-            pip_deps: "[all]"
           - name: "gpu-2.3.1-2"
             container: mosaicml/llm-foundry:2.3.1_cu121-latest
             markers: "gpu"
@@ -79,11 +69,6 @@
       fail-fast: false
       matrix:
         include:
-          - name: "gpu-2.3.0-4"
-            container: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04
-            markers: "gpu"
-            pytest_command: "coverage run -m pytest"
-            pip_deps: "[all]"
           - name: "gpu-2.3.1-4"
             container: mosaicml/llm-foundry:2.3.1_cu121-latest
             markers: "gpu"
diff --git a/README.md b/README.md
index c92c252395..16c765c7e4 100644
--- a/README.md
+++ b/README.md
@@ -113,8 +113,8 @@ If you have success/failure using LLM Foundry on other systems, please let us kn
 
 | Device         | Torch Version | Cuda Version | Status                       |
 | -------------- | ------------- | ------------ | ---------------------------- |
-| A100-40GB/80GB | 2.3.0         | 12.1         | :white_check_mark: Supported |
-| H100-80GB      | 2.3.0         | 12.1         | :white_check_mark: Supported |
+| A100-40GB/80GB | 2.3.1         | 12.1         | :white_check_mark: Supported |
+| H100-80GB      | 2.3.1         | 12.1         | :white_check_mark: Supported |
 
 ## MosaicML Docker Images
 We highly recommend using our prebuilt Docker images. You can find them here: https://hub.docker.com/orgs/mosaicml/repositories.
@@ -122,15 +122,15 @@
 
 The `mosaicml/pytorch` images are pinned to specific PyTorch and CUDA versions, and are stable and rarely updated.
 The `mosaicml/llm-foundry` images are built with new tags upon every commit to the `main` branch.
-You can select a specific commit hash such as `mosaicml/llm-foundry:2.3.0_cu121_flash2-36ab1ba` or take the latest one using `mosaicml/llm-foundry:2.3.0_cu121_flash2-latest`.
+You can select a specific commit hash such as `mosaicml/llm-foundry:2.3.1_cu121-36ab1ba` or take the latest one using `mosaicml/llm-foundry:2.3.1_cu121-latest`.
 
 **Please Note:** The `mosaicml/llm-foundry` images do not come with the `llm-foundry` package preinstalled, just the dependencies. You will still need to `pip install llm-foundry` either from PyPi or from source.
 
 | Docker Image                                            | Torch Version | Cuda Version      | LLM Foundry dependencies installed? |
 | ------------------------------------------------------- | ------------- | ----------------- | ----------------------------------- |
-| `mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04`   | 2.3.0         | 12.1 (Infiniband) | No                                  |
-| `mosaicml/llm-foundry:2.3.0_cu121_flash2-latest`        | 2.3.0         | 12.1 (Infiniband) | Yes                                 |
-| `mosaicml/llm-foundry:2.3.0_cu121_flash2_aws-latest`    | 2.3.0         | 12.1 (EFA)        | Yes                                 |
+| `mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04`   | 2.3.1         | 12.1 (Infiniband) | No                                  |
+| `mosaicml/llm-foundry:2.3.1_cu121-latest`               | 2.3.1         | 12.1 (Infiniband) | Yes                                 |
+| `mosaicml/llm-foundry:2.3.1_cu121_aws-latest`           | 2.3.1         | 12.1 (EFA)        | Yes                                 |
 
 # Installation
diff --git a/mcli/mcli-1b-eval.yaml b/mcli/mcli-1b-eval.yaml
index fc72bac974..d8ef42d5d5 100644
--- a/mcli/mcli-1b-eval.yaml
+++ b/mcli/mcli-1b-eval.yaml
@@ -9,7 +9,7 @@ integrations:
 command: |
   cd llm-foundry/scripts/
   composer eval/eval.py /mnt/config/parameters.yaml
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 name: mpt-1b-eval
 
 compute:
diff --git a/mcli/mcli-1b-max-seq-len-8k.yaml b/mcli/mcli-1b-max-seq-len-8k.yaml
index 512ddc90c8..4b8eb601b2 100644
--- a/mcli/mcli-1b-max-seq-len-8k.yaml
+++ b/mcli/mcli-1b-max-seq-len-8k.yaml
@@ -17,7 +17,7 @@ command: |
     --out_root ./my-copy-c4 --splits train_small val_small \
     --concat_tokens 8192 --tokenizer EleutherAI/gpt-neox-20b --eos_text '<|endoftext|>'
   composer train/train.py /mnt/config/parameters.yaml
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 name: mpt-1b-ctx-8k-gpus-8
 
 compute:
diff --git a/mcli/mcli-1b.yaml b/mcli/mcli-1b.yaml
index 9850860358..20d41fb2cc 100644
--- a/mcli/mcli-1b.yaml
+++ b/mcli/mcli-1b.yaml
@@ -21,7 +21,7 @@ command: |
     eval_loader.dataset.split=val_small \
     max_duration=100ba \
     eval_interval=0
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 name: mpt-1b-gpus-8
 
 compute:
diff --git a/mcli/mcli-benchmark-mpt.yaml b/mcli/mcli-benchmark-mpt.yaml
index a4b3f52ba7..a7d44239b5 100644
--- a/mcli/mcli-benchmark-mpt.yaml
+++ b/mcli/mcli-benchmark-mpt.yaml
@@ -6,7 +6,7 @@ compute:
   # cluster: TODO # Name of the cluster to use for this run
   # gpu_type: a100_80gb # Type of GPU to use. We use a100_80gb in our experiments
 
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 
 integrations:
 - integration_type: git_repo
diff --git a/mcli/mcli-convert-composer-to-hf.yaml b/mcli/mcli-convert-composer-to-hf.yaml
index bebdf42926..60c708e2f6 100644
--- a/mcli/mcli-convert-composer-to-hf.yaml
+++ b/mcli/mcli-convert-composer-to-hf.yaml
@@ -13,7 +13,7 @@ command: |
     --hf_output_path s3://bucket/folder/hf/ \
     --output_precision bf16 \
 
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 name: convert-composer-hf
 
 compute:
diff --git a/mcli/mcli-hf-eval.yaml b/mcli/mcli-hf-eval.yaml
index 3e24bba9ae..e69e6dadda 100644
--- a/mcli/mcli-hf-eval.yaml
+++ b/mcli/mcli-hf-eval.yaml
@@ -16,7 +16,7 @@ gpu_num: 8
 # gpu_type:
 # cluster: # replace with your cluster here!
 
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 
 # The below is injected as a YAML file: /mnt/config/parameters.yaml
 parameters:
diff --git a/mcli/mcli-hf-generate.yaml b/mcli/mcli-hf-generate.yaml
index c3bf6d48cc..8b382c41f0 100644
--- a/mcli/mcli-hf-generate.yaml
+++ b/mcli/mcli-hf-generate.yaml
@@ -35,7 +35,7 @@ command: |
     "Here's a quick recipe for baking chocolate chip cookies: Start by" \
     "The best 5 cities to visit in Europe are"
 
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 name: hf-generate
 
 compute:
diff --git a/mcli/mcli-llama2-finetune.yaml b/mcli/mcli-llama2-finetune.yaml
index 932d013442..f70cfd056d 100644
--- a/mcli/mcli-llama2-finetune.yaml
+++ b/mcli/mcli-llama2-finetune.yaml
@@ -9,7 +9,7 @@ integrations:
 command: |
   cd llm-foundry/scripts
   composer train/train.py /mnt/config/parameters.yaml
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 name: llama2-finetune
 
 compute:
diff --git a/mcli/mcli-openai-eval.yaml b/mcli/mcli-openai-eval.yaml
index 9a589cbf84..38b02a6019 100644
--- a/mcli/mcli-openai-eval.yaml
+++ b/mcli/mcli-openai-eval.yaml
@@ -16,7 +16,7 @@ gpu_num:
 # gpu_type:
 # cluster: # replace with your cluster here!
 
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 
 # The below is injected as a YAML file: /mnt/config/parameters.yaml
 parameters:
diff --git a/mcli/mcli-pretokenize-oci-upload.yaml b/mcli/mcli-pretokenize-oci-upload.yaml
index b3ad09ca28..0749dcc86e 100644
--- a/mcli/mcli-pretokenize-oci-upload.yaml
+++ b/mcli/mcli-pretokenize-oci-upload.yaml
@@ -1,5 +1,5 @@
 name: c4-2k-pre-tokenized
-image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.3.1_cu121-latest
 compute:
   gpus: 8  # Number of GPUs to use
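
A quick way to sanity-check the renamed images once this lands — a minimal
sketch, assuming Docker is available locally and the `2.3.1_cu121` tags built
by `.github/workflows/docker.yaml` above have been pushed to Docker Hub. As
the README hunk notes, the `mosaicml/llm-foundry` images ship only the
dependencies, so the package itself still has to be installed from PyPI or
from source:

    # Pull the renamed image (the `_flash2` suffix is dropped as of 2.3.1).
    docker pull mosaicml/llm-foundry:2.3.1_cu121-latest

    # Install llm-foundry inside a throwaway container and confirm it imports.
    docker run --rm mosaicml/llm-foundry:2.3.1_cu121-latest \
        bash -c "pip install llm-foundry && python -c 'import llmfoundry'"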