diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 89aa917809..e4e6f83551 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -17,12 +17,6 @@ jobs: strategy: matrix: include: - - name: "2.3.0_cu121_flash2" - base_image: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04 - dep_groups: "[gpu-flash2]" - - name: "2.3.0_cu121_flash2_aws" - base_image: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04-aws - dep_groups: "[gpu-flash2]" - name: "2.3.1_cu121" base_image: mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04 dep_groups: "[gpu]" diff --git a/.github/workflows/pr-cpu.yaml b/.github/workflows/pr-cpu.yaml index c2dc71f13d..2dd1c0edab 100644 --- a/.github/workflows/pr-cpu.yaml +++ b/.github/workflows/pr-cpu.yaml @@ -19,10 +19,6 @@ jobs: strategy: matrix: include: - - name: "cpu-2.3.0" - container: mosaicml/pytorch:2.3.0_cpu-python3.11-ubuntu20.04 - markers: "not gpu" - pytest_command: "coverage run -m pytest" - name: "cpu-2.3.1" container: mosaicml/pytorch:2.3.1_cpu-python3.11-ubuntu20.04 markers: "not gpu" diff --git a/.github/workflows/pr-gpu.yaml b/.github/workflows/pr-gpu.yaml index 5fe8cad483..c5638e403d 100644 --- a/.github/workflows/pr-gpu.yaml +++ b/.github/workflows/pr-gpu.yaml @@ -19,11 +19,6 @@ jobs: fail-fast: false matrix: include: - - name: "gpu-2.3.0-1" - container: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04 - markers: "gpu" - pytest_command: "coverage run -m pytest" - pip_deps: "[all]" - name: "gpu-2.3.1-1" container: mosaicml/llm-foundry:2.3.1_cu121-latest markers: "gpu" @@ -49,11 +44,6 @@ jobs: fail-fast: false matrix: include: - - name: "gpu-2.3.0-2" - container: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04 - markers: "gpu" - pytest_command: "coverage run -m pytest" - pip_deps: "[all]" - name: "gpu-2.3.1-2" container: mosaicml/llm-foundry:2.3.1_cu121-latest markers: "gpu" @@ -79,11 +69,6 @@ jobs: fail-fast: false matrix: include: - - name: "gpu-2.3.0-4" - container: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04 - markers: "gpu" - pytest_command: "coverage run -m pytest" - pip_deps: "[all]" - name: "gpu-2.3.1-4" container: mosaicml/llm-foundry:2.3.1_cu121-latest markers: "gpu" diff --git a/README.md b/README.md index c92c252395..16c765c7e4 100644 --- a/README.md +++ b/README.md @@ -113,8 +113,8 @@ If you have success/failure using LLM Foundry on other systems, please let us kn | Device | Torch Version | Cuda Version | Status | | -------------- | ------------- | ------------ | ---------------------------- | -| A100-40GB/80GB | 2.3.0 | 12.1 | :white_check_mark: Supported | -| H100-80GB | 2.3.0 | 12.1 | :white_check_mark: Supported | +| A100-40GB/80GB | 2.3.1 | 12.1 | :white_check_mark: Supported | +| H100-80GB | 2.3.1 | 12.1 | :white_check_mark: Supported | ## MosaicML Docker Images We highly recommend using our prebuilt Docker images. You can find them here: https://hub.docker.com/orgs/mosaicml/repositories. @@ -122,15 +122,15 @@ We highly recommend using our prebuilt Docker images. You can find them here: ht The `mosaicml/pytorch` images are pinned to specific PyTorch and CUDA versions, and are stable and rarely updated. The `mosaicml/llm-foundry` images are built with new tags upon every commit to the `main` branch. -You can select a specific commit hash such as `mosaicml/llm-foundry:2.3.0_cu121_flash2-36ab1ba` or take the latest one using `mosaicml/llm-foundry:2.3.0_cu121_flash2-latest`. +You can select a specific commit hash such as `mosaicml/llm-foundry:2.3.1_cu121-36ab1ba` or take the latest one using `mosaicml/llm-foundry:2.3.1_cu121-latest`. **Please Note:** The `mosaicml/llm-foundry` images do not come with the `llm-foundry` package preinstalled, just the dependencies. You will still need to `pip install llm-foundry` either from PyPi or from source. | Docker Image | Torch Version | Cuda Version | LLM Foundry dependencies installed? | | ------------------------------------------------------ | ------------- | ----------------- | ----------------------------------- | -| `mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04` | 2.3.0 | 12.1 (Infiniband) | No | -| `mosaicml/llm-foundry:2.3.0_cu121_flash2-latest` | 2.3.0 | 12.1 (Infiniband) | Yes | -| `mosaicml/llm-foundry:2.3.0_cu121_flash2_aws-latest` | 2.3.0 | 12.1 (EFA) | Yes | +| `mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04` | 2.3.1 | 12.1 (Infiniband) | No | +| `mosaicml/llm-foundry:2.3.1_cu121-latest` | 2.3.1 | 12.1 (Infiniband) | Yes | +| `mosaicml/llm-foundry:2.3.1_cu121_aws-latest` | 2.3.1 | 12.1 (EFA) | Yes | # Installation diff --git a/mcli/mcli-1b-eval.yaml b/mcli/mcli-1b-eval.yaml index fc72bac974..d8ef42d5d5 100644 --- a/mcli/mcli-1b-eval.yaml +++ b/mcli/mcli-1b-eval.yaml @@ -9,7 +9,7 @@ integrations: command: | cd llm-foundry/scripts/ composer eval/eval.py /mnt/config/parameters.yaml -image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest +image: mosaicml/llm-foundry:2.3.1_cu121-latest name: mpt-1b-eval compute: diff --git a/mcli/mcli-1b-max-seq-len-8k.yaml b/mcli/mcli-1b-max-seq-len-8k.yaml index 512ddc90c8..4b8eb601b2 100644 --- a/mcli/mcli-1b-max-seq-len-8k.yaml +++ b/mcli/mcli-1b-max-seq-len-8k.yaml @@ -17,7 +17,7 @@ command: | --out_root ./my-copy-c4 --splits train_small val_small \ --concat_tokens 8192 --tokenizer EleutherAI/gpt-neox-20b --eos_text '<|endoftext|>' composer train/train.py /mnt/config/parameters.yaml -image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest +image: mosaicml/llm-foundry:2.3.1_cu121-latest name: mpt-1b-ctx-8k-gpus-8 compute: diff --git a/mcli/mcli-1b.yaml b/mcli/mcli-1b.yaml index 9850860358..20d41fb2cc 100644 --- a/mcli/mcli-1b.yaml +++ b/mcli/mcli-1b.yaml @@ -21,7 +21,7 @@ command: | eval_loader.dataset.split=val_small \ max_duration=100ba \ eval_interval=0 -image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest +image: mosaicml/llm-foundry:2.3.1_cu121-latest name: mpt-1b-gpus-8 compute: diff --git a/mcli/mcli-benchmark-mpt.yaml b/mcli/mcli-benchmark-mpt.yaml index a4b3f52ba7..a7d44239b5 100644 --- a/mcli/mcli-benchmark-mpt.yaml +++ b/mcli/mcli-benchmark-mpt.yaml @@ -6,7 +6,7 @@ compute: # cluster: TODO # Name of the cluster to use for this run # gpu_type: a100_80gb # Type of GPU to use. We use a100_80gb in our experiments -image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest +image: mosaicml/llm-foundry:2.3.1_cu121-latest integrations: - integration_type: git_repo diff --git a/mcli/mcli-convert-composer-to-hf.yaml b/mcli/mcli-convert-composer-to-hf.yaml index bebdf42926..60c708e2f6 100644 --- a/mcli/mcli-convert-composer-to-hf.yaml +++ b/mcli/mcli-convert-composer-to-hf.yaml @@ -13,7 +13,7 @@ command: | --hf_output_path s3://bucket/folder/hf/ \ --output_precision bf16 \ -image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest +image: mosaicml/llm-foundry:2.3.1_cu121-latest name: convert-composer-hf compute: diff --git a/mcli/mcli-hf-eval.yaml b/mcli/mcli-hf-eval.yaml index 3e24bba9ae..e69e6dadda 100644 --- a/mcli/mcli-hf-eval.yaml +++ b/mcli/mcli-hf-eval.yaml @@ -16,7 +16,7 @@ gpu_num: 8 # gpu_type: # cluster: # replace with your cluster here! -image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest +image: mosaicml/llm-foundry:2.3.1_cu121-latest # The below is injected as a YAML file: /mnt/config/parameters.yaml parameters: diff --git a/mcli/mcli-hf-generate.yaml b/mcli/mcli-hf-generate.yaml index c3bf6d48cc..8b382c41f0 100644 --- a/mcli/mcli-hf-generate.yaml +++ b/mcli/mcli-hf-generate.yaml @@ -35,7 +35,7 @@ command: | "Here's a quick recipe for baking chocolate chip cookies: Start by" \ "The best 5 cities to visit in Europe are" -image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest +image: mosaicml/llm-foundry:2.3.1_cu121-latest name: hf-generate compute: diff --git a/mcli/mcli-llama2-finetune.yaml b/mcli/mcli-llama2-finetune.yaml index 932d013442..f70cfd056d 100644 --- a/mcli/mcli-llama2-finetune.yaml +++ b/mcli/mcli-llama2-finetune.yaml @@ -9,7 +9,7 @@ integrations: command: | cd llm-foundry/scripts composer train/train.py /mnt/config/parameters.yaml -image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest +image: mosaicml/llm-foundry:2.3.1_cu121-latest name: llama2-finetune compute: diff --git a/mcli/mcli-openai-eval.yaml b/mcli/mcli-openai-eval.yaml index 9a589cbf84..38b02a6019 100644 --- a/mcli/mcli-openai-eval.yaml +++ b/mcli/mcli-openai-eval.yaml @@ -16,7 +16,7 @@ gpu_num: # gpu_type: # cluster: # replace with your cluster here! -image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest +image: mosaicml/llm-foundry:2.3.1_cu121-latest # The below is injected as a YAML file: /mnt/config/parameters.yaml parameters: diff --git a/mcli/mcli-pretokenize-oci-upload.yaml b/mcli/mcli-pretokenize-oci-upload.yaml index b3ad09ca28..0749dcc86e 100644 --- a/mcli/mcli-pretokenize-oci-upload.yaml +++ b/mcli/mcli-pretokenize-oci-upload.yaml @@ -1,5 +1,5 @@ name: c4-2k-pre-tokenized -image: mosaicml/llm-foundry:2.3.0_cu121_flash2-latest +image: mosaicml/llm-foundry:2.3.1_cu121-latest compute: gpus: 8 # Number of GPUs to use