From 5f467c6f9b627c8f3d5ab917bfc7ca9762f2a41a Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Wed, 6 Mar 2024 19:34:23 -0500 Subject: [PATCH] Bump minor version in base image (#3092) * bump * fix * fix --- docker/README.md | 12 ++++++------ docker/build_matrix.yaml | 28 ++++++++++++++-------------- docker/generate_build_matrix.py | 18 +++++++++--------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/docker/README.md b/docker/README.md index 736bbbd70d..73b4b1e13b 100644 --- a/docker/README.md +++ b/docker/README.md @@ -30,13 +30,13 @@ To install composer, once inside the image, run `pip install mosaicml`. | Linux Distro | Flavor | PyTorch Version | CUDA Version | Python Version | Docker Tags | |----------------|----------|-------------------|---------------------|------------------|------------------------------------------------------------------------------------------| -| Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04` | -| Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (EFA) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04-aws` | -| Ubuntu 20.04 | Base | 2.2.1 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04` | -| Ubuntu 20.04 | Base | 2.2.1 | 12.1.0 (EFA) | 3.11 | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04-aws` | +| Ubuntu 20.04 | Base | 2.3.0 | 12.1.1 (Infiniband) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04` | +| Ubuntu 20.04 | Base | 2.3.0 | 12.1.1 (EFA) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04-aws` | +| Ubuntu 20.04 | Base | 2.2.1 | 12.1.1 (Infiniband) | 3.11 | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04` | +| Ubuntu 20.04 | Base | 2.2.1 | 12.1.1 (EFA) | 3.11 | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04-aws` | | Ubuntu 20.04 | Base | 2.2.1 | cpu | 3.11 | `mosaicml/pytorch:2.2.1_cpu-python3.11-ubuntu20.04` | -| Ubuntu 20.04 | Base | 2.1.2 | 12.1.0 (Infiniband) | 3.10 | `mosaicml/pytorch:latest`, `mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04` | -| Ubuntu 20.04 | Base | 2.1.2 | 12.1.0 (EFA) | 3.10 | `mosaicml/pytorch:latest-aws`, `mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04-aws` | +| Ubuntu 20.04 | Base | 2.1.2 | 12.1.1 (Infiniband) | 3.10 | `mosaicml/pytorch:latest`, `mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04` | +| Ubuntu 20.04 | Base | 2.1.2 | 12.1.1 (EFA) | 3.10 | `mosaicml/pytorch:latest-aws`, `mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04-aws` | | Ubuntu 20.04 | Base | 2.1.2 | cpu | 3.10 | `mosaicml/pytorch:latest_cpu`, `mosaicml/pytorch:2.1.2_cpu-python3.10-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.0.1 | 11.8.0 (Infiniband) | 3.10 | `mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.0.1 | 11.8.0 (EFA) | 3.10 | `mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04-aws` | diff --git a/docker/build_matrix.yaml b/docker/build_matrix.yaml index 0f6d827304..31e3e1ba27 100644 --- a/docker/build_matrix.yaml +++ b/docker/build_matrix.yaml @@ -1,7 +1,7 @@ # This file is automatically generated by generate_build_matrix.py. DO NOT EDIT! - AWS_OFI_NCCL_VERSION: '' - BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 - CUDA_VERSION: 12.1.0 + BASE_IMAGE: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04 + CUDA_VERSION: 12.1.1 IMAGE_NAME: torch-2-2-1-cu121 MOFED_VERSION: 5.5-1.0.3.2 NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 @@ -27,8 +27,8 @@ TARGET: pytorch_stage TORCHVISION_VERSION: 0.17.1 - AWS_OFI_NCCL_VERSION: v1.7.4-aws - BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 - CUDA_VERSION: 12.1.0 + BASE_IMAGE: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04 + CUDA_VERSION: 12.1.1 IMAGE_NAME: torch-2-2-1-cu121-aws MOFED_VERSION: '' NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 @@ -68,8 +68,8 @@ TARGET: pytorch_stage TORCHVISION_VERSION: 0.17.1 - AWS_OFI_NCCL_VERSION: '' - BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 - CUDA_VERSION: 12.1.0 + BASE_IMAGE: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04 + CUDA_VERSION: 12.1.1 IMAGE_NAME: torch-2-1-2-cu121 MOFED_VERSION: 5.5-1.0.3.2 NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 @@ -96,8 +96,8 @@ TARGET: pytorch_stage TORCHVISION_VERSION: 0.16.2 - AWS_OFI_NCCL_VERSION: v1.7.4-aws - BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 - CUDA_VERSION: 12.1.0 + BASE_IMAGE: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04 + CUDA_VERSION: 12.1.1 IMAGE_NAME: torch-2-1-2-cu121-aws MOFED_VERSION: '' NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 @@ -191,8 +191,8 @@ TARGET: pytorch_stage TORCHVISION_VERSION: 0.15.2 - AWS_OFI_NCCL_VERSION: v1.7.4-aws - BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 - CUDA_VERSION: 12.1.0 + BASE_IMAGE: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04 + CUDA_VERSION: 12.1.1 IMAGE_NAME: torch-nightly-2-3-0-20240110-cu121-python3-11-aws MOFED_VERSION: '' NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 @@ -218,8 +218,8 @@ TARGET: pytorch_stage TORCHVISION_VERSION: 0.18.0 - AWS_OFI_NCCL_VERSION: '' - BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 - CUDA_VERSION: 12.1.0 + BASE_IMAGE: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04 + CUDA_VERSION: 12.1.1 IMAGE_NAME: torch-nightly-2-3-0-20240110-cu121-python3-11 MOFED_VERSION: 5.5-1.0.3.2 NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 @@ -245,9 +245,9 @@ TARGET: pytorch_stage TORCHVISION_VERSION: 0.18.0 - AWS_OFI_NCCL_VERSION: '' - BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 + BASE_IMAGE: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04 COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.20.1 - CUDA_VERSION: 12.1.0 + CUDA_VERSION: 12.1.1 IMAGE_NAME: composer-0-20-1 MOFED_VERSION: 5.5-1.0.3.2 NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 diff --git a/docker/generate_build_matrix.py b/docker/generate_build_matrix.py index 10b07610ca..a660c8ef72 100644 --- a/docker/generate_build_matrix.py +++ b/docker/generate_build_matrix.py @@ -43,9 +43,9 @@ def _get_cuda_version(pytorch_version: str, use_cuda: bool): if not use_cuda: return '' if pytorch_version == '2.2.1': - return '12.1.0' + return '12.1.1' if pytorch_version == '2.1.2': - return '12.1.0' + return '12.1.1' if pytorch_version == '2.0.1': return '11.8.0' raise ValueError(f'Invalid pytorch_version: {pytorch_version}') @@ -58,7 +58,7 @@ def _get_cuda_version_tag(cuda_version: str): def _get_cuda_override(cuda_version: str): - if cuda_version == '12.1.0': + if cuda_version == '12.1.1': cuda_121_override_string = ('cuda>=12.1 brand=tesla,driver>=450,driver<451 ' 'brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 ' 'brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 ' @@ -226,11 +226,11 @@ def _main(): nightly_entry_311_aws = { 'AWS_OFI_NCCL_VERSION': 'v1.7.4-aws', - 'BASE_IMAGE': 'nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04', - 'CUDA_VERSION': '12.1.0', + 'BASE_IMAGE': 'nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04', + 'CUDA_VERSION': '12.1.1', 'IMAGE_NAME': 'torch-nightly-2-3-0-20240110-cu121-python3-11-aws', 'MOFED_VERSION': '', - 'NVIDIA_REQUIRE_CUDA_OVERRIDE': _get_cuda_override('12.1.0'), + 'NVIDIA_REQUIRE_CUDA_OVERRIDE': _get_cuda_override('12.1.1'), 'PYTHON_VERSION': '3.11', 'PYTORCH_VERSION': '2.3.0', 'PYTORCH_NIGHTLY_URL': 'https://download.pytorch.org/whl/nightly/cu121', @@ -243,11 +243,11 @@ def _main(): nightly_entry_311 = { 'AWS_OFI_NCCL_VERSION': '', - 'BASE_IMAGE': 'nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04', - 'CUDA_VERSION': '12.1.0', + 'BASE_IMAGE': 'nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04', + 'CUDA_VERSION': '12.1.1', 'IMAGE_NAME': 'torch-nightly-2-3-0-20240110-cu121-python3-11', 'MOFED_VERSION': '5.5-1.0.3.2', - 'NVIDIA_REQUIRE_CUDA_OVERRIDE': _get_cuda_override('12.1.0'), + 'NVIDIA_REQUIRE_CUDA_OVERRIDE': _get_cuda_override('12.1.1'), 'PYTHON_VERSION': '3.11', 'PYTORCH_VERSION': '2.3.0', 'PYTORCH_NIGHTLY_URL': 'https://download.pytorch.org/whl/nightly/cu121',