From fe2b3825fb6266fe1004e894ee609743055e33f2 Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Tue, 27 Feb 2024 16:49:34 -0500 Subject: [PATCH 1/4] add --- docker/generate_build_matrix.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docker/generate_build_matrix.py b/docker/generate_build_matrix.py index 10b07610ca..8d4fce3171 100644 --- a/docker/generate_build_matrix.py +++ b/docker/generate_build_matrix.py @@ -258,6 +258,23 @@ def _main(): } pytorch_entries.append(nightly_entry_311) + nightly_entry2_311 = { + 'AWS_OFI_NCCL_VERSION': '', + 'BASE_IMAGE': 'nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04', + 'CUDA_VERSION': '12.1.0', + 'IMAGE_NAME': 'torch-nightly-2-3-0-20240225-cu121-python3-11', + 'MOFED_VERSION': '5.5-1.0.3.2', + 'NVIDIA_REQUIRE_CUDA_OVERRIDE': _get_cuda_override('12.1.0'), + 'PYTHON_VERSION': '3.11', + 'PYTORCH_VERSION': '2.3.0', + 'PYTORCH_NIGHTLY_URL': 'https://download.pytorch.org/whl/nightly/cu121', + 'PYTORCH_NIGHTLY_VERSION': 'dev20240225+cu121', + 'TAGS': ['mosaicml/pytorch:2.3.0_cu121-nightly20240225-python3.11-ubuntu20.04'], + 'TARGET': 'pytorch_stage', + 'TORCHVISION_VERSION': '0.18.0' + } + pytorch_entries.append(nightly_entry2_311) + composer_entries = [] # The `GIT_COMMIT` is a placeholder and Jenkins will substitute it with the actual git commit for the `composer_staging` images From ced0b3a9ed4dab8cbf50d06ffde1099c28adc401 Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Tue, 27 Feb 2024 17:03:13 -0500 Subject: [PATCH 2/4] save --- docker/README.md | 1 + docker/build_matrix.yaml | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/docker/README.md b/docker/README.md index 736bbbd70d..9f2f432575 100644 --- a/docker/README.md +++ b/docker/README.md @@ -31,6 +31,7 @@ To install composer, once inside the image, run `pip install mosaicml`. | Linux Distro | Flavor | PyTorch Version | CUDA Version | Python Version | Docker Tags | |----------------|----------|-------------------|---------------------|------------------|------------------------------------------------------------------------------------------| | Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04` | +| Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240225-python3.11-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (EFA) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04-aws` | | Ubuntu 20.04 | Base | 2.2.1 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.2.1 | 12.1.0 (EFA) | 3.11 | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04-aws` | diff --git a/docker/build_matrix.yaml b/docker/build_matrix.yaml index 0f6d827304..26b488ccfb 100644 --- a/docker/build_matrix.yaml +++ b/docker/build_matrix.yaml @@ -244,6 +244,33 @@ - mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04 TARGET: pytorch_stage TORCHVISION_VERSION: 0.18.0 +- AWS_OFI_NCCL_VERSION: '' + BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 + CUDA_VERSION: 12.1.0 + IMAGE_NAME: torch-nightly-2-3-0-20240225-cu121-python3-11 + MOFED_VERSION: 5.5-1.0.3.2 + NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 + brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 + brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 + brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 + brand=tesla,driver>=510,driver<511 brand=unknown,driver>=510,driver<511 brand=nvidia,driver>=510,driver<511 + brand=nvidiartx,driver>=510,driver<511 brand=geforce,driver>=510,driver<511 brand=geforcertx,driver>=510,driver<511 + brand=quadro,driver>=510,driver<511 brand=quadrortx,driver>=510,driver<511 brand=titan,driver>=510,driver<511 + brand=titanrtx,driver>=510,driver<511 brand=tesla,driver>=515,driver<516 brand=unknown,driver>=515,driver<516 + brand=nvidia,driver>=515,driver<516 brand=nvidiartx,driver>=515,driver<516 brand=geforce,driver>=515,driver<516 + brand=geforcertx,driver>=515,driver<516 brand=quadro,driver>=515,driver<516 brand=quadrortx,driver>=515,driver<516 + brand=titan,driver>=515,driver<516 brand=titanrtx,driver>=515,driver<516 brand=tesla,driver>=525,driver<526 + brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 + brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 + brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526 + PYTHON_VERSION: '3.11' + PYTORCH_NIGHTLY_URL: https://download.pytorch.org/whl/nightly/cu121 + PYTORCH_NIGHTLY_VERSION: dev20240225+cu121 + PYTORCH_VERSION: 2.3.0 + TAGS: + - mosaicml/pytorch:2.3.0_cu121-nightly20240225-python3.11-ubuntu20.04 + TARGET: pytorch_stage + TORCHVISION_VERSION: 0.18.0 - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.20.1 From ab466e5fe062d694b24f6c57b2110336f39ac0d6 Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Tue, 27 Feb 2024 17:19:48 -0500 Subject: [PATCH 3/4] fix --- docker/README.md | 2 +- docker/build_matrix.yaml | 6 +++--- docker/generate_build_matrix.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docker/README.md b/docker/README.md index 9f2f432575..e1326c735c 100644 --- a/docker/README.md +++ b/docker/README.md @@ -31,7 +31,7 @@ To install composer, once inside the image, run `pip install mosaicml`. | Linux Distro | Flavor | PyTorch Version | CUDA Version | Python Version | Docker Tags | |----------------|----------|-------------------|---------------------|------------------|------------------------------------------------------------------------------------------| | Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04` | -| Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240225-python3.11-ubuntu20.04` | +| Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240224-python3.11-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (EFA) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04-aws` | | Ubuntu 20.04 | Base | 2.2.1 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.2.1 | 12.1.0 (EFA) | 3.11 | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04-aws` | diff --git a/docker/build_matrix.yaml b/docker/build_matrix.yaml index 26b488ccfb..781ec74545 100644 --- a/docker/build_matrix.yaml +++ b/docker/build_matrix.yaml @@ -247,7 +247,7 @@ - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 CUDA_VERSION: 12.1.0 - IMAGE_NAME: torch-nightly-2-3-0-20240225-cu121-python3-11 + IMAGE_NAME: torch-nightly-2-3-0-20240224-cu121-python3-11 MOFED_VERSION: 5.5-1.0.3.2 NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 @@ -265,10 +265,10 @@ brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526 PYTHON_VERSION: '3.11' PYTORCH_NIGHTLY_URL: https://download.pytorch.org/whl/nightly/cu121 - PYTORCH_NIGHTLY_VERSION: dev20240225+cu121 + PYTORCH_NIGHTLY_VERSION: dev20240224+cu121 PYTORCH_VERSION: 2.3.0 TAGS: - - mosaicml/pytorch:2.3.0_cu121-nightly20240225-python3.11-ubuntu20.04 + - mosaicml/pytorch:2.3.0_cu121-nightly20240224-python3.11-ubuntu20.04 TARGET: pytorch_stage TORCHVISION_VERSION: 0.18.0 - AWS_OFI_NCCL_VERSION: '' diff --git a/docker/generate_build_matrix.py b/docker/generate_build_matrix.py index 8d4fce3171..15f44e2f08 100644 --- a/docker/generate_build_matrix.py +++ b/docker/generate_build_matrix.py @@ -262,14 +262,14 @@ def _main(): 'AWS_OFI_NCCL_VERSION': '', 'BASE_IMAGE': 'nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04', 'CUDA_VERSION': '12.1.0', - 'IMAGE_NAME': 'torch-nightly-2-3-0-20240225-cu121-python3-11', + 'IMAGE_NAME': 'torch-nightly-2-3-0-20240224-cu121-python3-11', 'MOFED_VERSION': '5.5-1.0.3.2', 'NVIDIA_REQUIRE_CUDA_OVERRIDE': _get_cuda_override('12.1.0'), 'PYTHON_VERSION': '3.11', 'PYTORCH_VERSION': '2.3.0', 'PYTORCH_NIGHTLY_URL': 'https://download.pytorch.org/whl/nightly/cu121', - 'PYTORCH_NIGHTLY_VERSION': 'dev20240225+cu121', - 'TAGS': ['mosaicml/pytorch:2.3.0_cu121-nightly20240225-python3.11-ubuntu20.04'], + 'PYTORCH_NIGHTLY_VERSION': 'dev20240224+cu121', + 'TAGS': ['mosaicml/pytorch:2.3.0_cu121-nightly20240224-python3.11-ubuntu20.04'], 'TARGET': 'pytorch_stage', 'TORCHVISION_VERSION': '0.18.0' } From 58471cec63682da4c8745c1caa56cf084ad0e746 Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Tue, 27 Feb 2024 18:16:55 -0500 Subject: [PATCH 4/4] fix --- docker/README.md | 2 +- docker/build_matrix.yaml | 6 +++--- docker/generate_build_matrix.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docker/README.md b/docker/README.md index e1326c735c..f3f9c257f0 100644 --- a/docker/README.md +++ b/docker/README.md @@ -31,7 +31,7 @@ To install composer, once inside the image, run `pip install mosaicml`. | Linux Distro | Flavor | PyTorch Version | CUDA Version | Python Version | Docker Tags | |----------------|----------|-------------------|---------------------|------------------|------------------------------------------------------------------------------------------| | Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04` | -| Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240224-python3.11-ubuntu20.04` | +| Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240226-python3.11-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (EFA) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04-aws` | | Ubuntu 20.04 | Base | 2.2.1 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.2.1 | 12.1.0 (EFA) | 3.11 | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04-aws` | diff --git a/docker/build_matrix.yaml b/docker/build_matrix.yaml index 781ec74545..bc43fbba18 100644 --- a/docker/build_matrix.yaml +++ b/docker/build_matrix.yaml @@ -247,7 +247,7 @@ - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 CUDA_VERSION: 12.1.0 - IMAGE_NAME: torch-nightly-2-3-0-20240224-cu121-python3-11 + IMAGE_NAME: torch-nightly-2-3-0-20240226-cu121-python3-11 MOFED_VERSION: 5.5-1.0.3.2 NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 @@ -265,10 +265,10 @@ brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526 PYTHON_VERSION: '3.11' PYTORCH_NIGHTLY_URL: https://download.pytorch.org/whl/nightly/cu121 - PYTORCH_NIGHTLY_VERSION: dev20240224+cu121 + PYTORCH_NIGHTLY_VERSION: dev20240226+cu121 PYTORCH_VERSION: 2.3.0 TAGS: - - mosaicml/pytorch:2.3.0_cu121-nightly20240224-python3.11-ubuntu20.04 + - mosaicml/pytorch:2.3.0_cu121-nightly20240226-python3.11-ubuntu20.04 TARGET: pytorch_stage TORCHVISION_VERSION: 0.18.0 - AWS_OFI_NCCL_VERSION: '' diff --git a/docker/generate_build_matrix.py b/docker/generate_build_matrix.py index 15f44e2f08..fc72c8a435 100644 --- a/docker/generate_build_matrix.py +++ b/docker/generate_build_matrix.py @@ -262,14 +262,14 @@ def _main(): 'AWS_OFI_NCCL_VERSION': '', 'BASE_IMAGE': 'nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04', 'CUDA_VERSION': '12.1.0', - 'IMAGE_NAME': 'torch-nightly-2-3-0-20240224-cu121-python3-11', + 'IMAGE_NAME': 'torch-nightly-2-3-0-20240226-cu121-python3-11', 'MOFED_VERSION': '5.5-1.0.3.2', 'NVIDIA_REQUIRE_CUDA_OVERRIDE': _get_cuda_override('12.1.0'), 'PYTHON_VERSION': '3.11', 'PYTORCH_VERSION': '2.3.0', 'PYTORCH_NIGHTLY_URL': 'https://download.pytorch.org/whl/nightly/cu121', - 'PYTORCH_NIGHTLY_VERSION': 'dev20240224+cu121', - 'TAGS': ['mosaicml/pytorch:2.3.0_cu121-nightly20240224-python3.11-ubuntu20.04'], + 'PYTORCH_NIGHTLY_VERSION': 'dev20240226+cu121', + 'TAGS': ['mosaicml/pytorch:2.3.0_cu121-nightly20240226-python3.11-ubuntu20.04'], 'TARGET': 'pytorch_stage', 'TORCHVISION_VERSION': '0.18.0' }