diff --git a/docker/README.md b/docker/README.md index b05733bb1b..3a44d57396 100644 --- a/docker/README.md +++ b/docker/README.md @@ -32,14 +32,17 @@ To install composer, once inside the image, run `pip install mosaicml`. | Linux Distro | Flavor | PyTorch Version | CUDA Version | Python Version | Docker Tags | |----------------|----------|-------------------|---------------------|------------------|------------------------------------------------------------------------------------------| | Ubuntu 20.04 | Base | 2.2.0 | 12.1.0 (Infiniband) | 3.10 | `mosaicml/pytorch:2.2.0_cu121-nightly20231213-python3.10-ubuntu20.04` | -| Ubuntu 20.04 | Base | 2.1.2 | 12.1.0 (Infiniband) | 3.10 | `mosaicml/pytorch:latest`, `mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04` | -| Ubuntu 20.04 | Base | 2.1.2 | 12.1.0 (EFA) | 3.10 | `mosaicml/pytorch:latest-aws`, `mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04-aws` | -| Ubuntu 20.04 | Base | 2.1.2 | cpu | 3.10 | `mosaicml/pytorch:latest_cpu`, `mosaicml/pytorch:2.1.2_cpu-python3.10-ubuntu20.04` | +| Ubuntu 20.04 | Base | 2.1.2 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:latest`, `mosaicml/pytorch:2.1.2_cu121-python3.11-ubuntu20.04` | +| Ubuntu 20.04 | Base | 2.1.2 | 12.1.0 (EFA) | 3.11 | `mosaicml/pytorch:latest-aws`, `mosaicml/pytorch:2.1.2_cu121-python3.11-ubuntu20.04-aws` | +| Ubuntu 20.04 | Base | 2.1.2 | cpu | 3.11 | `mosaicml/pytorch:latest_cpu`, `mosaicml/pytorch:2.1.2_cpu-python3.11-ubuntu20.04` | +| Ubuntu 20.04 | Base | 2.1.2 | 12.1.0 (Infiniband) | 3.10 | `mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04` | +| Ubuntu 20.04 | Base | 2.1.2 | cpu | 3.10 | `mosaicml/pytorch:2.1.2_cpu-python3.10-ubuntu20.04` | +| Ubuntu 20.04 | Base | 2.0.1 | 11.8.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.0.1_cu118-python3.11-ubuntu20.04` | +| Ubuntu 20.04 | Base | 2.0.1 | 11.8.0 (EFA) | 3.11 | `mosaicml/pytorch:2.0.1_cu118-python3.11-ubuntu20.04-aws` | +| Ubuntu 20.04 | Base | 2.0.1 | cpu | 3.11 | `mosaicml/pytorch:2.0.1_cpu-python3.11-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.0.1 | 11.8.0 (Infiniband) | 3.10 | `mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04` | -| Ubuntu 20.04 | Base | 2.0.1 | 11.8.0 (EFA) | 3.10 | `mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04-aws` | | Ubuntu 20.04 | Base | 2.0.1 | cpu | 3.10 | `mosaicml/pytorch:2.0.1_cpu-python3.10-ubuntu20.04` | | Ubuntu 20.04 | Base | 1.13.1 | 11.7.1 (Infiniband) | 3.10 | `mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04` | -| Ubuntu 20.04 | Base | 1.13.1 | 11.7.1 (EFA) | 3.10 | `mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04-aws` | | Ubuntu 20.04 | Base | 1.13.1 | cpu | 3.10 | `mosaicml/pytorch:1.13.1_cpu-python3.10-ubuntu20.04` | diff --git a/docker/build_matrix.yaml b/docker/build_matrix.yaml index cd2efc0e19..4726911cb7 100644 --- a/docker/build_matrix.yaml +++ b/docker/build_matrix.yaml @@ -1,4 +1,37 @@ # This file is automatically generated by generate_build_matrix.py. DO NOT EDIT! +- AWS_OFI_NCCL_VERSION: '' + BASE_IMAGE: nvidia/cuda:11.7.1-cudnn8-devel-ubuntu20.04 + CUDA_VERSION: 11.7.1 + IMAGE_NAME: torch-1-13-1-cu117 + MOFED_VERSION: 5.5-1.0.3.2 + NVIDIA_REQUIRE_CUDA_OVERRIDE: '' + PYTHON_VERSION: '3.10' + PYTORCH_NIGHTLY_URL: '' + PYTORCH_NIGHTLY_VERSION: '' + PYTORCH_VERSION: 1.13.1 + TAGS: + - mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04 + TARGET: pytorch_stage + TORCHVISION_VERSION: 0.14.1 +- AWS_OFI_NCCL_VERSION: '' + BASE_IMAGE: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 + CUDA_VERSION: 11.8.0 + IMAGE_NAME: torch-2-0-1-cu118 + MOFED_VERSION: 5.5-1.0.3.2 + NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=11.8 brand=tesla,driver>=470,driver<471 brand=tesla,driver>=515,driver<516 + brand=unknown,driver>=470,driver<471 brand=unknown,driver>=515,driver<516 brand=nvidia,driver>=470,driver<471 + brand=nvidia,driver>=515,driver<516 brand=nvidiartx,driver>=470,driver<471 brand=nvidiartx,driver>=515,driver<516 + brand=geforce,driver>=470,driver<471 brand=geforce,driver>=515,driver<516 brand=quadro,driver>=470,driver<471 + brand=quadro,driver>=515,driver<516 brand=titan,driver>=470,driver<471 brand=titan,driver>=515,driver<516 + brand=titanrtx,driver>=470,driver<471 brand=titanrtx,driver>=515,driver<516 + PYTHON_VERSION: '3.10' + PYTORCH_NIGHTLY_URL: '' + PYTORCH_NIGHTLY_VERSION: '' + PYTORCH_VERSION: 2.0.1 + TAGS: + - mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04 + TARGET: pytorch_stage + TORCHVISION_VERSION: 0.15.2 - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 CUDA_VERSION: 12.1.0 @@ -24,37 +57,36 @@ PYTORCH_VERSION: 2.1.2 TAGS: - mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04 - - mosaicml/pytorch:latest TARGET: pytorch_stage TORCHVISION_VERSION: 0.16.2 -- AWS_OFI_NCCL_VERSION: v1.7.4-aws - BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 - CUDA_VERSION: 12.1.0 - IMAGE_NAME: torch-2-1-2-cu121-aws +- AWS_OFI_NCCL_VERSION: '' + BASE_IMAGE: ubuntu:20.04 + CUDA_VERSION: '' + IMAGE_NAME: torch-1-13-1-cpu MOFED_VERSION: '' - NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 - brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 - brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 - brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 - brand=tesla,driver>=510,driver<511 brand=unknown,driver>=510,driver<511 brand=nvidia,driver>=510,driver<511 - brand=nvidiartx,driver>=510,driver<511 brand=geforce,driver>=510,driver<511 brand=geforcertx,driver>=510,driver<511 - brand=quadro,driver>=510,driver<511 brand=quadrortx,driver>=510,driver<511 brand=titan,driver>=510,driver<511 - brand=titanrtx,driver>=510,driver<511 brand=tesla,driver>=515,driver<516 brand=unknown,driver>=515,driver<516 - brand=nvidia,driver>=515,driver<516 brand=nvidiartx,driver>=515,driver<516 brand=geforce,driver>=515,driver<516 - brand=geforcertx,driver>=515,driver<516 brand=quadro,driver>=515,driver<516 brand=quadrortx,driver>=515,driver<516 - brand=titan,driver>=515,driver<516 brand=titanrtx,driver>=515,driver<516 brand=tesla,driver>=525,driver<526 - brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 - brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 - brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526 + NVIDIA_REQUIRE_CUDA_OVERRIDE: '' PYTHON_VERSION: '3.10' PYTORCH_NIGHTLY_URL: '' PYTORCH_NIGHTLY_VERSION: '' - PYTORCH_VERSION: 2.1.2 + PYTORCH_VERSION: 1.13.1 TAGS: - - mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04-aws - - mosaicml/pytorch:latest-aws + - mosaicml/pytorch:1.13.1_cpu-python3.10-ubuntu20.04 TARGET: pytorch_stage - TORCHVISION_VERSION: 0.16.2 + TORCHVISION_VERSION: 0.14.1 +- AWS_OFI_NCCL_VERSION: '' + BASE_IMAGE: ubuntu:20.04 + CUDA_VERSION: '' + IMAGE_NAME: torch-2-0-1-cpu + MOFED_VERSION: '' + NVIDIA_REQUIRE_CUDA_OVERRIDE: '' + PYTHON_VERSION: '3.10' + PYTORCH_NIGHTLY_URL: '' + PYTORCH_NIGHTLY_VERSION: '' + PYTORCH_VERSION: 2.0.1 + TAGS: + - mosaicml/pytorch:2.0.1_cpu-python3.10-ubuntu20.04 + TARGET: pytorch_stage + TORCHVISION_VERSION: 0.15.2 - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: ubuntu:20.04 CUDA_VERSION: '' @@ -67,7 +99,6 @@ PYTORCH_VERSION: 2.1.2 TAGS: - mosaicml/pytorch:2.1.2_cpu-python3.10-ubuntu20.04 - - mosaicml/pytorch:latest_cpu TARGET: pytorch_stage TORCHVISION_VERSION: 0.16.2 - AWS_OFI_NCCL_VERSION: '' @@ -81,14 +112,42 @@ brand=geforce,driver>=470,driver<471 brand=geforce,driver>=515,driver<516 brand=quadro,driver>=470,driver<471 brand=quadro,driver>=515,driver<516 brand=titan,driver>=470,driver<471 brand=titan,driver>=515,driver<516 brand=titanrtx,driver>=470,driver<471 brand=titanrtx,driver>=515,driver<516 - PYTHON_VERSION: '3.10' + PYTHON_VERSION: '3.11' PYTORCH_NIGHTLY_URL: '' PYTORCH_NIGHTLY_VERSION: '' PYTORCH_VERSION: 2.0.1 TAGS: - - mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04 + - mosaicml/pytorch:2.0.1_cu118-python3.11-ubuntu20.04 TARGET: pytorch_stage TORCHVISION_VERSION: 0.15.2 +- AWS_OFI_NCCL_VERSION: '' + BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 + CUDA_VERSION: 12.1.0 + IMAGE_NAME: torch-2-1-2-cu121 + MOFED_VERSION: 5.5-1.0.3.2 + NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 + brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 + brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 + brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 + brand=tesla,driver>=510,driver<511 brand=unknown,driver>=510,driver<511 brand=nvidia,driver>=510,driver<511 + brand=nvidiartx,driver>=510,driver<511 brand=geforce,driver>=510,driver<511 brand=geforcertx,driver>=510,driver<511 + brand=quadro,driver>=510,driver<511 brand=quadrortx,driver>=510,driver<511 brand=titan,driver>=510,driver<511 + brand=titanrtx,driver>=510,driver<511 brand=tesla,driver>=515,driver<516 brand=unknown,driver>=515,driver<516 + brand=nvidia,driver>=515,driver<516 brand=nvidiartx,driver>=515,driver<516 brand=geforce,driver>=515,driver<516 + brand=geforcertx,driver>=515,driver<516 brand=quadro,driver>=515,driver<516 brand=quadrortx,driver>=515,driver<516 + brand=titan,driver>=515,driver<516 brand=titanrtx,driver>=515,driver<516 brand=tesla,driver>=525,driver<526 + brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 + brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 + brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526 + PYTHON_VERSION: '3.11' + PYTORCH_NIGHTLY_URL: '' + PYTORCH_NIGHTLY_VERSION: '' + PYTORCH_VERSION: 2.1.2 + TAGS: + - mosaicml/pytorch:2.1.2_cu121-python3.11-ubuntu20.04 + - mosaicml/pytorch:latest + TARGET: pytorch_stage + TORCHVISION_VERSION: 0.16.2 - AWS_OFI_NCCL_VERSION: v1.7.4-aws BASE_IMAGE: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 CUDA_VERSION: 11.8.0 @@ -100,70 +159,71 @@ brand=geforce,driver>=470,driver<471 brand=geforce,driver>=515,driver<516 brand=quadro,driver>=470,driver<471 brand=quadro,driver>=515,driver<516 brand=titan,driver>=470,driver<471 brand=titan,driver>=515,driver<516 brand=titanrtx,driver>=470,driver<471 brand=titanrtx,driver>=515,driver<516 - PYTHON_VERSION: '3.10' + PYTHON_VERSION: '3.11' PYTORCH_NIGHTLY_URL: '' PYTORCH_NIGHTLY_VERSION: '' PYTORCH_VERSION: 2.0.1 TAGS: - - mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04-aws + - mosaicml/pytorch:2.0.1_cu118-python3.11-ubuntu20.04-aws TARGET: pytorch_stage TORCHVISION_VERSION: 0.15.2 -- AWS_OFI_NCCL_VERSION: '' - BASE_IMAGE: ubuntu:20.04 - CUDA_VERSION: '' - IMAGE_NAME: torch-2-0-1-cpu +- AWS_OFI_NCCL_VERSION: v1.7.4-aws + BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 + CUDA_VERSION: 12.1.0 + IMAGE_NAME: torch-2-1-2-cu121-aws MOFED_VERSION: '' - NVIDIA_REQUIRE_CUDA_OVERRIDE: '' - PYTHON_VERSION: '3.10' + NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 + brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 + brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 + brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 + brand=tesla,driver>=510,driver<511 brand=unknown,driver>=510,driver<511 brand=nvidia,driver>=510,driver<511 + brand=nvidiartx,driver>=510,driver<511 brand=geforce,driver>=510,driver<511 brand=geforcertx,driver>=510,driver<511 + brand=quadro,driver>=510,driver<511 brand=quadrortx,driver>=510,driver<511 brand=titan,driver>=510,driver<511 + brand=titanrtx,driver>=510,driver<511 brand=tesla,driver>=515,driver<516 brand=unknown,driver>=515,driver<516 + brand=nvidia,driver>=515,driver<516 brand=nvidiartx,driver>=515,driver<516 brand=geforce,driver>=515,driver<516 + brand=geforcertx,driver>=515,driver<516 brand=quadro,driver>=515,driver<516 brand=quadrortx,driver>=515,driver<516 + brand=titan,driver>=515,driver<516 brand=titanrtx,driver>=515,driver<516 brand=tesla,driver>=525,driver<526 + brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 + brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 + brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526 + PYTHON_VERSION: '3.11' PYTORCH_NIGHTLY_URL: '' PYTORCH_NIGHTLY_VERSION: '' - PYTORCH_VERSION: 2.0.1 + PYTORCH_VERSION: 2.1.2 TAGS: - - mosaicml/pytorch:2.0.1_cpu-python3.10-ubuntu20.04 + - mosaicml/pytorch:2.1.2_cu121-python3.11-ubuntu20.04-aws + - mosaicml/pytorch:latest-aws TARGET: pytorch_stage - TORCHVISION_VERSION: 0.15.2 + TORCHVISION_VERSION: 0.16.2 - AWS_OFI_NCCL_VERSION: '' - BASE_IMAGE: nvidia/cuda:11.7.1-cudnn8-devel-ubuntu20.04 - CUDA_VERSION: 11.7.1 - IMAGE_NAME: torch-1-13-1-cu117 - MOFED_VERSION: 5.5-1.0.3.2 - NVIDIA_REQUIRE_CUDA_OVERRIDE: '' - PYTHON_VERSION: '3.10' - PYTORCH_NIGHTLY_URL: '' - PYTORCH_NIGHTLY_VERSION: '' - PYTORCH_VERSION: 1.13.1 - TAGS: - - mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04 - TARGET: pytorch_stage - TORCHVISION_VERSION: 0.14.1 -- AWS_OFI_NCCL_VERSION: v1.7.4-aws - BASE_IMAGE: nvidia/cuda:11.7.1-cudnn8-devel-ubuntu20.04 - CUDA_VERSION: 11.7.1 - IMAGE_NAME: torch-1-13-1-cu117-aws + BASE_IMAGE: ubuntu:20.04 + CUDA_VERSION: '' + IMAGE_NAME: torch-2-0-1-cpu MOFED_VERSION: '' NVIDIA_REQUIRE_CUDA_OVERRIDE: '' - PYTHON_VERSION: '3.10' + PYTHON_VERSION: '3.11' PYTORCH_NIGHTLY_URL: '' PYTORCH_NIGHTLY_VERSION: '' - PYTORCH_VERSION: 1.13.1 + PYTORCH_VERSION: 2.0.1 TAGS: - - mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04-aws + - mosaicml/pytorch:2.0.1_cpu-python3.11-ubuntu20.04 TARGET: pytorch_stage - TORCHVISION_VERSION: 0.14.1 + TORCHVISION_VERSION: 0.15.2 - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: ubuntu:20.04 CUDA_VERSION: '' - IMAGE_NAME: torch-1-13-1-cpu + IMAGE_NAME: torch-2-1-2-cpu MOFED_VERSION: '' NVIDIA_REQUIRE_CUDA_OVERRIDE: '' - PYTHON_VERSION: '3.10' + PYTHON_VERSION: '3.11' PYTORCH_NIGHTLY_URL: '' PYTORCH_NIGHTLY_VERSION: '' - PYTORCH_VERSION: 1.13.1 + PYTORCH_VERSION: 2.1.2 TAGS: - - mosaicml/pytorch:1.13.1_cpu-python3.10-ubuntu20.04 + - mosaicml/pytorch:2.1.2_cpu-python3.11-ubuntu20.04 + - mosaicml/pytorch:latest_cpu TARGET: pytorch_stage - TORCHVISION_VERSION: 0.14.1 + TORCHVISION_VERSION: 0.16.2 - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 CUDA_VERSION: 12.1.0 @@ -211,7 +271,7 @@ brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526 - PYTHON_VERSION: '3.10' + PYTHON_VERSION: '3.11' PYTORCH_NIGHTLY_URL: '' PYTORCH_NIGHTLY_VERSION: '' PYTORCH_VERSION: 2.1.2 @@ -227,7 +287,7 @@ IMAGE_NAME: composer-0-17-2-cpu MOFED_VERSION: 5.5-1.0.3.2 NVIDIA_REQUIRE_CUDA_OVERRIDE: '' - PYTHON_VERSION: '3.10' + PYTHON_VERSION: '3.11' PYTORCH_NIGHTLY_URL: '' PYTORCH_NIGHTLY_VERSION: '' PYTORCH_VERSION: 2.1.2 diff --git a/docker/generate_build_matrix.py b/docker/generate_build_matrix.py index 3ae69f6d77..f1fa58c598 100644 --- a/docker/generate_build_matrix.py +++ b/docker/generate_build_matrix.py @@ -18,7 +18,7 @@ import tabulate import yaml -LATEST_PYTHON_VERSION = '3.10' +LATEST_PYTHON_VERSION = '3.11' PRODUCTION_PYTORCH_VERSION = '2.1.2' @@ -31,6 +31,13 @@ def _get_torchvision_version(pytorch_version: str): return '0.14.1' raise ValueError(f'Invalid pytorch_version: {pytorch_version}') +def _get_pytorch_version(python_version: str): + if python_version == '3.10': + return ['1.13.1', '2.0.1', '2.1.2'] + if python_version == '3.11': + return ['2.0.1', '2.1.2'] + raise ValueError(f'Invalid python_version: {python_version}') + def _get_base_image(cuda_version: str): if not cuda_version: @@ -165,68 +172,66 @@ def _write_table(table_tag: str, table_contents: str): def _main(): - python_versions = ['3.10'] - pytorch_versions = ['2.1.2', '2.0.1', '1.13.1'] + python_versions = ['3.10', '3.11'] cuda_options = [True, False] stages = ['pytorch_stage'] interconnects = ['mellanox', 'EFA'] # mellanox is default, EFA needed for AWS pytorch_entries = [] - for product in itertools.product(python_versions, pytorch_versions, cuda_options, stages, interconnects): - python_version, pytorch_version, use_cuda, stage, interconnect = product - - cuda_version = _get_cuda_version(pytorch_version=pytorch_version, use_cuda=use_cuda) - - entry = { - 'IMAGE_NAME': - _get_image_name(pytorch_version, cuda_version, stage, interconnect), - 'BASE_IMAGE': - _get_base_image(cuda_version), - 'CUDA_VERSION': - cuda_version, - 'PYTHON_VERSION': - python_version, - 'PYTORCH_VERSION': - pytorch_version, - 'TARGET': - stage, - 'TORCHVISION_VERSION': - _get_torchvision_version(pytorch_version), - 'TAGS': - _get_pytorch_tags( - python_version=python_version, - pytorch_version=pytorch_version, - cuda_version=cuda_version, - stage=stage, - interconnect=interconnect, - ), - 'PYTORCH_NIGHTLY_URL': - '', - 'PYTORCH_NIGHTLY_VERSION': - '', - 'NVIDIA_REQUIRE_CUDA_OVERRIDE': - _get_cuda_override(cuda_version), - } - - # Only build EFA image on latest python with cuda on pytorch_stage - if interconnect == 'EFA' and not (python_version == LATEST_PYTHON_VERSION and use_cuda and - stage == 'pytorch_stage'): - continue - - # Skip the mellanox drivers if not in the cuda images or using EFA - if not cuda_version or interconnect == 'EFA': - entry['MOFED_VERSION'] = '' - else: - entry['MOFED_VERSION'] = '5.5-1.0.3.2' + for product in itertools.product(python_versions, cuda_options, stages, interconnects): + python_version, use_cuda, stage, interconnect = product + for pytorch_version in _get_pytorch_version(python_version): + cuda_version = _get_cuda_version(pytorch_version=pytorch_version, use_cuda=use_cuda) + entry = { + 'IMAGE_NAME': + _get_image_name(pytorch_version, cuda_version, stage, interconnect), + 'BASE_IMAGE': + _get_base_image(cuda_version), + 'CUDA_VERSION': + cuda_version, + 'PYTHON_VERSION': + python_version, + 'PYTORCH_VERSION': + pytorch_version, + 'TARGET': + stage, + 'TORCHVISION_VERSION': + _get_torchvision_version(pytorch_version), + 'TAGS': + _get_pytorch_tags( + python_version=python_version, + pytorch_version=pytorch_version, + cuda_version=cuda_version, + stage=stage, + interconnect=interconnect, + ), + 'PYTORCH_NIGHTLY_URL': + '', + 'PYTORCH_NIGHTLY_VERSION': + '', + 'NVIDIA_REQUIRE_CUDA_OVERRIDE': + _get_cuda_override(cuda_version), + } + + # Only build EFA image on latest python with cuda on pytorch_stage + if interconnect == 'EFA' and not (python_version == LATEST_PYTHON_VERSION and use_cuda and + stage == 'pytorch_stage'): + continue + + # Skip the mellanox drivers if not in the cuda images or using EFA + if not cuda_version or interconnect == 'EFA': + entry['MOFED_VERSION'] = '' + else: + entry['MOFED_VERSION'] = '5.5-1.0.3.2' - # Skip EFA drivers if not using EFA - if interconnect != 'EFA': - entry['AWS_OFI_NCCL_VERSION'] = '' - else: - entry['AWS_OFI_NCCL_VERSION'] = 'v1.7.4-aws' + # Skip EFA drivers if not using EFA + if interconnect != 'EFA': + entry['AWS_OFI_NCCL_VERSION'] = '' + else: + entry['AWS_OFI_NCCL_VERSION'] = 'v1.7.4-aws' - pytorch_entries.append(entry) + pytorch_entries.append(entry) nightly_entry = { 'AWS_OFI_NCCL_VERSION': '', 'BASE_IMAGE': 'nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04',