Skip to content

Commit

Permalink
fix
Browse files: browse the repository at this point in the history
  • Loading branch information
mvpatel2000 committed Nov 5, 2024
1 parent 03c992b commit 2e417f4
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 30 deletions.
2 changes: 1 addition & 1 deletion composer/trainer/_patch_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,7 @@ def unshard_with_sync(self):

if version.parse(torch.__version__) >= version.parse('2.5.0') and version.parse(
torch.__version__,
) < version.parse('2.5.1'):
) < version.parse('2.5.2'):

# Save original FlatParamHandle.unshard to revert back to when dropping automicrobatching hooks
from torch.distributed.fsdp._flat_param import FlatParamHandle
Expand Down
6 changes: 3 additions & 3 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ To install composer, once inside the image, run `pip install mosaicml`.
<!-- BEGIN_PYTORCH_BUILD_MATRIX -->
| Linux Distro | Flavor | PyTorch Version | CUDA Version | Python Version | Docker Tags |
|----------------|----------|-------------------|---------------------|------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Ubuntu 20.04 | Base | 2.5.0 | 12.4.1 (Infiniband) | 3.11 | `mosaicml/pytorch:latest`, `mosaicml/pytorch:2.5.0_cu124-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.5.0 | 12.4.1 (EFA) | 3.11 | `mosaicml/pytorch:latest-aws`, `mosaicml/pytorch:2.5.0_cu124-python3.11-ubuntu20.04-aws` |
| Ubuntu 20.04 | Base | 2.5.0 | cpu | 3.11 | `mosaicml/pytorch:latest_cpu`, `mosaicml/pytorch:2.5.0_cpu-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.5.1 | 12.4.1 (Infiniband) | 3.11 | `mosaicml/pytorch:latest`, `mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.5.1 | 12.4.1 (EFA) | 3.11 | `mosaicml/pytorch:latest-aws`, `mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu20.04-aws` |
| Ubuntu 20.04 | Base | 2.5.1 | cpu | 3.11 | `mosaicml/pytorch:latest_cpu`, `mosaicml/pytorch:2.5.1_cpu-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.4.1 | 12.4.1 (Infiniband) | 3.11 | `mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.4.1 | 12.4.1 (EFA) | 3.11 | `mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04-aws` |
| Ubuntu 20.04 | Base | 2.4.1 | cpu | 3.11 | `mosaicml/pytorch:2.4.1_cpu-python3.11-ubuntu20.04` |
Expand Down
38 changes: 19 additions & 19 deletions docker/build_matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,54 +2,54 @@
- AWS_OFI_NCCL_VERSION: ''
BASE_IMAGE: nvidia/cuda:12.4.1-cudnn-devel-ubuntu20.04
CUDA_VERSION: 12.4.1
IMAGE_NAME: torch-2-5-0-cu124
IMAGE_NAME: torch-2-5-1-cu124
MOFED_VERSION: latest-23.10
NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
PYTHON_VERSION: '3.11'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
PYTORCH_VERSION: 2.5.0
PYTORCH_VERSION: 2.5.1
TAGS:
- mosaicml/pytorch:2.5.0_cu124-python3.11-ubuntu20.04
- ghcr.io/databricks-mosaic/pytorch:2.5.0_cu124-python3.11-ubuntu20.04
- mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu20.04
- ghcr.io/databricks-mosaic/pytorch:2.5.1_cu124-python3.11-ubuntu20.04
- mosaicml/pytorch:latest
- ghcr.io/databricks-mosaic/pytorch:latest
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.20.0
TORCHVISION_VERSION: 0.20.1
- AWS_OFI_NCCL_VERSION: v1.11.0-aws
BASE_IMAGE: nvidia/cuda:12.4.1-cudnn-devel-ubuntu20.04
CUDA_VERSION: 12.4.1
IMAGE_NAME: torch-2-5-0-cu124-aws
IMAGE_NAME: torch-2-5-1-cu124-aws
MOFED_VERSION: ''
NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
PYTHON_VERSION: '3.11'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
PYTORCH_VERSION: 2.5.0
PYTORCH_VERSION: 2.5.1
TAGS:
- mosaicml/pytorch:2.5.0_cu124-python3.11-ubuntu20.04-aws
- ghcr.io/databricks-mosaic/pytorch:2.5.0_cu124-python3.11-ubuntu20.04-aws
- mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu20.04-aws
- ghcr.io/databricks-mosaic/pytorch:2.5.1_cu124-python3.11-ubuntu20.04-aws
- mosaicml/pytorch:latest-aws
- ghcr.io/databricks-mosaic/pytorch:latest-aws
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.20.0
TORCHVISION_VERSION: 0.20.1
- AWS_OFI_NCCL_VERSION: ''
BASE_IMAGE: ubuntu:20.04
CUDA_VERSION: ''
IMAGE_NAME: torch-2-5-0-cpu
IMAGE_NAME: torch-2-5-1-cpu
MOFED_VERSION: ''
NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
PYTHON_VERSION: '3.11'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
PYTORCH_VERSION: 2.5.0
PYTORCH_VERSION: 2.5.1
TAGS:
- mosaicml/pytorch:2.5.0_cpu-python3.11-ubuntu20.04
- ghcr.io/databricks-mosaic/pytorch:2.5.0_cpu-python3.11-ubuntu20.04
- mosaicml/pytorch:2.5.1_cpu-python3.11-ubuntu20.04
- ghcr.io/databricks-mosaic/pytorch:2.5.1_cpu-python3.11-ubuntu20.04
- mosaicml/pytorch:latest_cpu
- ghcr.io/databricks-mosaic/pytorch:latest_cpu
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.20.0
TORCHVISION_VERSION: 0.20.1
- AWS_OFI_NCCL_VERSION: ''
BASE_IMAGE: nvidia/cuda:12.4.1-cudnn-devel-ubuntu20.04
CUDA_VERSION: 12.4.1
Expand Down Expand Up @@ -176,14 +176,14 @@
PYTHON_VERSION: '3.11'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
PYTORCH_VERSION: 2.5.0
PYTORCH_VERSION: 2.5.1
TAGS:
- mosaicml/composer:0.26.0
- ghcr.io/databricks-mosaic/composer:0.26.0
- mosaicml/composer:latest
- ghcr.io/databricks-mosaic/composer:latest
TARGET: composer_stage
TORCHVISION_VERSION: 0.20.0
TORCHVISION_VERSION: 0.20.1
- AWS_OFI_NCCL_VERSION: ''
BASE_IMAGE: ubuntu:20.04
COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.26.0
Expand All @@ -194,11 +194,11 @@
PYTHON_VERSION: '3.11'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
PYTORCH_VERSION: 2.5.0
PYTORCH_VERSION: 2.5.1
TAGS:
- mosaicml/composer:0.26.0_cpu
- ghcr.io/databricks-mosaic/composer:0.26.0_cpu
- mosaicml/composer:latest_cpu
- ghcr.io/databricks-mosaic/composer:latest_cpu
TARGET: composer_stage
TORCHVISION_VERSION: 0.20.0
TORCHVISION_VERSION: 0.20.1
10 changes: 5 additions & 5 deletions docker/generate_build_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
import yaml

PRODUCTION_PYTHON_VERSION = '3.11'
PRODUCTION_PYTORCH_VERSION = '2.5.0'
PRODUCTION_PYTORCH_VERSION = '2.5.1'


def _get_torchvision_version(pytorch_version: str):
if pytorch_version == '2.5.0':
return '0.20.0'
if pytorch_version == '2.5.1':
return '0.20.1'
if pytorch_version == '2.4.1':
return '0.19.1'
if pytorch_version == '2.3.1':
Expand All @@ -45,7 +45,7 @@ def _get_cuda_version(pytorch_version: str, use_cuda: bool):
# From https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/
if not use_cuda:
return ''
if pytorch_version == '2.5.0':
if pytorch_version == '2.5.1':
return '12.4.1'
if pytorch_version == '2.4.1':
return '12.4.1'
Expand Down Expand Up @@ -180,7 +180,7 @@ def _write_table(table_tag: str, table_contents: str):


def _main():
python_pytorch_versions = [('3.11', '2.5.0'), ('3.11', '2.4.1'), ('3.11', '2.3.1')]
python_pytorch_versions = [('3.11', '2.5.1'), ('3.11', '2.4.1'), ('3.11', '2.3.1')]
cuda_options = [True, False]
stages = ['pytorch_stage']
interconnects = ['mellanox', 'EFA'] # mellanox is default, EFA needed for AWS
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ def package_files(prefix: str, directory: str, extension: str):
'tqdm>=4.62.3,<5',
'torchmetrics>=1.0,<1.4.1',
'torch_optimizer>=0.3.0,<0.4',
'torchvision>=0.18.0,<0.20.1',
'torch>=2.3.0,<2.5.1',
'torchvision>=0.18.0,<0.20.2',
'torch>=2.3.0,<2.5.2',
'requests>=2.26.0,<3',
'numpy>=1.21.5,<2.2.0',
'psutil>=5.8.0,<7',
Expand Down

0 comments on commit 2e417f4

Please sign in to comment.