Commit

version bump to 0.24.0
eracah committed Aug 6, 2024
1 parent 2fdfd12 commit e38631b
Showing 7 changed files with 10 additions and 191 deletions.
2 changes: 1 addition & 1 deletion composer/_version.py
@@ -3,4 +3,4 @@

 """The Composer Version."""

-__version__ = '0.24.0.dev0'
+__version__ = '0.24.0'
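
As a quick sanity check after installing this release, the bumped version string can be read back at runtime. A minimal sketch, assuming the package re-exports `__version__` from `composer/_version.py` (the existing convention):

import composer

# A fresh install of this release should report the bumped version.
assert composer.__version__ == '0.24.0', composer.__version__
print(composer.__version__)  # -> 0.24.0
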
129 changes: 0 additions & 129 deletions composer/callbacks/eval_output_logging_callback.py

This file was deleted.

16 changes: 0 additions & 16 deletions composer/core/state.py
@@ -47,7 +47,6 @@
     ParallelismConfig,
     ParallelismType,
     TPConfig,
-    VersionedDeprecationWarning,
     batch_get,
     batch_set,
     dist,
@@ -896,21 +895,6 @@ def fsdp_state_dict_type(self):
     def fsdp_sharded_state_dict_enabled(self):
         return self.fsdp_config is not None and self.fsdp_enabled and self.fsdp_state_dict_type == 'sharded'

-    @property
-    def fsdp_device_mesh(self):
-        warnings.warn(VersionedDeprecationWarning('fsdp_device_mesh is deprecated. Use device_mesh instead.', '0.24'))
-        return self.device_mesh
-
-    @property
-    def load_fsdp_monolith_rank0_only(self):
-        warnings.warn(
-            VersionedDeprecationWarning(
-                'load_fsdp_monolith_rank0_only is deprecated. Use load_monolith_rank0_only instead.',
-                '0.24',
-            ),
-        )
-        return self.load_monolith_rank0_only
-
     @property
     def load_monolith_rank0_only(self):
         return (
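
Both removed properties were pure aliases for surviving attributes, so call sites migrate one-to-one. A hedged sketch of the migration, where `state` stands in for any `composer.core.State` instance:

def get_mesh_and_monolith_flag(state):
    """Sketch of the 0.24 migration for the removed State aliases."""
    # Before 0.24 (deprecated): state.fsdp_device_mesh
    mesh = state.device_mesh
    # Before 0.24 (deprecated): state.load_fsdp_monolith_rank0_only
    rank0_only = state.load_monolith_rank0_only
    return mesh, rank0_only
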
36 changes: 0 additions & 36 deletions composer/utils/parallelism.py
@@ -3,15 +3,11 @@

 """Parallelism configs."""

-import warnings
 from dataclasses import dataclass
 from typing import Any, Optional

 from torch.distributed._tensor.device_mesh import DeviceMesh

-from composer.utils.warnings import VersionedDeprecationWarning
-
-
 @dataclass
 class FSDPConfig:
     """Configuration for Fully Sharded Data Parallelism (FSDP)."""
@@ -26,7 +22,6 @@ class FSDPConfig:
     cpu_offload: bool = False
     data_parallel_shard_degree: int = -1
     data_parallel_replicate_degree: Optional[int] = None
-    device_mesh: Optional[DeviceMesh] = None
     forward_prefetch: bool = False
     forward_prefetch_limit: int = 1
     ignored_modules: Optional[Any] = None
@@ -35,7 +30,6 @@ class FSDPConfig:
     load_monolith_rank0_only: bool = False
     load_planner: Optional[Any] = None
     mixed_precision: str = 'DEFAULT'
-    process_group: Optional[Any] = None
     save_planner: Optional[Any] = None
     sharded_ckpt_prefix_dir: str = 'ep{epoch}-ba{batch}'
     sharding_strategy: str = 'FULL_SHARD'
@@ -48,36 +42,6 @@
 def create_fsdp_config(fsdp_config: dict[str, Any]):
     """Modify fsdp_config to set default values for missing keys."""
     fsdp_config = {**fsdp_config}  # Shallow copy to avoid modifying input
-    if 'process_group' in fsdp_config:
-        warnings.warn(
-            VersionedDeprecationWarning(
-                'process_group is deprecated. Please specify `data_parallel_shard_degree` and `data_parallel_replicate_degree` instead.',
-                remove_version='0.24',
-            ),
-        )
-
-    if 'device_mesh' in fsdp_config:
-        warnings.warn(
-            VersionedDeprecationWarning(
-                'device_mesh is deprecated. Please specify `data_parallel_shard_degree` and `data_parallel_replicate_degree` instead.',
-                remove_version='0.24',
-            ),
-        )
-        if 'data_parallel_shard_degree' in fsdp_config or 'data_parallel_replicate_degree' in fsdp_config:
-            raise ValueError(
-                'Cannot specify both `device_mesh` and `data_parallel_shard_degree` or `data_parallel_replicate_degree`. Please remove `device_mesh`.',
-            )
-        device_mesh = fsdp_config.pop('device_mesh')
-        if len(device_mesh) == 1:
-            fsdp_config['data_parallel_shard_degree'] = device_mesh[0]
-        elif len(device_mesh) == 2:
-            fsdp_config['data_parallel_replicate_degree'] = device_mesh[0]
-            fsdp_config['data_parallel_shard_degree'] = device_mesh[1]
-        else:
-            raise ValueError(
-                f'device_mesh must be of length 1 or 2 but received length {len(device_mesh)} with device mesh {device_mesh}.',
-            )
-
     return FSDPConfig(**fsdp_config)
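
The removed shim translated a legacy `device_mesh` list into the two degree fields (`[shard]` for length 1, `[replicate, shard]` for length 2); with it gone, that translation happens at the call site. A sketch of the migration, with illustrative degree values:

from composer.utils.parallelism import FSDPConfig

# Before 0.24: create_fsdp_config({'device_mesh': [2, 4], ...}) emitted a
# VersionedDeprecationWarning and rewrote the list into the degree fields.
# After: pass the degrees directly (2 replica groups, each sharded 4 ways).
config = FSDPConfig(
    data_parallel_replicate_degree=2,
    data_parallel_shard_degree=4,
)
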
4 changes: 2 additions & 2 deletions docker/README.md
@@ -15,8 +15,8 @@ all dependencies for both NLP and Vision models. They are built on top of the
 <!-- BEGIN_COMPOSER_BUILD_MATRIX -->
 | Composer Version   | CUDA Support   | Docker Tag                                                     |
 |--------------------|----------------|----------------------------------------------------------------|
-| 0.23.5             | Yes            | `mosaicml/composer:latest`, `mosaicml/composer:0.23.5`         |
-| 0.23.5             | No             | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.23.5_cpu` |
+| 0.24.0             | Yes            | `mosaicml/composer:latest`, `mosaicml/composer:0.24.0`         |
+| 0.24.0             | No             | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.24.0_cpu` |
 <!-- END_COMPOSER_BUILD_MATRIX -->

 **Note**: For a lightweight installation, we recommend using a [MosaicML PyTorch Image](#pytorch-images) and manually
12 changes: 6 additions & 6 deletions docker/build_matrix.yaml
@@ -208,9 +208,9 @@
   TORCHVISION_VERSION: 0.16.2
 - AWS_OFI_NCCL_VERSION: ''
   BASE_IMAGE: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04
-  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.23.5
+  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.24.0
   CUDA_VERSION: 12.1.1
-  IMAGE_NAME: composer-0-23-5
+  IMAGE_NAME: composer-0-24-0
   MOFED_VERSION: latest-23.10
   NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471
     brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471
@@ -231,23 +231,23 @@
   PYTORCH_NIGHTLY_VERSION: ''
   PYTORCH_VERSION: 2.3.1
   TAGS:
-  - mosaicml/composer:0.23.5
+  - mosaicml/composer:0.24.0
   - mosaicml/composer:latest
   TARGET: composer_stage
   TORCHVISION_VERSION: 0.18.1
 - AWS_OFI_NCCL_VERSION: ''
   BASE_IMAGE: ubuntu:20.04
-  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.23.5
+  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.24.0
   CUDA_VERSION: ''
-  IMAGE_NAME: composer-0-23-5-cpu
+  IMAGE_NAME: composer-0-24-0-cpu
   MOFED_VERSION: latest-23.10
   NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
   PYTHON_VERSION: '3.11'
   PYTORCH_NIGHTLY_URL: ''
   PYTORCH_NIGHTLY_VERSION: ''
   PYTORCH_VERSION: 2.3.1
   TAGS:
-  - mosaicml/composer:0.23.5_cpu
+  - mosaicml/composer:0.24.0_cpu
   - mosaicml/composer:latest_cpu
   TARGET: composer_stage
   TORCHVISION_VERSION: 0.18.1
2 changes: 1 addition & 1 deletion docker/generate_build_matrix.py
@@ -231,7 +231,7 @@ def _main():
     composer_entries = []

     # The `GIT_COMMIT` is a placeholder and Jenkins will substitute it with the actual git commit for the `composer_staging` images
-    composer_versions = ['0.23.5']  # Only build images for the latest composer version
+    composer_versions = ['0.24.0']  # Only build images for the latest composer version
     composer_python_versions = [PRODUCTION_PYTHON_VERSION]  # just build composer against the latest

     for product in itertools.product(composer_python_versions, composer_versions, cuda_options):
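
For context, the loop being edited expands a small Cartesian product into the image matrix above. A self-contained sketch of that expansion; the values and the `cuda_options` list are illustrative stand-ins, not the script's real inputs:

import itertools

# Illustrative stand-ins; the real script derives these elsewhere.
composer_python_versions = ['3.11']
composer_versions = ['0.24.0']
cuda_options = [True, False]

for python_version, composer_version, use_cuda in itertools.product(
    composer_python_versions,
    composer_versions,
    cuda_options,
):
    suffix = '' if use_cuda else '_cpu'
    # Mirrors the TAGS entries in docker/build_matrix.yaml.
    print(f'mosaicml/composer:{composer_version}{suffix}')
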
