diff --git a/composer/_version.py b/composer/_version.py index fffe771b0c..ad813aba8b 100644 --- a/composer/_version.py +++ b/composer/_version.py @@ -3,4 +3,4 @@ """The Composer Version.""" -__version__ = '0.20.1' +__version__ = '0.21.0' diff --git a/composer/core/state.py b/composer/core/state.py index a21b142d42..ac9c5e0064 100644 --- a/composer/core/state.py +++ b/composer/core/state.py @@ -44,7 +44,6 @@ is_model_deepspeed, reproducibility, ) -from composer.utils.warnings import VersionedDeprecationWarning if TYPE_CHECKING: import deepspeed @@ -790,16 +789,6 @@ def fsdp_state_dict_type(self): def fsdp_sharded_state_dict_enabled(self): return self.fsdp_config is not None and self.fsdp_enabled and self.fsdp_state_dict_type == 'sharded' - @property - def fsdp_elastic_sharded_enabled(self): - warnings.warn( - VersionedDeprecationWarning( - 'state.fsdp_elastic_sharded_enabled is deprecated.', - remove_version='0.21.0', - ), - ) - return self.fsdp_sharded_state_dict_enabled - @property def fsdp_device_mesh(self): if self.fsdp_enabled: diff --git a/composer/core/time.py b/composer/core/time.py index 35c17d74f9..f05b521614 100644 --- a/composer/core/time.py +++ b/composer/core/time.py @@ -19,12 +19,10 @@ import datetime import re -import warnings from typing import Any, Dict, Generic, Optional, TypeVar, Union, cast from composer.core.serializable import Serializable from composer.utils import StringEnum -from composer.utils.warnings import VersionedDeprecationWarning __all__ = ['TimeUnit', 'Time', 'Timestamp', 'ensure_time'] @@ -540,18 +538,6 @@ def state_dict(self) -> Dict[str, Any]: 'batch_wct': self.batch_wct, } - def get_state(self) -> Dict[str, Union[Time[int], datetime.timedelta]]: - """Returns all values of the timestamp object in a dictionary. - - Returns: - Dict[str, Union[Time[int], datetime.timedelta]]: All values of the timestamp object. 
- """ - warnings.warn( - VersionedDeprecationWarning('core.time.Timestamp.get_state is deprecated.', remove_version='0.21.0'), - ) - - return self.state_dict() - def load_state_dict(self, state: Dict[str, Any]) -> None: self._epoch = Time(state['epoch'], TimeUnit.EPOCH) self._batch = Time(state['batch'], TimeUnit.BATCH) diff --git a/composer/metrics/nlp.py b/composer/metrics/nlp.py index 4b4a0218b5..5082ec87ee 100644 --- a/composer/metrics/nlp.py +++ b/composer/metrics/nlp.py @@ -247,7 +247,7 @@ def update( ): """Abstract interface for computing an in-context learning metrics. - The `output_logits` argument is deprecated and will be removed in v0.21 while it's functionality will + The `output_logits` argument is deprecated and will be removed in v0.22 while its functionality will be moved to `outputs`. Args: @@ -255,6 +255,7 @@ def update( to compute the metric. output_logits (torch.Tensor): The model outputs evaluated on the batch `input_ids` labels (torch.Tensor): The correct outputs. + outputs (torch.Tensor): The model outputs evaluated on the batch `input_ids`. Raises: NotImplementedError: Abstract method must be implemented by subclasses diff --git a/composer/models/huggingface.py b/composer/models/huggingface.py index 9bb7d62b82..e0ad6fdf6d 100644 --- a/composer/models/huggingface.py +++ b/composer/models/huggingface.py @@ -513,7 +513,7 @@ def eval_forward(self, batch, outputs: Optional[Any] = None): warnings.warn( VersionedDeprecationWarning( '`generation_length` has been deprecated in favor of passing `max_new_tokens` directly into `generation_kwargs`.', - remove_version='0.21.0', + remove_version='0.22.0', ), ) if 'generation_kwargs' in batch: diff --git a/docker/README.md b/docker/README.md index 73b4b1e13b..5e5d943ee0 100644 --- a/docker/README.md +++ b/docker/README.md @@ -15,8 +15,8 @@ all dependencies for both NLP and Vision models. 
They are built on top of the | Composer Version | CUDA Support | Docker Tag | |--------------------|----------------|----------------------------------------------------------------| -| 0.20.1 | Yes | `mosaicml/composer:latest`, `mosaicml/composer:0.20.1` | -| 0.20.1 | No | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.20.1_cpu` | +| 0.21.0 | Yes | `mosaicml/composer:latest`, `mosaicml/composer:0.21.0` | +| 0.21.0 | No | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.21.0_cpu` | **Note**: For a lightweight installation, we recommended using a [MosaicML PyTorch Image](#pytorch-images) and manually @@ -30,8 +30,6 @@ To install composer, once inside the image, run `pip install mosaicml`. | Linux Distro | Flavor | PyTorch Version | CUDA Version | Python Version | Docker Tags | |----------------|----------|-------------------|---------------------|------------------|------------------------------------------------------------------------------------------| -| Ubuntu 20.04 | Base | 2.3.0 | 12.1.1 (Infiniband) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04` | -| Ubuntu 20.04 | Base | 2.3.0 | 12.1.1 (EFA) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04-aws` | | Ubuntu 20.04 | Base | 2.2.1 | 12.1.1 (Infiniband) | 3.11 | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.2.1 | 12.1.1 (EFA) | 3.11 | `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04-aws` | | Ubuntu 20.04 | Base | 2.2.1 | cpu | 3.11 | `mosaicml/pytorch:2.2.1_cpu-python3.11-ubuntu20.04` | diff --git a/docker/build_matrix.yaml b/docker/build_matrix.yaml index 31e3e1ba27..6b150ab7e0 100644 --- a/docker/build_matrix.yaml +++ b/docker/build_matrix.yaml @@ -190,65 +190,11 @@ - mosaicml/pytorch:2.0.1_cpu-python3.10-ubuntu20.04 TARGET: pytorch_stage TORCHVISION_VERSION: 0.15.2 -- AWS_OFI_NCCL_VERSION: v1.7.4-aws - BASE_IMAGE: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04 - CUDA_VERSION: 12.1.1 - IMAGE_NAME: 
torch-nightly-2-3-0-20240110-cu121-python3-11-aws - MOFED_VERSION: '' - NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 - brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 - brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 - brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 - brand=tesla,driver>=510,driver<511 brand=unknown,driver>=510,driver<511 brand=nvidia,driver>=510,driver<511 - brand=nvidiartx,driver>=510,driver<511 brand=geforce,driver>=510,driver<511 brand=geforcertx,driver>=510,driver<511 - brand=quadro,driver>=510,driver<511 brand=quadrortx,driver>=510,driver<511 brand=titan,driver>=510,driver<511 - brand=titanrtx,driver>=510,driver<511 brand=tesla,driver>=515,driver<516 brand=unknown,driver>=515,driver<516 - brand=nvidia,driver>=515,driver<516 brand=nvidiartx,driver>=515,driver<516 brand=geforce,driver>=515,driver<516 - brand=geforcertx,driver>=515,driver<516 brand=quadro,driver>=515,driver<516 brand=quadrortx,driver>=515,driver<516 - brand=titan,driver>=515,driver<516 brand=titanrtx,driver>=515,driver<516 brand=tesla,driver>=525,driver<526 - brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 - brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 - brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526 - PYTHON_VERSION: '3.11' - PYTORCH_NIGHTLY_URL: https://download.pytorch.org/whl/nightly/cu121 - PYTORCH_NIGHTLY_VERSION: dev20240110+cu121 - PYTORCH_VERSION: 2.3.0 - TAGS: - - mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04-aws - TARGET: pytorch_stage - TORCHVISION_VERSION: 0.18.0 -- AWS_OFI_NCCL_VERSION: '' - 
BASE_IMAGE: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04 - CUDA_VERSION: 12.1.1 - IMAGE_NAME: torch-nightly-2-3-0-20240110-cu121-python3-11 - MOFED_VERSION: 5.5-1.0.3.2 - NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 - brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 - brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 - brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 - brand=tesla,driver>=510,driver<511 brand=unknown,driver>=510,driver<511 brand=nvidia,driver>=510,driver<511 - brand=nvidiartx,driver>=510,driver<511 brand=geforce,driver>=510,driver<511 brand=geforcertx,driver>=510,driver<511 - brand=quadro,driver>=510,driver<511 brand=quadrortx,driver>=510,driver<511 brand=titan,driver>=510,driver<511 - brand=titanrtx,driver>=510,driver<511 brand=tesla,driver>=515,driver<516 brand=unknown,driver>=515,driver<516 - brand=nvidia,driver>=515,driver<516 brand=nvidiartx,driver>=515,driver<516 brand=geforce,driver>=515,driver<516 - brand=geforcertx,driver>=515,driver<516 brand=quadro,driver>=515,driver<516 brand=quadrortx,driver>=515,driver<516 - brand=titan,driver>=515,driver<516 brand=titanrtx,driver>=515,driver<516 brand=tesla,driver>=525,driver<526 - brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 - brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 - brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526 - PYTHON_VERSION: '3.11' - PYTORCH_NIGHTLY_URL: https://download.pytorch.org/whl/nightly/cu121 - PYTORCH_NIGHTLY_VERSION: dev20240110+cu121 - PYTORCH_VERSION: 2.3.0 - TAGS: - - 
mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04 - TARGET: pytorch_stage - TORCHVISION_VERSION: 0.18.0 - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04 - COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.20.1 + COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.21.0 CUDA_VERSION: 12.1.1 - IMAGE_NAME: composer-0-20-1 + IMAGE_NAME: composer-0-21-0 MOFED_VERSION: 5.5-1.0.3.2 NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 @@ -269,15 +215,15 @@ PYTORCH_NIGHTLY_VERSION: '' PYTORCH_VERSION: 2.1.2 TAGS: - - mosaicml/composer:0.20.1 + - mosaicml/composer:0.21.0 - mosaicml/composer:latest TARGET: composer_stage TORCHVISION_VERSION: 0.16.2 - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: ubuntu:20.04 - COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.20.1 + COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.21.0 CUDA_VERSION: '' - IMAGE_NAME: composer-0-20-1-cpu + IMAGE_NAME: composer-0-21-0-cpu MOFED_VERSION: 5.5-1.0.3.2 NVIDIA_REQUIRE_CUDA_OVERRIDE: '' PYTHON_VERSION: '3.10' @@ -285,7 +231,7 @@ PYTORCH_NIGHTLY_VERSION: '' PYTORCH_VERSION: 2.1.2 TAGS: - - mosaicml/composer:0.20.1_cpu + - mosaicml/composer:0.21.0_cpu - mosaicml/composer:latest_cpu TARGET: composer_stage TORCHVISION_VERSION: 0.16.2 diff --git a/docker/generate_build_matrix.py b/docker/generate_build_matrix.py index a45c08228d..0b2405417b 100644 --- a/docker/generate_build_matrix.py +++ b/docker/generate_build_matrix.py @@ -228,44 +228,10 @@ def _main(): pytorch_entries.append(entry) - nightly_entry_311_aws = { - 'AWS_OFI_NCCL_VERSION': 'v1.7.4-aws', - 'BASE_IMAGE': 'nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04', - 'CUDA_VERSION': '12.1.1', - 'IMAGE_NAME': 'torch-nightly-2-3-0-20240110-cu121-python3-11-aws', - 'MOFED_VERSION': '', - 'NVIDIA_REQUIRE_CUDA_OVERRIDE': _get_cuda_override('12.1.1'), - 'PYTHON_VERSION': '3.11', - 
'PYTORCH_VERSION': '2.3.0', - 'PYTORCH_NIGHTLY_URL': 'https://download.pytorch.org/whl/nightly/cu121', - 'PYTORCH_NIGHTLY_VERSION': 'dev20240110+cu121', - 'TAGS': ['mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04-aws'], - 'TARGET': 'pytorch_stage', - 'TORCHVISION_VERSION': '0.18.0', - } - pytorch_entries.append(nightly_entry_311_aws) - - nightly_entry_311 = { - 'AWS_OFI_NCCL_VERSION': '', - 'BASE_IMAGE': 'nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04', - 'CUDA_VERSION': '12.1.1', - 'IMAGE_NAME': 'torch-nightly-2-3-0-20240110-cu121-python3-11', - 'MOFED_VERSION': '5.5-1.0.3.2', - 'NVIDIA_REQUIRE_CUDA_OVERRIDE': _get_cuda_override('12.1.1'), - 'PYTHON_VERSION': '3.11', - 'PYTORCH_VERSION': '2.3.0', - 'PYTORCH_NIGHTLY_URL': 'https://download.pytorch.org/whl/nightly/cu121', - 'PYTORCH_NIGHTLY_VERSION': 'dev20240110+cu121', - 'TAGS': ['mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04'], - 'TARGET': 'pytorch_stage', - 'TORCHVISION_VERSION': '0.18.0', - } - pytorch_entries.append(nightly_entry_311) - composer_entries = [] # The `GIT_COMMIT` is a placeholder and Jenkins will substitute it with the actual git commit for the `composer_staging` images - composer_versions = ['0.20.1'] # Only build images for the latest composer version + composer_versions = ['0.21.0'] # Only build images for the latest composer version composer_python_versions = [PRODUCTION_PYTHON_VERSION] # just build composer against the latest for product in itertools.product(composer_python_versions, composer_versions, cuda_options): diff --git a/docs/source/notes/distributed_training.rst b/docs/source/notes/distributed_training.rst index cab087f3b8..c64b51dca2 100644 --- a/docs/source/notes/distributed_training.rst +++ b/docs/source/notes/distributed_training.rst @@ -395,18 +395,14 @@ It does this by gathering the model state to the global rank 0 device, unflatten If `load_monolith_rank0_only=True`, then when loading checkpoints the global rank 0 device will load in 
the checkpoint file and scatter the model and optimizer state to the other ranks, which will will dramatically reduce the memory usage on system. Otherwise, all ranks will separately load in the checkpoint file. -2. :code:`state_dict_type='local'` -For save: each rank saves out the flattened model state shard they are -responsibile for to a distinct checkpoint file. For load, each rank loads in the checkpoint file -corresponding to their shard. **Note: state_dict_type='local' is deprecated in Composer for torch versions 2.0.0 or higher.** - -3. :code:`state_dict_type='sharded'` -Each rank saves out an unflattened shard. For loading, similar to ``state_dict_type='local'``, each rank -loads in the checkpoint file corresponding to their unflattened shard. **Note: state_dict_type='sharded' is the recommended setting for sharded checkpointing in Composer for torch versions 2.0.0 or higher.** +2. :code:`state_dict_type='sharded'` +Each rank saves out an unflattened shard. For loading, each rank loads in the checkpoint file +corresponding to their unflattened shard. +**Note: state_dict_type='sharded' is the recommended setting for sharded checkpointing in Composer for torch versions 2.0.0 or higher.** See `The FSDP docs `__ for more info. -If you use sharded checkpoints (`state_dict_type='sharded'` or `state_dict_type='local'`), your run will save as many files as you have +If you use sharded checkpoints (`state_dict_type='sharded'`), your run will save as many files as you have ranks at each checkpointing event (plus one metadata file for torch versions 2.0.0 or higher). This can quicky pollute your `save_folder` with a lot of files after a couple checkpointing events. To help keep your checkpoint shard files organized, Composer will save each set of shards in it's own prefix directory, which you can configure by using `'sharded_ckpt_prefix_dir'` (default value `sharded_ckpt_prefix_dir='ep{epoch}-ba{batch}'`). Checkpoint shards will be saved to