diff --git a/scripts/train/train.py b/scripts/train/train.py index c9e2d67bf4..3cf3d9551d 100644 --- a/scripts/train/train.py +++ b/scripts/train/train.py @@ -504,7 +504,7 @@ def main(cfg: DictConfig) -> Trainer: precision=train_cfg.precision, algorithms=algorithms, device_train_microbatch_size=train_cfg.device_train_microbatch_size, - fsdp_config=fsdp_config, + parallelism_config={'fsdp': fsdp_config}, save_folder=train_cfg.save_folder, save_filename=save_filename, save_latest_filename=save_latest_filename, diff --git a/setup.py b/setup.py index 78182976d4..0556050de9 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ ] install_requires = [ - 'mosaicml[libcloud,wandb,oci,gcs]>=0.22.0,<0.23', + 'mosaicml[libcloud,wandb,oci,gcs]>=0.23.0,<0.24', 'mlflow>=2.12.1,<2.13', 'accelerate>=0.25,<0.26', # for HF inference `device_map` 'transformers>=4.40,<4.41', @@ -92,14 +92,14 @@ ] extra_deps['databricks'] = [ - 'mosaicml[databricks]>=0.22.0,<0.23', + 'mosaicml[databricks]>=0.23.0,<0.24', 'databricks-sql-connector>=3,<4', 'databricks-connect==14.1.0', 'lz4>=4,<5', ] extra_deps['tensorboard'] = [ - 'mosaicml[tensorboard]>=0.22.0,<0.23', + 'mosaicml[tensorboard]>=0.23.0,<0.24', ] # Flash 2 group kept for backwards compatibility @@ -110,7 +110,7 @@ extra_deps['gpu'] = copy.deepcopy(extra_deps['gpu-flash2']) extra_deps['peft'] = [ - 'mosaicml[peft]>=0.22.0,<0.23', + 'mosaicml[peft]>=0.23.0,<0.24', ] extra_deps['openai'] = [ diff --git a/tests/models/test_model.py b/tests/models/test_model.py index a62a7dd114..2f93b1d3ce 100644 --- a/tests/models/test_model.py +++ b/tests/models/test_model.py @@ -13,10 +13,15 @@ import torch.nn as nn from accelerate import init_empty_weights from composer.core.precision import Precision, get_precision_context +from composer.distributed.dist_strategy import prepare_fsdp_module from composer.models.huggingface import maybe_get_underlying_model from composer.optim import DecoupledAdamW -from composer.trainer.dist_strategy import prepare_fsdp_module -from composer.utils import dist, get_device, reproducibility +from composer.utils import ( + FSDPConfig, + dist, + get_device, + reproducibility, +) from omegaconf import DictConfig, ListConfig from omegaconf import OmegaConf as om from transformers import ( @@ -2538,7 +2543,14 @@ def test_hf_init( betas=(0.9, 0.99), ) - prepare_fsdp_module(model, optimizer, fsdp_config, precision, device, False) + prepare_fsdp_module( + model, + optimizer, + FSDPConfig(**fsdp_config), + precision, + device, + False, + ) model = HuggingFaceModelWithFSDP(model, tokenizer)