From 0890d243f9c75ad8b69e18622fa7e307a06d8733 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Fri, 11 Oct 2024 18:44:31 +0000
Subject: [PATCH 01/10] Bump version to 0.14.0.dev0.

---
 llmfoundry/_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llmfoundry/_version.py b/llmfoundry/_version.py
index 0cddcaf967..e6385a53a7 100644
--- a/llmfoundry/_version.py
+++ b/llmfoundry/_version.py
@@ -3,4 +3,4 @@
 
 """The LLM Foundry Version."""
 
-__version__ = '0.13.0.dev0'
+__version__ = '0.14.0.dev0'

From 4f29dd8d71814b03585eea1fb843742b9030bb36 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Fri, 11 Oct 2024 19:18:05 +0000
Subject: [PATCH 02/10] Remove deprecated fsdp_config

---
 llmfoundry/command_utils/eval.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index 73127e8a07..d0471b9ee6 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -4,7 +4,6 @@
 import logging
 import os
 import time
-import warnings
 from typing import Any, Optional, Union
 
 import pandas as pd
@@ -63,7 +62,6 @@ def evaluate_model(
     callback_configs: Optional[dict[str, Any]],
     metadata: Optional[dict[str, str]],
     logged_config: dict[str, Any],
-    fsdp_config: Optional[dict[str, Any]] = None,
     parallelism_config: Optional[dict[str, Any]] = None,
     should_log_config: bool = True,
     load_path: Optional[str] = None,
@@ -78,18 +76,6 @@ def evaluate_model(
             'parallelism_config cannot contain deprecated fsdp_config arguments.',
         )
 
-    if fsdp_config:
-        warnings.warn(
-            VersionedDeprecationWarning(
-                'The argument fsdp_config is deprecated. Please use parallelism_config instead.',
-                remove_version='0.14.0',
-            ),
-        )
-    if fsdp_config and parallelism_config:
-        raise ValueError(
-            'Both fsdp_config and parallelism_config cannot be provided at the same time. Please use parallelism_config.',
-        )
-
     log.info(f'Evaluating model: {model_name}')
     # Build tokenizer and model
    tokenizer_cfg = tokenizer
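Illustrative note on PATCH 02 (not part of the diff): with the deprecation shim removed, callers can no longer pass `fsdp_config` to `evaluate_model`; the FSDP settings travel inside `parallelism_config` instead. A minimal migration sketch, where `common_args` is a hypothetical stand-in for the other required keyword arguments and the FSDP values are examples only:

    from llmfoundry.command_utils.eval import evaluate_model

    common_args: dict = {}  # tokenizer, model, loaders, etc. would go here

    evaluate_model(
        **common_args,
        # FSDP settings now ride inside parallelism_config, under the
        # 'fsdp_config' key that evaluate_model reads (see PATCH 04/05):
        parallelism_config={'fsdp_config': {'sharding_strategy': 'FULL_SHARD'}},
    )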
From f6f1c57caca37aa2c03ff580eead0eca4f90c119 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Fri, 11 Oct 2024 19:20:39 +0000
Subject: [PATCH 03/10] Remove deprecated HuggingFaceModelWithFSDP

---
 llmfoundry/models/hf/__init__.py      |   2 -
 llmfoundry/models/hf/model_wrapper.py | 103 --------------------------
 tests/models/test_model.py            |   6 +++---
 3 files changed, 3 insertions(+), 108 deletions(-)
 delete mode 100644 llmfoundry/models/hf/model_wrapper.py

diff --git a/llmfoundry/models/hf/__init__.py b/llmfoundry/models/hf/__init__.py
index 2f25f92940..03df90e8cd 100644
--- a/llmfoundry/models/hf/__init__.py
+++ b/llmfoundry/models/hf/__init__.py
@@ -9,7 +9,6 @@
     prepare_hf_model_for_fsdp,
 )
 from llmfoundry.models.hf.hf_t5 import ComposerHFT5
-from llmfoundry.models.hf.model_wrapper import HuggingFaceModelWithFSDP
 
 __all__ = [
     'BaseHuggingFaceModel',
@@ -18,5 +17,4 @@
     'prepare_hf_causal_lm_model_for_fsdp',
     'prepare_hf_enc_dec_model_for_fsdp',
     'prepare_hf_model_for_fsdp',
-    'HuggingFaceModelWithFSDP',
 ]
diff --git a/llmfoundry/models/hf/model_wrapper.py b/llmfoundry/models/hf/model_wrapper.py
deleted file mode 100644
index f2b67db1ec..0000000000
--- a/llmfoundry/models/hf/model_wrapper.py
+++ /dev/null
@@ -1,103 +0,0 @@
-# Copyright 2022 MosaicML LLM Foundry authors
-# SPDX-License-Identifier: Apache-2.0
-
-"""Re-usable :class:`.ComposerModel` for LLM HF Models."""
-
-from __future__ import annotations
-
-import warnings
-from collections import UserDict
-from typing import TYPE_CHECKING, Mapping, Optional, Union
-
-import transformers
-from composer.models.huggingface import HuggingFaceModel
-from torchmetrics import Metric
-from transformers import PreTrainedTokenizerBase
-from transformers.utils.generic import ModelOutput
-
-from llmfoundry.models.hf.hf_fsdp import prepare_hf_model_for_fsdp
-from llmfoundry.utils.warnings import VersionedDeprecationWarning
-
-if TYPE_CHECKING:
-    from peft import PeftConfig, PeftModel
-
-__all__ = ['HuggingFaceModelWithFSDP']
-
-# HuggingFace hardcodes the ignore index to -100
-_HF_IGNORE_INDEX = -100
-
-
-class HuggingFaceModelWithFSDP(HuggingFaceModel):
-    """Wrapper around HuggingFaceModel.
-
-    Handles preparation for FSDP wrapping.
-    """
-
-    def __init__(
-        self,
-        model: Union[transformers.PreTrainedModel, 'PeftModel'],
-        tokenizer: Optional[PreTrainedTokenizerBase] = None,
-        metrics: Optional[list[Metric]] = None,
-        eval_metrics: Optional[list[Metric]] = None,
-        shift_labels: bool = False,
-        allow_embedding_resizing: bool = False,
-        init_device: Optional[str] = None,
-        peft_config: Optional['PeftConfig'] = None,
-        should_save_peft_only: bool = True,
-    ):
-        warnings.warn(
-            VersionedDeprecationWarning(
-                '`HuggingFaceModelWithFSDP` is deprecated. In the future please use `BaseHuggingFaceModel`.',
-                remove_version='0.14.0',
-            ),
-        )
-        super().__init__(
-            model,
-            tokenizer,
-            use_logits=True,
-            metrics=metrics,
-            eval_metrics=eval_metrics,
-            shift_labels=shift_labels,
-            allow_embedding_resizing=allow_embedding_resizing,
-            peft_config=peft_config,
-            should_save_peft_only=should_save_peft_only,
-        )
-
-        self.prepare_inner_model(self.model, init_device)
-
-    def forward(self, batch: Mapping):
-        if isinstance(batch, dict) or isinstance(batch, UserDict):
-            # Further input validation is left to the huggingface forward call
-            batch = {
-                k: v for k, v in batch.items() if k in self.model_forward_args
-            }
-            output = self.model(**batch)  # type: ignore (thirdparty)
-        else:
-            raise ValueError(
-                'Unexpected batch type. Expected a dictionary with keys corresponding to the inputs to the forward function of the Huggingface model',
-            )
-        return output
-
-    def loss(self, outputs: ModelOutput, batch: Mapping):
-        if self.config.use_return_dict:
-            return outputs['loss']
-        # loss is at index 0 in the output tuple, logits are at index 1
-        return outputs[:2]
-
-    @staticmethod
-    def prepare_inner_model(
-        model: Union[transformers.PreTrainedModel, 'PeftModel'],
-        init_device: Optional[str] = None,
-    ):
-        """Prepare the inner model for FSDP wrapping.
-
-        Args:
-            model: The model to prepare.
-            init_device: The device to initialize the model on.
-        """
-        # Note: We need to add the FSDP related attributes to the model AFTER the super init,
-        # so that the (possible) embedding resizing doesn't destroy them
-        prepare_hf_model_for_fsdp(model, init_device)
-
-        # This provides support for meta initialization when using FSDP
-        model.param_init_fn = lambda module: model._init_weights(module)
diff --git a/tests/models/test_model.py b/tests/models/test_model.py
index eeb6bf0d90..e2ddb0a012 100644
--- a/tests/models/test_model.py
+++ b/tests/models/test_model.py
@@ -39,7 +39,7 @@
 
 from llmfoundry import ComposerHFCausalLM
 from llmfoundry.layers_registry import norms
-from llmfoundry.models.hf.model_wrapper import HuggingFaceModelWithFSDP
+from llmfoundry.models.hf.hf_base import BaseHuggingFaceModel
 from llmfoundry.models.layers import build_alibi_bias
 from llmfoundry.models.layers.attention import (
     check_alibi_support,
@@ -2560,7 +2560,7 @@ def test_hf_init(
         False,
     )
 
-    model = HuggingFaceModelWithFSDP(model, tokenizer)
+    model = BaseHuggingFaceModel(model, tokenizer)
 
     batch = gen_random_batch(batch_size, test_cfg)
 
@@ -2609,7 +2609,7 @@ def test_head_dim_8_flash_mqa_attn(batch_size: int = 2):
 
     mpt = MPTForCausalLM(hf_config)
 
-    model = HuggingFaceModelWithFSDP(mpt, tokenizer, shift_labels=True)
+    model = BaseHuggingFaceModel(mpt, tokenizer, shift_labels=True)
 
     model = model.to(test_cfg.device)
     batch = gen_random_batch(batch_size, test_cfg)
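Illustrative note on PATCH 03 (not part of the diff): the FSDP preparation that the deleted wrapper performed is not lost; `prepare_hf_model_for_fsdp` remains exported from `llmfoundry.models.hf` (see the `__init__.py` hunk above) and can be called directly. A minimal sketch, where the `'gpt2'` checkpoint and `init_device='cpu'` are illustrative choices; the final line is copied from the deleted class:

    import transformers

    from llmfoundry.models.hf import prepare_hf_model_for_fsdp

    model = transformers.AutoModelForCausalLM.from_pretrained('gpt2')

    # Add the FSDP wrapping attributes after any embedding resizing, as the
    # deleted wrapper did, then enable meta initialization under FSDP:
    prepare_hf_model_for_fsdp(model, init_device='cpu')
    model.param_init_fn = lambda module: model._init_weights(module)

Note that PATCH 08 later in this series moves the tests from `BaseHuggingFaceModel` to Composer's `HuggingFaceModel`.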
From 3849957c6b61b40974497ca9dac564bf182893df Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Fri, 11 Oct 2024 20:00:04 +0000
Subject: [PATCH 04/10] fix

---
 llmfoundry/command_utils/eval.py    |  3 +--
 tests/eval/test_eval_deprecation.py | 33 -----------------------------
 2 files changed, 1 insertion(+), 35 deletions(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index d0471b9ee6..0181ff8ab1 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -36,7 +36,6 @@
     process_init_device,
 )
 from llmfoundry.utils.registry_utils import import_file
-from llmfoundry.utils.warnings import VersionedDeprecationWarning
 
 log = logging.getLogger(__name__)
 
@@ -113,7 +112,7 @@ def evaluate_model(
     fsdp_config = parallelism_config.get(
         'fsdp_config',
         None,
-    ) if parallelism_config else fsdp_config
+    )
     if fsdp_config and model.get('load_in_8bit', False):
         raise ValueError(
             'The FSDP config block is not supported when loading ' +
diff --git a/tests/eval/test_eval_deprecation.py b/tests/eval/test_eval_deprecation.py
index 828186245a..e6b64cab05 100644
--- a/tests/eval/test_eval_deprecation.py
+++ b/tests/eval/test_eval_deprecation.py
@@ -90,36 +90,3 @@ def test_deprecation_warning_with_deprecated_arg(self):
             'parallelism_config cannot contain deprecated fsdp_config arguments.',
             str(context.exception),
         )
-
-    def test_deprecation_warning_with_fsdp_config(self):
-        with warnings.catch_warnings(record=True) as w:
-            warnings.simplefilter('always')
-
-            try:
-                evaluate_model(
-                    **self.common_args,
-                    parallelism_config=None,
-                    fsdp_config={'verbose': True},
-                )
-            except Exception:
-                pass
-
-            self.assertTrue(
-                any(
-                    issubclass(warning.category, VersionedDeprecationWarning)
-                    for warning in w
-                ),
-            )
-
-    def test_error_with_both_fsdp_and_parallelism_config(self):
-        with self.assertRaises(ValueError) as context:
-            evaluate_model(
-                **self.common_args,
-                parallelism_config={'some_arg': True},
-                fsdp_config={'some_arg': True},
-            )
-
-        self.assertIn(
-            'Both fsdp_config and parallelism_config cannot be provided at the same time.',
-            str(context.exception),
-        )
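Illustrative note on PATCH 04 (not part of the diff): `fsdp_config` is now derived only from `parallelism_config`, so the dangling fallback to the argument removed in PATCH 02 is gone. A standalone sketch of the extraction with example values:

    # The FSDP block is read back out of parallelism_config:
    parallelism_config = {'fsdp_config': {'sharding_strategy': 'FULL_SHARD'}}
    fsdp_config = parallelism_config.get(
        'fsdp_config',
        None,
    )
    # Caveat: as written in this patch, the .get() call assumes
    # parallelism_config is not None; PATCH 05 below adds the
    # `if parallelism_config else None` guard.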
From a4f6a8ed720fed58e5ecabf930da49c886a38d03 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Fri, 11 Oct 2024 20:10:55 +0000
Subject: [PATCH 05/10] fix

---
 llmfoundry/command_utils/eval.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index 0181ff8ab1..14874bc16e 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -112,7 +112,7 @@ def evaluate_model(
     fsdp_config = parallelism_config.get(
         'fsdp_config',
         None,
-    )
+    ) if parallelism_config else None
     if fsdp_config and model.get('load_in_8bit', False):
         raise ValueError(
             'The FSDP config block is not supported when loading ' +

From df8f451f2f62fba3ded46189cddeb489378c6994 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Fri, 11 Oct 2024 20:20:18 +0000
Subject: [PATCH 06/10] fix

---
 llmfoundry/command_utils/eval.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index 14874bc16e..f160da9d4b 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -330,7 +330,6 @@ def evaluate(cfg: DictConfig) -> tuple[list[Trainer], pd.DataFrame]:
         device_eval_batch_size=eval_config.device_eval_batch_size,
         eval_gauntlet_config=eval_gauntlet_config,
         eval_loader_config=eval_loader_config,
-        fsdp_config=fsdp_config,
         loggers=loggers,
         python_log_level=eval_config.python_log_level,
         precision=eval_config.precision,

From d384b10636b14ee03c068a7a9006b1faab939a91 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Fri, 11 Oct 2024 23:43:47 +0000
Subject: [PATCH 07/10] fix

---
 llmfoundry/command_utils/eval.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index f160da9d4b..e94949b320 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -253,8 +253,6 @@ def evaluate(cfg: DictConfig) -> tuple[list[Trainer], pd.DataFrame]:
     model_configs = eval_config.models
     eval_gauntlet_config = eval_config.eval_gauntlet or eval_config.eval_gauntlet_str
 
-    fsdp_config = eval_config.fsdp_config
-
     # Mandatory Evaluation Parameters
     icl_tasks = eval_config.icl_tasks or eval_config.icl_tasks_str
     if icl_tasks is None:
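Illustrative note on PATCHES 05-07 (not part of the diff): `evaluate` no longer reads `eval_config.fsdp_config` into a local or forwards it to `evaluate_model`, and the guarded expression from PATCH 05 degrades cleanly when no parallelism settings are supplied. A standalone sketch:

    parallelism_config = None  # e.g. no parallelism settings supplied
    fsdp_config = parallelism_config.get(
        'fsdp_config',
        None,
    ) if parallelism_config else None

    # With no parallelism_config there is no FSDP block, so the
    # load_in_8bit compatibility check above is skipped:
    assert fsdp_config is None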
From 0f27850992656a26f628200538721624a87559f6 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Mon, 14 Oct 2024 16:56:55 +0000
Subject: [PATCH 08/10] fix

---
 tests/models/test_model.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/models/test_model.py b/tests/models/test_model.py
index e2ddb0a012..43067f5e47 100644
--- a/tests/models/test_model.py
+++ b/tests/models/test_model.py
@@ -15,7 +15,10 @@
 from accelerate import init_empty_weights
 from composer.core.precision import Precision, get_precision_context
 from composer.distributed.dist_strategy import prepare_fsdp_module
-from composer.models.huggingface import maybe_get_underlying_model
+from composer.models.huggingface import (
+    HuggingFaceModel,
+    maybe_get_underlying_model,
+)
 from composer.optim import DecoupledAdamW
 from composer.utils import (
     FSDPConfig,
@@ -39,7 +42,6 @@
 
 from llmfoundry import ComposerHFCausalLM
 from llmfoundry.layers_registry import norms
-from llmfoundry.models.hf.hf_base import BaseHuggingFaceModel
 from llmfoundry.models.layers import build_alibi_bias
 from llmfoundry.models.layers.attention import (
     check_alibi_support,
@@ -2560,7 +2562,7 @@ def test_hf_init(
         False,
     )
 
-    model = BaseHuggingFaceModel(model, tokenizer)
+    model = HuggingFaceModel(model, tokenizer)
 
     batch = gen_random_batch(batch_size, test_cfg)
 
@@ -2609,7 +2611,7 @@ def test_head_dim_8_flash_mqa_attn(batch_size: int = 2):
 
     mpt = MPTForCausalLM(hf_config)
 
-    model = BaseHuggingFaceModel(mpt, tokenizer, shift_labels=True)
+    model = HuggingFaceModel(mpt, tokenizer, shift_labels=True)
 
     model = model.to(test_cfg.device)
     batch = gen_random_batch(batch_size, test_cfg)

From e08d25cddc132e99a2f1004e1b45ffadfda39047 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Mon, 14 Oct 2024 20:03:03 +0000
Subject: [PATCH 09/10] pass fsdp config through via parallelism_config

---
 llmfoundry/command_utils/eval.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index e94949b320..953daa8105 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -330,6 +330,7 @@ def evaluate(cfg: DictConfig) -> tuple[list[Trainer], pd.DataFrame]:
         eval_loader_config=eval_loader_config,
         loggers=loggers,
         python_log_level=eval_config.python_log_level,
+        parallelism_config={'fsdp': eval_config.fsdp_config},
         precision=eval_config.precision,
         eval_gauntlet_df=eval_gauntlet_df,
         callback_configs=eval_config.callbacks,

From c5c6c753f9a3130181a97d54825f44919af5f242 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Mon, 14 Oct 2024 20:12:47 +0000
Subject: [PATCH 10/10] pass parallelism config directly

---
 llmfoundry/command_utils/eval.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index 953daa8105..0678925a84 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -160,7 +160,7 @@ def evaluate_model(
         callbacks=callbacks,
         loggers=loggers,
         precision=precision,
-        parallelism_config={'fsdp': fsdp_config},
+        parallelism_config=parallelism_config,
         load_path=load_path,
         load_weights_only=True,
         progress_bar=False,
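Illustrative note on PATCHES 09-10 (not part of the diff): `evaluate` wraps the YAML-level `fsdp_config` under Composer's `'fsdp'` key before calling `evaluate_model`, which now forwards `parallelism_config` to the Trainer unchanged. Observe that the `'fsdp'` key written here is distinct from the `'fsdp_config'` key that `evaluate_model` reads in PATCH 05. A standalone sketch with example values:

    fsdp_cfg = {'sharding_strategy': 'FULL_SHARD'}  # stands in for eval_config.fsdp_config

    # evaluate() -> evaluate_model(): FSDP block nested under Composer's key.
    parallelism_config = {'fsdp': fsdp_cfg}

    # evaluate_model() -> Trainer(): passed through as-is.
    trainer_kwargs = {'parallelism_config': parallelism_config}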