From 7cb1c084bcc0d2f013b032272887cd28c4c499cc Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 24 Sep 2024 19:32:35 -0700 Subject: [PATCH 1/7] bump version to 0.13.0.dev0 --- llmfoundry/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llmfoundry/_version.py b/llmfoundry/_version.py index 2f1f590b19..0cddcaf967 100644 --- a/llmfoundry/_version.py +++ b/llmfoundry/_version.py @@ -3,4 +3,4 @@ """The LLM Foundry Version.""" -__version__ = '0.12.0.dev0' +__version__ = '0.13.0.dev0' From 6858db9d3988e35c54275f5bab44404ca95744a3 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 24 Sep 2024 19:32:44 -0700 Subject: [PATCH 2/7] deprecations --- llmfoundry/command_utils/eval.py | 2 +- llmfoundry/models/hf/__init__.py | 2 - llmfoundry/models/hf/model_wrapper.py | 103 -------------------------- tests/models/test_model.py | 6 +- 4 files changed, 4 insertions(+), 109 deletions(-) delete mode 100644 llmfoundry/models/hf/model_wrapper.py diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py index 70c4319ea8..73127e8a07 100644 --- a/llmfoundry/command_utils/eval.py +++ b/llmfoundry/command_utils/eval.py @@ -82,7 +82,7 @@ def evaluate_model( warnings.warn( VersionedDeprecationWarning( 'The argument fsdp_config is deprecated. Please use parallelism_config instead.', - remove_version='0.13.0', + remove_version='0.14.0', ), ) if fsdp_config and parallelism_config: diff --git a/llmfoundry/models/hf/__init__.py b/llmfoundry/models/hf/__init__.py index 2f25f92940..03df90e8cd 100644 --- a/llmfoundry/models/hf/__init__.py +++ b/llmfoundry/models/hf/__init__.py @@ -9,7 +9,6 @@ prepare_hf_model_for_fsdp, ) from llmfoundry.models.hf.hf_t5 import ComposerHFT5 -from llmfoundry.models.hf.model_wrapper import HuggingFaceModelWithFSDP __all__ = [ 'BaseHuggingFaceModel', @@ -18,5 +17,4 @@ 'prepare_hf_causal_lm_model_for_fsdp', 'prepare_hf_enc_dec_model_for_fsdp', 'prepare_hf_model_for_fsdp', - 'HuggingFaceModelWithFSDP', ] diff --git a/llmfoundry/models/hf/model_wrapper.py b/llmfoundry/models/hf/model_wrapper.py deleted file mode 100644 index c8805e5d6d..0000000000 --- a/llmfoundry/models/hf/model_wrapper.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2022 MosaicML LLM Foundry authors -# SPDX-License-Identifier: Apache-2.0 - -"""Re-usable :class:`.ComposerModel` for LLM HF Models.""" - -from __future__ import annotations - -import warnings -from collections import UserDict -from typing import TYPE_CHECKING, Mapping, Optional, Union - -import transformers -from composer.models.huggingface import HuggingFaceModel -from torchmetrics import Metric -from transformers import PreTrainedTokenizerBase -from transformers.utils.generic import ModelOutput - -from llmfoundry.models.hf.hf_fsdp import prepare_hf_model_for_fsdp -from llmfoundry.utils.warnings import VersionedDeprecationWarning - -if TYPE_CHECKING: - from peft import PeftConfig, PeftModel - -__all__ = ['HuggingFaceModelWithFSDP'] - -# HuggingFace hardcodes the ignore index to -100 -_HF_IGNORE_INDEX = -100 - - -class HuggingFaceModelWithFSDP(HuggingFaceModel): - """Wrapper around HuggingFaceModel. - - Handles preparation for FSDP wrapping. - """ - - def __init__( - self, - model: Union[transformers.PreTrainedModel, 'PeftModel'], - tokenizer: Optional[PreTrainedTokenizerBase] = None, - metrics: Optional[list[Metric]] = None, - eval_metrics: Optional[list[Metric]] = None, - shift_labels: bool = False, - allow_embedding_resizing: bool = False, - init_device: Optional[str] = None, - peft_config: Optional['PeftConfig'] = None, - should_save_peft_only: bool = True, - ): - warnings.warn( - VersionedDeprecationWarning( - '`HuggingFaceModelWithFSDP` is deprecated. In the future please use `BaseHuggingFaceModel`.', - remove_version='0.13.0', - ), - ) - super().__init__( - model, - tokenizer, - use_logits=True, - metrics=metrics, - eval_metrics=eval_metrics, - shift_labels=shift_labels, - allow_embedding_resizing=allow_embedding_resizing, - peft_config=peft_config, - should_save_peft_only=should_save_peft_only, - ) - - self.prepare_inner_model(self.model, init_device) - - def forward(self, batch: Mapping): - if isinstance(batch, dict) or isinstance(batch, UserDict): - # Further input validation is left to the huggingface forward call - batch = { - k: v for k, v in batch.items() if k in self.model_forward_args - } - output = self.model(**batch) # type: ignore (thirdparty) - else: - raise ValueError( - 'Unexpected batch type. Expected a dictionary with keys corresponding to the inputs to the forward function of the Huggingface model', - ) - return output - - def loss(self, outputs: ModelOutput, batch: Mapping): - if self.config.use_return_dict: - return outputs['loss'] - # loss is at index 0 in the output tuple, logits are at index 1 - return outputs[:2] - - @staticmethod - def prepare_inner_model( - model: Union[transformers.PreTrainedModel, 'PeftModel'], - init_device: Optional[str] = None, - ): - """Prepare the inner model for FSDP wrapping. - - Args: - model: The model to prepare. - init_device: The device to initialize the model on. - """ - # Note: We need to add the FSDP related attributes to the model AFTER the super init, - # so that the (possible) embedding resizing doesn't destroy them - prepare_hf_model_for_fsdp(model, init_device) - - # This provides support for meta initialization when using FSDP - model.param_init_fn = lambda module: model._init_weights(module) diff --git a/tests/models/test_model.py b/tests/models/test_model.py index eeb6bf0d90..92effffdd8 100644 --- a/tests/models/test_model.py +++ b/tests/models/test_model.py @@ -39,7 +39,7 @@ from llmfoundry import ComposerHFCausalLM from llmfoundry.layers_registry import norms -from llmfoundry.models.hf.model_wrapper import HuggingFaceModelWithFSDP +from llmfoundry.models.hf import BaseHuggingFaceModel from llmfoundry.models.layers import build_alibi_bias from llmfoundry.models.layers.attention import ( check_alibi_support, @@ -2560,7 +2560,7 @@ def test_hf_init( False, ) - model = HuggingFaceModelWithFSDP(model, tokenizer) + model = BaseHuggingFaceModel(model, tokenizer) batch = gen_random_batch(batch_size, test_cfg) @@ -2609,7 +2609,7 @@ def test_head_dim_8_flash_mqa_attn(batch_size: int = 2): mpt = MPTForCausalLM(hf_config) - model = HuggingFaceModelWithFSDP(mpt, tokenizer, shift_labels=True) + model = BaseHuggingFaceModel(mpt, tokenizer, shift_labels=True) model = model.to(test_cfg.device) batch = gen_random_batch(batch_size, test_cfg) From 259cc76c20e11ce9f7b525173db7dafebfb035e3 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 24 Sep 2024 20:36:19 -0700 Subject: [PATCH 3/7] fix? --- tests/models/test_model.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/models/test_model.py b/tests/models/test_model.py index 92effffdd8..16f02d63e2 100644 --- a/tests/models/test_model.py +++ b/tests/models/test_model.py @@ -2551,8 +2551,10 @@ def test_hf_init( betas=(0.9, 0.99), ) + model = BaseHuggingFaceModel(str(save_path), tokenizer) + prepare_fsdp_module( - model, + model.model, optimizer, FSDPConfig(**fsdp_config), precision, @@ -2560,8 +2562,6 @@ def test_hf_init( False, ) - model = BaseHuggingFaceModel(model, tokenizer) - batch = gen_random_batch(batch_size, test_cfg) original_params = next(model.parameters()).clone().data @@ -2579,7 +2579,10 @@ def test_hf_init( @pytest.mark.gpu -def test_head_dim_8_flash_mqa_attn(batch_size: int = 2): +def test_head_dim_8_flash_mqa_attn( + tmp_path: pathlib.Path, + batch_size: int = 2, +): test_cfg = get_config(conf_path='scripts/train/yamls/pretrain/testing.yaml') test_cfg.device = torch.cuda.current_device() @@ -2608,8 +2611,9 @@ def test_head_dim_8_flash_mqa_attn(batch_size: int = 2): ) mpt = MPTForCausalLM(hf_config) + mpt.save_pretrained(tmp_path) - model = BaseHuggingFaceModel(mpt, tokenizer, shift_labels=True) + model = BaseHuggingFaceModel(str(tmp_path), tokenizer, shift_labels=True) model = model.to(test_cfg.device) batch = gen_random_batch(batch_size, test_cfg) From e27bb7b58646dc0b4b0145e20b4efb4c76353df3 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 24 Sep 2024 21:21:17 -0700 Subject: [PATCH 4/7] Revert "fix?" This reverts commit 259cc76c20e11ce9f7b525173db7dafebfb035e3. --- tests/models/test_model.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/models/test_model.py b/tests/models/test_model.py index 16f02d63e2..92effffdd8 100644 --- a/tests/models/test_model.py +++ b/tests/models/test_model.py @@ -2551,10 +2551,8 @@ def test_hf_init( betas=(0.9, 0.99), ) - model = BaseHuggingFaceModel(str(save_path), tokenizer) - prepare_fsdp_module( - model.model, + model, optimizer, FSDPConfig(**fsdp_config), precision, @@ -2562,6 +2560,8 @@ def test_hf_init( False, ) + model = BaseHuggingFaceModel(model, tokenizer) + batch = gen_random_batch(batch_size, test_cfg) original_params = next(model.parameters()).clone().data @@ -2579,10 +2579,7 @@ def test_hf_init( @pytest.mark.gpu -def test_head_dim_8_flash_mqa_attn( - tmp_path: pathlib.Path, - batch_size: int = 2, -): +def test_head_dim_8_flash_mqa_attn(batch_size: int = 2): test_cfg = get_config(conf_path='scripts/train/yamls/pretrain/testing.yaml') test_cfg.device = torch.cuda.current_device() @@ -2611,9 +2608,8 @@ def test_head_dim_8_flash_mqa_attn( ) mpt = MPTForCausalLM(hf_config) - mpt.save_pretrained(tmp_path) - model = BaseHuggingFaceModel(str(tmp_path), tokenizer, shift_labels=True) + model = BaseHuggingFaceModel(mpt, tokenizer, shift_labels=True) model = model.to(test_cfg.device) batch = gen_random_batch(batch_size, test_cfg) From 3b316c43bbb56eafcc301e59d9947bee58c8dc4a Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 24 Sep 2024 21:21:27 -0700 Subject: [PATCH 5/7] Revert "deprecations" This reverts commit 6858db9d3988e35c54275f5bab44404ca95744a3. --- llmfoundry/command_utils/eval.py | 2 +- llmfoundry/models/hf/__init__.py | 2 + llmfoundry/models/hf/model_wrapper.py | 103 ++++++++++++++++++++++++++ tests/models/test_model.py | 6 +- 4 files changed, 109 insertions(+), 4 deletions(-) create mode 100644 llmfoundry/models/hf/model_wrapper.py diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py index 73127e8a07..70c4319ea8 100644 --- a/llmfoundry/command_utils/eval.py +++ b/llmfoundry/command_utils/eval.py @@ -82,7 +82,7 @@ def evaluate_model( warnings.warn( VersionedDeprecationWarning( 'The argument fsdp_config is deprecated. Please use parallelism_config instead.', - remove_version='0.14.0', + remove_version='0.13.0', ), ) if fsdp_config and parallelism_config: diff --git a/llmfoundry/models/hf/__init__.py b/llmfoundry/models/hf/__init__.py index 03df90e8cd..2f25f92940 100644 --- a/llmfoundry/models/hf/__init__.py +++ b/llmfoundry/models/hf/__init__.py @@ -9,6 +9,7 @@ prepare_hf_model_for_fsdp, ) from llmfoundry.models.hf.hf_t5 import ComposerHFT5 +from llmfoundry.models.hf.model_wrapper import HuggingFaceModelWithFSDP __all__ = [ 'BaseHuggingFaceModel', @@ -17,4 +18,5 @@ 'prepare_hf_causal_lm_model_for_fsdp', 'prepare_hf_enc_dec_model_for_fsdp', 'prepare_hf_model_for_fsdp', + 'HuggingFaceModelWithFSDP', ] diff --git a/llmfoundry/models/hf/model_wrapper.py b/llmfoundry/models/hf/model_wrapper.py new file mode 100644 index 0000000000..c8805e5d6d --- /dev/null +++ b/llmfoundry/models/hf/model_wrapper.py @@ -0,0 +1,103 @@ +# Copyright 2022 MosaicML LLM Foundry authors +# SPDX-License-Identifier: Apache-2.0 + +"""Re-usable :class:`.ComposerModel` for LLM HF Models.""" + +from __future__ import annotations + +import warnings +from collections import UserDict +from typing import TYPE_CHECKING, Mapping, Optional, Union + +import transformers +from composer.models.huggingface import HuggingFaceModel +from torchmetrics import Metric +from transformers import PreTrainedTokenizerBase +from transformers.utils.generic import ModelOutput + +from llmfoundry.models.hf.hf_fsdp import prepare_hf_model_for_fsdp +from llmfoundry.utils.warnings import VersionedDeprecationWarning + +if TYPE_CHECKING: + from peft import PeftConfig, PeftModel + +__all__ = ['HuggingFaceModelWithFSDP'] + +# HuggingFace hardcodes the ignore index to -100 +_HF_IGNORE_INDEX = -100 + + +class HuggingFaceModelWithFSDP(HuggingFaceModel): + """Wrapper around HuggingFaceModel. + + Handles preparation for FSDP wrapping. + """ + + def __init__( + self, + model: Union[transformers.PreTrainedModel, 'PeftModel'], + tokenizer: Optional[PreTrainedTokenizerBase] = None, + metrics: Optional[list[Metric]] = None, + eval_metrics: Optional[list[Metric]] = None, + shift_labels: bool = False, + allow_embedding_resizing: bool = False, + init_device: Optional[str] = None, + peft_config: Optional['PeftConfig'] = None, + should_save_peft_only: bool = True, + ): + warnings.warn( + VersionedDeprecationWarning( + '`HuggingFaceModelWithFSDP` is deprecated. In the future please use `BaseHuggingFaceModel`.', + remove_version='0.13.0', + ), + ) + super().__init__( + model, + tokenizer, + use_logits=True, + metrics=metrics, + eval_metrics=eval_metrics, + shift_labels=shift_labels, + allow_embedding_resizing=allow_embedding_resizing, + peft_config=peft_config, + should_save_peft_only=should_save_peft_only, + ) + + self.prepare_inner_model(self.model, init_device) + + def forward(self, batch: Mapping): + if isinstance(batch, dict) or isinstance(batch, UserDict): + # Further input validation is left to the huggingface forward call + batch = { + k: v for k, v in batch.items() if k in self.model_forward_args + } + output = self.model(**batch) # type: ignore (thirdparty) + else: + raise ValueError( + 'Unexpected batch type. Expected a dictionary with keys corresponding to the inputs to the forward function of the Huggingface model', + ) + return output + + def loss(self, outputs: ModelOutput, batch: Mapping): + if self.config.use_return_dict: + return outputs['loss'] + # loss is at index 0 in the output tuple, logits are at index 1 + return outputs[:2] + + @staticmethod + def prepare_inner_model( + model: Union[transformers.PreTrainedModel, 'PeftModel'], + init_device: Optional[str] = None, + ): + """Prepare the inner model for FSDP wrapping. + + Args: + model: The model to prepare. + init_device: The device to initialize the model on. + """ + # Note: We need to add the FSDP related attributes to the model AFTER the super init, + # so that the (possible) embedding resizing doesn't destroy them + prepare_hf_model_for_fsdp(model, init_device) + + # This provides support for meta initialization when using FSDP + model.param_init_fn = lambda module: model._init_weights(module) diff --git a/tests/models/test_model.py b/tests/models/test_model.py index 92effffdd8..eeb6bf0d90 100644 --- a/tests/models/test_model.py +++ b/tests/models/test_model.py @@ -39,7 +39,7 @@ from llmfoundry import ComposerHFCausalLM from llmfoundry.layers_registry import norms -from llmfoundry.models.hf import BaseHuggingFaceModel +from llmfoundry.models.hf.model_wrapper import HuggingFaceModelWithFSDP from llmfoundry.models.layers import build_alibi_bias from llmfoundry.models.layers.attention import ( check_alibi_support, @@ -2560,7 +2560,7 @@ def test_hf_init( False, ) - model = BaseHuggingFaceModel(model, tokenizer) + model = HuggingFaceModelWithFSDP(model, tokenizer) batch = gen_random_batch(batch_size, test_cfg) @@ -2609,7 +2609,7 @@ def test_head_dim_8_flash_mqa_attn(batch_size: int = 2): mpt = MPTForCausalLM(hf_config) - model = BaseHuggingFaceModel(mpt, tokenizer, shift_labels=True) + model = HuggingFaceModelWithFSDP(mpt, tokenizer, shift_labels=True) model = model.to(test_cfg.device) batch = gen_random_batch(batch_size, test_cfg) From ccf135cac25101ac548341d26a7bd3c743c482d5 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 24 Sep 2024 21:22:00 -0700 Subject: [PATCH 6/7] bump dep version --- llmfoundry/models/hf/model_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llmfoundry/models/hf/model_wrapper.py b/llmfoundry/models/hf/model_wrapper.py index c8805e5d6d..f2b67db1ec 100644 --- a/llmfoundry/models/hf/model_wrapper.py +++ b/llmfoundry/models/hf/model_wrapper.py @@ -48,7 +48,7 @@ def __init__( warnings.warn( VersionedDeprecationWarning( '`HuggingFaceModelWithFSDP` is deprecated. In the future please use `BaseHuggingFaceModel`.', - remove_version='0.13.0', + remove_version='0.14.0', ), ) super().__init__( From fef1726085781dd7ff1438ea961398f59e2c2dbd Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 24 Sep 2024 21:23:09 -0700 Subject: [PATCH 7/7] one more --- llmfoundry/command_utils/eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py index 70c4319ea8..73127e8a07 100644 --- a/llmfoundry/command_utils/eval.py +++ b/llmfoundry/command_utils/eval.py @@ -82,7 +82,7 @@ def evaluate_model( warnings.warn( VersionedDeprecationWarning( 'The argument fsdp_config is deprecated. Please use parallelism_config instead.', - remove_version='0.13.0', + remove_version='0.14.0', ), ) if fsdp_config and parallelism_config: