From 0890d243f9c75ad8b69e18622fa7e307a06d8733 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Fri, 11 Oct 2024 18:44:31 +0000
Subject: [PATCH 01/10] Bump version to 0.14.0.dev0.

---
 llmfoundry/_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llmfoundry/_version.py b/llmfoundry/_version.py
index 0cddcaf967..e6385a53a7 100644
--- a/llmfoundry/_version.py
+++ b/llmfoundry/_version.py
@@ -3,4 +3,4 @@
 
 """The LLM Foundry Version."""
 
-__version__ = '0.13.0.dev0'
+__version__ = '0.14.0.dev0'

From 4f29dd8d71814b03585eea1fb843742b9030bb36 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Fri, 11 Oct 2024 19:18:05 +0000
Subject: [PATCH 02/10] Remove deprecated fsdp_config

---
 llmfoundry/command_utils/eval.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index 73127e8a07..d0471b9ee6 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -4,7 +4,6 @@
 import logging
 import os
 import time
-import warnings
 from typing import Any, Optional, Union
 
 import pandas as pd
@@ -63,7 +62,6 @@ def evaluate_model(
     callback_configs: Optional[dict[str, Any]],
     metadata: Optional[dict[str, str]],
     logged_config: dict[str, Any],
-    fsdp_config: Optional[dict[str, Any]] = None,
     parallelism_config: Optional[dict[str, Any]] = None,
     should_log_config: bool = True,
     load_path: Optional[str] = None,
@@ -78,18 +76,6 @@ def evaluate_model(
             'parallelism_config cannot contain deprecated fsdp_config arguments.',
         )
 
-    if fsdp_config:
-        warnings.warn(
-            VersionedDeprecationWarning(
-                'The argument fsdp_config is deprecated. Please use parallelism_config instead.',
-                remove_version='0.14.0',
-            ),
-        )
-    if fsdp_config and parallelism_config:
-        raise ValueError(
-            'Both fsdp_config and parallelism_config cannot be provided at the same time. Please use parallelism_config.',
-        )
-
     log.info(f'Evaluating model: {model_name}')
     # Build tokenizer and model
    tokenizer_cfg = tokenizer
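Illustrative note on PATCH 02 (not part of the diff): with the deprecation shim removed, callers can no longer pass `fsdp_config` to `evaluate_model`; the FSDP settings travel inside `parallelism_config` instead. A minimal migration sketch, where `common_args` is a hypothetical stand-in for the other required keyword arguments and the FSDP values are examples only:

    from llmfoundry.command_utils.eval import evaluate_model

    common_args: dict = {}  # tokenizer, model, loaders, etc. would go here

    evaluate_model(
        **common_args,
        # FSDP settings now ride inside parallelism_config, under the
        # 'fsdp_config' key that evaluate_model reads (see PATCH 04/05):
        parallelism_config={'fsdp_config': {'sharding_strategy': 'FULL_SHARD'}},
    )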
From f6f1c57caca37aa2c03ff580eead0eca4f90c119 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Fri, 11 Oct 2024 19:20:39 +0000
Subject: [PATCH 03/10] Remove deprecated HuggingFaceModelWithFSDP

---
 llmfoundry/models/hf/__init__.py      |   2 -
 llmfoundry/models/hf/model_wrapper.py | 103 --------------------------
 tests/models/test_model.py            |   6 +++---
 3 files changed, 3 insertions(+), 108 deletions(-)
 delete mode 100644 llmfoundry/models/hf/model_wrapper.py

diff --git a/llmfoundry/models/hf/__init__.py b/llmfoundry/models/hf/__init__.py
index 2f25f92940..03df90e8cd 100644
--- a/llmfoundry/models/hf/__init__.py
+++ b/llmfoundry/models/hf/__init__.py
@@ -9,7 +9,6 @@
     prepare_hf_model_for_fsdp,
 )
 from llmfoundry.models.hf.hf_t5 import ComposerHFT5
-from llmfoundry.models.hf.model_wrapper import HuggingFaceModelWithFSDP
 
 __all__ = [
     'BaseHuggingFaceModel',
@@ -18,5 +17,4 @@
     'prepare_hf_causal_lm_model_for_fsdp',
     'prepare_hf_enc_dec_model_for_fsdp',
     'prepare_hf_model_for_fsdp',
-    'HuggingFaceModelWithFSDP',
 ]
diff --git a/llmfoundry/models/hf/model_wrapper.py b/llmfoundry/models/hf/model_wrapper.py
deleted file mode 100644
index f2b67db1ec..0000000000
--- a/llmfoundry/models/hf/model_wrapper.py
+++ /dev/null
@@ -1,103 +0,0 @@
-# Copyright 2022 MosaicML LLM Foundry authors
-# SPDX-License-Identifier: Apache-2.0
-
-"""Re-usable :class:`.ComposerModel` for LLM HF Models."""
-
-from __future__ import annotations
-
-import warnings
-from collections import UserDict
-from typing import TYPE_CHECKING, Mapping, Optional, Union
-
-import transformers
-from composer.models.huggingface import HuggingFaceModel
-from torchmetrics import Metric
-from transformers import PreTrainedTokenizerBase
-from transformers.utils.generic import ModelOutput
-
-from llmfoundry.models.hf.hf_fsdp import prepare_hf_model_for_fsdp
-from llmfoundry.utils.warnings import VersionedDeprecationWarning
-
-if TYPE_CHECKING:
-    from peft import PeftConfig, PeftModel
-
-__all__ = ['HuggingFaceModelWithFSDP']
-
-# HuggingFace hardcodes the ignore index to -100
-_HF_IGNORE_INDEX = -100
-
-
-class HuggingFaceModelWithFSDP(HuggingFaceModel):
-    """Wrapper around HuggingFaceModel.
-
-    Handles preparation for FSDP wrapping.
-    """
-
-    def __init__(
-        self,
-        model: Union[transformers.PreTrainedModel, 'PeftModel'],
-        tokenizer: Optional[PreTrainedTokenizerBase] = None,
-        metrics: Optional[list[Metric]] = None,
-        eval_metrics: Optional[list[Metric]] = None,
-        shift_labels: bool = False,
-        allow_embedding_resizing: bool = False,
-        init_device: Optional[str] = None,
-        peft_config: Optional['PeftConfig'] = None,
-        should_save_peft_only: bool = True,
-    ):
-        warnings.warn(
-            VersionedDeprecationWarning(
-                '`HuggingFaceModelWithFSDP` is deprecated. In the future please use `BaseHuggingFaceModel`.',
-                remove_version='0.14.0',
-            ),
-        )
-        super().__init__(
-            model,
-            tokenizer,
-            use_logits=True,
-            metrics=metrics,
-            eval_metrics=eval_metrics,
-            shift_labels=shift_labels,
-            allow_embedding_resizing=allow_embedding_resizing,
-            peft_config=peft_config,
-            should_save_peft_only=should_save_peft_only,
-        )
-
-        self.prepare_inner_model(self.model, init_device)
-
-    def forward(self, batch: Mapping):
-        if isinstance(batch, dict) or isinstance(batch, UserDict):
-            # Further input validation is left to the huggingface forward call
-            batch = {
-                k: v for k, v in batch.items() if k in self.model_forward_args
-            }
-            output = self.model(**batch)  # type: ignore (thirdparty)
-        else:
-            raise ValueError(
-                'Unexpected batch type. Expected a dictionary with keys corresponding to the inputs to the forward function of the Huggingface model',
-            )
-        return output
-
-    def loss(self, outputs: ModelOutput, batch: Mapping):
-        if self.config.use_return_dict:
-            return outputs['loss']
-        # loss is at index 0 in the output tuple, logits are at index 1
-        return outputs[:2]
-
-    @staticmethod
-    def prepare_inner_model(
-        model: Union[transformers.PreTrainedModel, 'PeftModel'],
-        init_device: Optional[str] = None,
-    ):
-        """Prepare the inner model for FSDP wrapping.
-
-        Args:
-            model: The model to prepare.
-            init_device: The device to initialize the model on.
-        """
-        # Note: We need to add the FSDP related attributes to the model AFTER the super init,
-        # so that the (possible) embedding resizing doesn't destroy them
-        prepare_hf_model_for_fsdp(model, init_device)
-
-        # This provides support for meta initialization when using FSDP
-        model.param_init_fn = lambda module: model._init_weights(module)
diff --git a/tests/models/test_model.py b/tests/models/test_model.py
index eeb6bf0d90..e2ddb0a012 100644
--- a/tests/models/test_model.py
+++ b/tests/models/test_model.py
@@ -39,7 +39,7 @@
 
 from llmfoundry import ComposerHFCausalLM
 from llmfoundry.layers_registry import norms
-from llmfoundry.models.hf.model_wrapper import HuggingFaceModelWithFSDP
+from llmfoundry.models.hf.hf_base import BaseHuggingFaceModel
 from llmfoundry.models.layers import build_alibi_bias
 from llmfoundry.models.layers.attention import (
     check_alibi_support,
@@ -2560,7 +2560,7 @@ def test_hf_init(
         False,
     )
 
-    model = HuggingFaceModelWithFSDP(model, tokenizer)
+    model = BaseHuggingFaceModel(model, tokenizer)
 
     batch = gen_random_batch(batch_size, test_cfg)
 
@@ -2609,7 +2609,7 @@ def test_head_dim_8_flash_mqa_attn(batch_size: int = 2):
 
     mpt = MPTForCausalLM(hf_config)
 
-    model = HuggingFaceModelWithFSDP(mpt, tokenizer, shift_labels=True)
+    model = BaseHuggingFaceModel(mpt, tokenizer, shift_labels=True)
 
     model = model.to(test_cfg.device)
     batch = gen_random_batch(batch_size, test_cfg)
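Illustrative note on PATCH 03 (not part of the diff): the FSDP preparation that the deleted wrapper performed is not lost; `prepare_hf_model_for_fsdp` remains exported from `llmfoundry.models.hf` (see the `__init__.py` hunk above) and can be called directly. A minimal sketch, where the `'gpt2'` checkpoint and `init_device='cpu'` are illustrative choices; the final line is copied from the deleted class:

    import transformers

    from llmfoundry.models.hf import prepare_hf_model_for_fsdp

    model = transformers.AutoModelForCausalLM.from_pretrained('gpt2')

    # Add the FSDP wrapping attributes after any embedding resizing, as the
    # deleted wrapper did, then enable meta initialization under FSDP:
    prepare_hf_model_for_fsdp(model, init_device='cpu')
    model.param_init_fn = lambda module: model._init_weights(module)

Note that PATCH 08 later in this series moves the tests from `BaseHuggingFaceModel` to Composer's `HuggingFaceModel`.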
From 3849957c6b61b40974497ca9dac564bf182893df Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Fri, 11 Oct 2024 20:00:04 +0000
Subject: [PATCH 04/10] fix

---
 llmfoundry/command_utils/eval.py    |  3 +--
 tests/eval/test_eval_deprecation.py | 33 -----------------------------
 2 files changed, 1 insertion(+), 35 deletions(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index d0471b9ee6..0181ff8ab1 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -36,7 +36,6 @@
     process_init_device,
 )
 from llmfoundry.utils.registry_utils import import_file
-from llmfoundry.utils.warnings import VersionedDeprecationWarning
 
 log = logging.getLogger(__name__)
 
@@ -113,7 +112,7 @@ def evaluate_model(
     fsdp_config = parallelism_config.get(
         'fsdp_config',
         None,
-    ) if parallelism_config else fsdp_config
+    )
     if fsdp_config and model.get('load_in_8bit', False):
         raise ValueError(
             'The FSDP config block is not supported when loading ' +
diff --git a/tests/eval/test_eval_deprecation.py b/tests/eval/test_eval_deprecation.py
index 828186245a..e6b64cab05 100644
--- a/tests/eval/test_eval_deprecation.py
+++ b/tests/eval/test_eval_deprecation.py
@@ -90,36 +90,3 @@ def test_deprecation_warning_with_deprecated_arg(self):
             'parallelism_config cannot contain deprecated fsdp_config arguments.',
             str(context.exception),
         )
-
-    def test_deprecation_warning_with_fsdp_config(self):
-        with warnings.catch_warnings(record=True) as w:
-            warnings.simplefilter('always')
-
-            try:
-                evaluate_model(
-                    **self.common_args,
-                    parallelism_config=None,
-                    fsdp_config={'verbose': True},
-                )
-            except Exception:
-                pass
-
-            self.assertTrue(
-                any(
-                    issubclass(warning.category, VersionedDeprecationWarning)
-                    for warning in w
-                ),
-            )
-
-    def test_error_with_both_fsdp_and_parallelism_config(self):
-        with self.assertRaises(ValueError) as context:
-            evaluate_model(
-                **self.common_args,
-                parallelism_config={'some_arg': True},
-                fsdp_config={'some_arg': True},
-            )
-
-        self.assertIn(
-            'Both fsdp_config and parallelism_config cannot be provided at the same time.',
-            str(context.exception),
-        )
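Illustrative note on PATCH 04 (not part of the diff): `fsdp_config` is now derived only from `parallelism_config`, so the dangling fallback to the argument removed in PATCH 02 is gone. A standalone sketch of the extraction with example values:

    # The FSDP block is read back out of parallelism_config:
    parallelism_config = {'fsdp_config': {'sharding_strategy': 'FULL_SHARD'}}
    fsdp_config = parallelism_config.get(
        'fsdp_config',
        None,
    )
    # Caveat: as written in this patch, the .get() call assumes
    # parallelism_config is not None; PATCH 05 below adds the
    # `if parallelism_config else None` guard.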
From a4f6a8ed720fed58e5ecabf930da49c886a38d03 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Fri, 11 Oct 2024 20:10:55 +0000
Subject: [PATCH 05/10] fix

---
 llmfoundry/command_utils/eval.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index 0181ff8ab1..14874bc16e 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -112,7 +112,7 @@ def evaluate_model(
     fsdp_config = parallelism_config.get(
         'fsdp_config',
         None,
-    )
+    ) if parallelism_config else None
     if fsdp_config and model.get('load_in_8bit', False):
         raise ValueError(
             'The FSDP config block is not supported when loading ' +

From df8f451f2f62fba3ded46189cddeb489378c6994 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Fri, 11 Oct 2024 20:20:18 +0000
Subject: [PATCH 06/10] fix

---
 llmfoundry/command_utils/eval.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index 14874bc16e..f160da9d4b 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -330,7 +330,6 @@ def evaluate(cfg: DictConfig) -> tuple[list[Trainer], pd.DataFrame]:
         device_eval_batch_size=eval_config.device_eval_batch_size,
         eval_gauntlet_config=eval_gauntlet_config,
         eval_loader_config=eval_loader_config,
-        fsdp_config=fsdp_config,
         loggers=loggers,
         python_log_level=eval_config.python_log_level,
         precision=eval_config.precision,

From d384b10636b14ee03c068a7a9006b1faab939a91 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Fri, 11 Oct 2024 23:43:47 +0000
Subject: [PATCH 07/10] fix

---
 llmfoundry/command_utils/eval.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index f160da9d4b..e94949b320 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -253,8 +253,6 @@ def evaluate(cfg: DictConfig) -> tuple[list[Trainer], pd.DataFrame]:
     model_configs = eval_config.models
     eval_gauntlet_config = eval_config.eval_gauntlet or eval_config.eval_gauntlet_str
 
-    fsdp_config = eval_config.fsdp_config
-
     # Mandatory Evaluation Parameters
     icl_tasks = eval_config.icl_tasks or eval_config.icl_tasks_str
     if icl_tasks is None:
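Illustrative note on PATCHES 05-07 (not part of the diff): `evaluate` no longer reads `eval_config.fsdp_config` into a local or forwards it to `evaluate_model`, and the guarded expression from PATCH 05 degrades cleanly when no parallelism settings are supplied. A standalone sketch:

    parallelism_config = None  # e.g. no parallelism settings supplied
    fsdp_config = parallelism_config.get(
        'fsdp_config',
        None,
    ) if parallelism_config else None

    # With no parallelism_config there is no FSDP block, so the
    # load_in_8bit compatibility check above is skipped:
    assert fsdp_config is None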
From 0f27850992656a26f628200538721624a87559f6 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Mon, 14 Oct 2024 16:56:55 +0000
Subject: [PATCH 08/10] fix

---
 tests/models/test_model.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/models/test_model.py b/tests/models/test_model.py
index e2ddb0a012..43067f5e47 100644
--- a/tests/models/test_model.py
+++ b/tests/models/test_model.py
@@ -15,7 +15,10 @@
 from accelerate import init_empty_weights
 from composer.core.precision import Precision, get_precision_context
 from composer.distributed.dist_strategy import prepare_fsdp_module
-from composer.models.huggingface import maybe_get_underlying_model
+from composer.models.huggingface import (
+    HuggingFaceModel,
+    maybe_get_underlying_model,
+)
 from composer.optim import DecoupledAdamW
 from composer.utils import (
     FSDPConfig,
@@ -39,7 +42,6 @@
 
 from llmfoundry import ComposerHFCausalLM
 from llmfoundry.layers_registry import norms
-from llmfoundry.models.hf.hf_base import BaseHuggingFaceModel
 from llmfoundry.models.layers import build_alibi_bias
 from llmfoundry.models.layers.attention import (
     check_alibi_support,
@@ -2560,7 +2562,7 @@ def test_hf_init(
         False,
     )
 
-    model = BaseHuggingFaceModel(model, tokenizer)
+    model = HuggingFaceModel(model, tokenizer)
 
     batch = gen_random_batch(batch_size, test_cfg)
 
@@ -2609,7 +2611,7 @@ def test_head_dim_8_flash_mqa_attn(batch_size: int = 2):
 
     mpt = MPTForCausalLM(hf_config)
 
-    model = BaseHuggingFaceModel(mpt, tokenizer, shift_labels=True)
+    model = HuggingFaceModel(mpt, tokenizer, shift_labels=True)
 
     model = model.to(test_cfg.device)
     batch = gen_random_batch(batch_size, test_cfg)

From e08d25cddc132e99a2f1004e1b45ffadfda39047 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Mon, 14 Oct 2024 20:03:03 +0000
Subject: [PATCH 09/10] pass fsdp config through via parallelism_config

---
 llmfoundry/command_utils/eval.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index e94949b320..953daa8105 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -330,6 +330,7 @@ def evaluate(cfg: DictConfig) -> tuple[list[Trainer], pd.DataFrame]:
         eval_loader_config=eval_loader_config,
         loggers=loggers,
         python_log_level=eval_config.python_log_level,
+        parallelism_config={'fsdp': eval_config.fsdp_config},
         precision=eval_config.precision,
         eval_gauntlet_df=eval_gauntlet_df,
         callback_configs=eval_config.callbacks,

From c5c6c753f9a3130181a97d54825f44919af5f242 Mon Sep 17 00:00:00 2001
From: Irene Dea
Date: Mon, 14 Oct 2024 20:12:47 +0000
Subject: [PATCH 10/10] pass parallelism config directly

---
 llmfoundry/command_utils/eval.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py
index 953daa8105..0678925a84 100644
--- a/llmfoundry/command_utils/eval.py
+++ b/llmfoundry/command_utils/eval.py
@@ -160,7 +160,7 @@ def evaluate_model(
         callbacks=callbacks,
         loggers=loggers,
         precision=precision,
-        parallelism_config={'fsdp': fsdp_config},
+        parallelism_config=parallelism_config,
         load_path=load_path,
         load_weights_only=True,
         progress_bar=False,
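Illustrative note on PATCHES 09-10 (not part of the diff): `evaluate` wraps the YAML-level `fsdp_config` under Composer's `'fsdp'` key before calling `evaluate_model`, which now forwards `parallelism_config` to the Trainer unchanged. Observe that the `'fsdp'` key written here is distinct from the `'fsdp_config'` key that `evaluate_model` reads in PATCH 05. A standalone sketch with example values:

    fsdp_cfg = {'sharding_strategy': 'FULL_SHARD'}  # stands in for eval_config.fsdp_config

    # evaluate() -> evaluate_model(): FSDP block nested under Composer's key.
    parallelism_config = {'fsdp': fsdp_cfg}

    # evaluate_model() -> Trainer(): passed through as-is.
    trainer_kwargs = {'parallelism_config': parallelism_config}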