diff --git a/composer/_version.py b/composer/_version.py
index a38b61a722..72f97c6d90 100644
--- a/composer/_version.py
+++ b/composer/_version.py
@@ -3,4 +3,4 @@
 
 """The Composer Version."""
 
-__version__ = '0.24.0.dev0'
+__version__ = '0.24.0'
diff --git a/composer/callbacks/__init__.py b/composer/callbacks/__init__.py
index b876826e3c..16a50a31a9 100644
--- a/composer/callbacks/__init__.py
+++ b/composer/callbacks/__init__.py
@@ -9,7 +9,6 @@
 from composer.callbacks.activation_monitor import ActivationMonitor
 from composer.callbacks.checkpoint_saver import CheckpointSaver
 from composer.callbacks.early_stopper import EarlyStopper
-from composer.callbacks.eval_output_logging_callback import EvalOutputLogging
 from composer.callbacks.export_for_inference import ExportForInferenceCallback
 from composer.callbacks.free_outputs import FreeOutputs
 from composer.callbacks.generate import Generate
@@ -36,7 +35,6 @@
     'CheckpointSaver',
     'MLPerfCallback',
     'EarlyStopper',
-    'EvalOutputLogging',
     'ExportForInferenceCallback',
     'ThresholdStopper',
     'ImageVisualizer',
diff --git a/composer/callbacks/eval_output_logging_callback.py b/composer/callbacks/eval_output_logging_callback.py
deleted file mode 100644
index 717994413a..0000000000
--- a/composer/callbacks/eval_output_logging_callback.py
+++ /dev/null
@@ -1,129 +0,0 @@
-# Copyright 2022 MosaicML Composer authors
-# SPDX-License-Identifier: Apache-2.0
-
-"""Log model outputs and expected outputs during ICL evaluation."""
-
-import warnings
-from copy import deepcopy
-from typing import Any, Sequence, Union
-
-import torch
-
-from composer.core import Callback, State
-from composer.loggers import ConsoleLogger, Logger
-from composer.utils import VersionedDeprecationWarning, dist
-
-
-class EvalOutputLogging(Callback):
-    """Logs eval outputs for each sample of each ICL evaluation dataset.
-
-    ICL metrics are required to support caching the model's responses including information on whether model was correct.
-    Metrics are responsible for returning the results of individual data points in a dictionary of lists.
-    The callback will log the metric name, the depadded and detokenized input, any data stored in state.metric_outputs, and
-    any keys from the batch passed into `batch_keys_to_log`. It will do so after every eval batch.
-    """
-
-    def __init__(self, log_tokens=False, *args, **kwargs):
-        warnings.warn(
-            VersionedDeprecationWarning(
-                '`InContextLearningMetric` and it\'s subclasses have been deprecated and ' +
-                'migrated to MosaicML\'s llm-foundry repo under the llmfoundry.eval.datasets.in_context_learning module: '
-                + 'https://github.com/mosaicml/llm-foundry/blob/main/scripts/eval/README.md.' +
-                'As EvalOutputLogging only works for ICL metrics, it has been deprecated and ' +
-                'will be migrated as well.',
-                remove_version='0.24.0',
-            ),
-        )
-        super().__init__(self, *args, **kwargs)
-        self.log_tokens = log_tokens
-        self.columns = None
-        self.name = None
-        self.rows = []
-
-    def eval_batch_end(self, state: State, logger: Logger) -> None:
-        if not isinstance(state.batch, dict):
-            warnings.warn(
-                f'''EvalOutputLogging only supports batches that are dictionary. \
-                Found batch for type {type(state.batch)}. \
-                Not logging eval outputs.''',
-            )
-            return
-
-        assert state.outputs is not None
-        assert state.metric_outputs is not None
-        logging_dict: dict[str, Union[list[Any], torch.Tensor, Sequence[torch.Tensor]]] = deepcopy(state.metric_outputs)
-
-        # If batch mode is not generate, outputs will be logits
-        if state.batch['mode'] == 'generate':
-            # Outputs are already detokenized
-            logging_dict['outputs'] = state.outputs
-
-        input_ids = state.batch['input_ids']
-        logged_input = []
-        assert state.dataloader is not None
-
-        # Depad and decode input_ids
-        for input_list in input_ids.tolist():
-            dataset = state.dataloader.dataset  # pyright: ignore[reportGeneralTypeIssues]
-            depadded_input = [tok for tok in input_list if tok != dataset.pad_tok_id]
-            logged_input.append(dataset.tokenizer.decode(depadded_input))
-        logging_dict['input'] = logged_input
-
-        # Log token indices if toggled
-        if self.log_tokens:
-            logging_dict['input_tokens'] = input_ids.tolist()
-            if not state.batch['mode'] == 'generate':
-                if isinstance(state.outputs, torch.Tensor):  # pyright
-                    logging_dict['label_tokens'] = state.outputs.tolist()
-
-        # Add run_name as a column
-        run_name_list = [state.run_name for _ in range(0, len(logging_dict['input']))]
-        logging_dict['run_name'] = run_name_list
-
-        # NOTE: This assumes _any_ tensor logged are tokens to be decoded.
-        #       This might not be true if, for example, logits are logged.
-
-        # Detokenize data in rows
-        for key, value in logging_dict.items():
-            # All types in list are the same
-            if isinstance(value[0], torch.Tensor):
-                logging_dict[key] = [
-                    state.dataloader.dataset.tokenizer.decode(t)  # pyright: ignore[reportGeneralTypeIssues]
-                    for t in value
-                ]
-            elif isinstance(value[0], list):
-                if isinstance(value[0][0], torch.Tensor):
-                    tokenizer = state.dataloader.dataset.tokenizer  # pyright: ignore[reportGeneralTypeIssues]
-                    logging_dict[key] = [[tokenizer.decode(choice) for choice in t] for t in value]
-
-        # Convert logging_dict from kv pairs of column name and column values to a list of rows
-        # Example:
-        # logging_dict = {"a": ["1a", "2a"], "b": ["1b", "2b"]}
-        # will become
-        # columns = {"a", "b"}, rows = [["1a", "1b"], ["2a", "2b"]]
-        columns = list(logging_dict.keys())
-        rows = [list(item) for item in zip(*logging_dict.values())]
-
-        assert state.dataloader_label is not None
-        if not self.name:
-            # If only running eval, step will be 0
-            # If running training, step will be current training step
-            step = state.timestamp.batch.value
-            self.name = f'{state.dataloader_label}_step_{step}'
-            self.columns = columns
-        self.rows.extend(rows)
-
-    def eval_end(self, state: State, logger: Logger) -> None:
-        # eval_batch_end will have set these if there is anything to log
-        if self.name is None or self.columns is None:
-            return
-
-        list_of_rows = dist.all_gather_object(self.rows)
-        rows = [row for rows in list_of_rows for row in rows]
-        for dest_logger in logger.destinations:
-            if not isinstance(dest_logger, ConsoleLogger):
-                dest_logger.log_table(self.columns, rows, name=self.name, step=state.timestamp.batch.value)
-
-        self.rows = []
-        self.name = None
-        self.columns = None
diff --git a/composer/core/state.py b/composer/core/state.py
index 7c43473ace..3980514380 100644
--- a/composer/core/state.py
+++ b/composer/core/state.py
@@ -47,7 +47,6 @@
     ParallelismConfig,
     ParallelismType,
     TPConfig,
-    VersionedDeprecationWarning,
     batch_get,
     batch_set,
     dist,
@@ -617,7 +616,7 @@ def _validate_parallelism_configs(self):
         # Load monolith rank0 only
         if self.load_monolith_rank0_only:
             if self.tp_config is not None:
-                raise ValueError('load_fsdp_monolith_rank0_only is not compatible with tensor parallelism (TP).')
+                raise ValueError('load_monolith_rank0_only is not compatible with tensor parallelism (TP).')
             assert self.fsdp_config is not None
             error_message = ''
             if self.fsdp_config.sync_module_states == False:
@@ -900,21 +899,6 @@ def fsdp_state_dict_type(self):
     def fsdp_sharded_state_dict_enabled(self):
         return self.fsdp_config is not None and self.fsdp_enabled and self.fsdp_state_dict_type == 'sharded'
 
-    @property
-    def fsdp_device_mesh(self):
-        warnings.warn(VersionedDeprecationWarning('fsdp_device_mesh is deprecated. Use device_mesh instead.', '0.24'))
-        return self.device_mesh
-
-    @property
-    def load_fsdp_monolith_rank0_only(self):
-        warnings.warn(
-            VersionedDeprecationWarning(
-                'load_fsdp_monolith_rank0_only is deprecated. Use load_monolith_rank0_only instead.',
-                '0.24',
-            ),
-        )
-        return self.load_monolith_rank0_only
-
     @property
     def load_monolith_rank0_only(self):
         return (
diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py
index 2a65a40b43..68d543e40e 100644
--- a/composer/trainer/trainer.py
+++ b/composer/trainer/trainer.py
@@ -114,7 +114,6 @@
     Transform,
     VersionedDeprecationWarning,
     checkpoint,
-    create_fsdp_config,
     dist,
     ensure_tuple,
     export_with_logger,
@@ -1323,7 +1322,7 @@ def __init__(
                 if isinstance(parallelism_config['fsdp'], FSDPConfig):
                     parallelism_config_args['fsdp'] = parallelism_config['fsdp']
                 else:
-                    parallelism_config_args['fsdp'] = create_fsdp_config(parallelism_config['fsdp'])
+                    parallelism_config_args['fsdp'] = FSDPConfig(**parallelism_config['fsdp'])
             if 'tp' in parallelism_config and parallelism_config['tp'] is not None:
                 if isinstance(parallelism_config['tp'], TPConfig):
                     parallelism_config_args['tp'] = parallelism_config['tp']
diff --git a/composer/utils/__init__.py b/composer/utils/__init__.py
index 20fa44e092..283ab446c1 100644
--- a/composer/utils/__init__.py
+++ b/composer/utils/__init__.py
@@ -76,7 +76,7 @@
     UCObjectStore,
     build_remote_backend,
 )
-from composer.utils.parallelism import FSDPConfig, ParallelismConfig, TPConfig, create_fsdp_config
+from composer.utils.parallelism import FSDPConfig, ParallelismConfig, TPConfig
 from composer.utils.remote_uploader import RemoteFilesExistingCheckStatus, RemoteUploader
 from composer.utils.retrying import retry
 from composer.utils.string_enum import StringEnum
@@ -153,7 +153,6 @@
     'KNOWN_COMPRESSORS',
     'STR_TO_DTYPE',
     'ParallelismType',
-    'create_fsdp_config',
     'FSDPConfig',
     'TPConfig',
     'ParallelismConfig',
diff --git a/composer/utils/parallelism.py b/composer/utils/parallelism.py
index 4dc921b63a..6d4e05d773 100644
--- a/composer/utils/parallelism.py
+++ b/composer/utils/parallelism.py
@@ -3,14 +3,11 @@
 
 """Parallelism configs."""
 
-import warnings
 from dataclasses import dataclass
 from typing import Any, Optional
 
 from torch.distributed._tensor.device_mesh import DeviceMesh
 
-from composer.utils.warnings import VersionedDeprecationWarning
-
 
 @dataclass
 class FSDPConfig:
@@ -45,42 +42,6 @@ class FSDPConfig:
     verbose: bool = False
 
 
-def create_fsdp_config(fsdp_config: dict[str, Any]):
-    """Modify fsdp_config to set default values for missing keys."""
-    fsdp_config = {**fsdp_config}  # Shallow copy to avoid modifying input
-    if 'process_group' in fsdp_config:
-        warnings.warn(
-            VersionedDeprecationWarning(
-                'process_group is deprecated. Please specify `data_parallel_shard_degree` and `data_parallel_replicate_degree` instead.',
-                remove_version='0.24',
-            ),
-        )
-
-    if 'device_mesh' in fsdp_config:
-        warnings.warn(
-            VersionedDeprecationWarning(
-                'device_mesh is deprecated. Please specify `data_parallel_shard_degree` and `data_parallel_replicate_degree` instead.',
-                remove_version='0.24',
-            ),
-        )
-        if 'data_parallel_shard_degree' in fsdp_config or 'data_parallel_replicate_degree' in fsdp_config:
-            raise ValueError(
-                'Cannot specify both `device_mesh` and `data_parallel_shard_degree` or `data_parallel_replicate_degree`. Please remove `device_mesh`.',
-            )
-        device_mesh = fsdp_config.pop('device_mesh')
-        if len(device_mesh) == 1:
-            fsdp_config['data_parallel_shard_degree'] = device_mesh[0]
-        elif len(device_mesh) == 2:
-            fsdp_config['data_parallel_replicate_degree'] = device_mesh[0]
-            fsdp_config['data_parallel_shard_degree'] = device_mesh[1]
-        else:
-            raise ValueError(
-                f'device_mesh must be of length 1 or 2 but received length {len(device_mesh)} with device mesh {device_mesh}.',
-            )
-
-    return FSDPConfig(**fsdp_config)
-
-
 @dataclass
 class TPConfig:
     """Configuration for tensor parallelism (TP)."""
diff --git a/docker/README.md b/docker/README.md
index a8ebfa63e4..7639a70f06 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -15,8 +15,8 @@ all dependencies for both NLP and Vision models. They are built on top of the
 
 | Composer Version   | CUDA Support   | Docker Tag                                                      |
 |--------------------|----------------|-----------------------------------------------------------------|
-| 0.23.5             | Yes            | `mosaicml/composer:latest`, `mosaicml/composer:0.23.5`          |
-| 0.23.5             | No             | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.23.5_cpu`  |
+| 0.24.0             | Yes            | `mosaicml/composer:latest`, `mosaicml/composer:0.24.0`          |
+| 0.24.0             | No             | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.24.0_cpu`  |
 
 **Note**: For a lightweight installation, we recommended using a [MosaicML PyTorch Image](#pytorch-images) and manually
diff --git a/docker/build_matrix.yaml b/docker/build_matrix.yaml
index 856ae598e1..32b942d265 100644
--- a/docker/build_matrix.yaml
+++ b/docker/build_matrix.yaml
@@ -194,9 +194,9 @@
   TORCHVISION_VERSION: 0.17.2
 - AWS_OFI_NCCL_VERSION: ''
   BASE_IMAGE: nvidia/cuda:12.4.1-cudnn-devel-ubuntu20.04
-  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.23.5
+  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.24.0
   CUDA_VERSION: 12.4.1
-  IMAGE_NAME: composer-0-23-5
+  IMAGE_NAME: composer-0-24-0
   MOFED_VERSION: latest-23.10
   NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
   PYTHON_VERSION: '3.11'
@@ -204,17 +204,17 @@
   PYTORCH_NIGHTLY_VERSION: ''
   PYTORCH_VERSION: 2.4.0
   TAGS:
-  - mosaicml/composer:0.23.5
-  - ghcr.io/databricks-mosaic/composer:0.23.5
+  - mosaicml/composer:0.24.0
+  - ghcr.io/databricks-mosaic/composer:0.24.0
   - mosaicml/composer:latest
   - ghcr.io/databricks-mosaic/composer:latest
   TARGET: composer_stage
   TORCHVISION_VERSION: 0.19.0
 - AWS_OFI_NCCL_VERSION: ''
   BASE_IMAGE: ubuntu:20.04
-  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.23.5
+  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.24.0
   CUDA_VERSION: ''
-  IMAGE_NAME: composer-0-23-5-cpu
+  IMAGE_NAME: composer-0-24-0-cpu
   MOFED_VERSION: latest-23.10
   NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
   PYTHON_VERSION: '3.11'
@@ -222,8 +222,8 @@
   PYTORCH_NIGHTLY_VERSION: ''
   PYTORCH_VERSION: 2.4.0
   TAGS:
-  - mosaicml/composer:0.23.5_cpu
-  - ghcr.io/databricks-mosaic/composer:0.23.5_cpu
+  - mosaicml/composer:0.24.0_cpu
+  - ghcr.io/databricks-mosaic/composer:0.24.0_cpu
   - mosaicml/composer:latest_cpu
   - ghcr.io/databricks-mosaic/composer:latest_cpu
   TARGET: composer_stage
diff --git a/docker/generate_build_matrix.py b/docker/generate_build_matrix.py
index d2261a4ea3..bb9317dbe6 100644
--- a/docker/generate_build_matrix.py
+++ b/docker/generate_build_matrix.py
@@ -244,7 +244,7 @@ def _main():
     composer_entries = []
 
     # The `GIT_COMMIT` is a placeholder and Jenkins will substitute it with the actual git commit for the `composer_staging` images
-    composer_versions = ['0.23.5']  # Only build images for the latest composer version
+    composer_versions = ['0.24.0']  # Only build images for the latest composer version
     composer_python_versions = [PRODUCTION_PYTHON_VERSION]  # just build composer against the latest
 
     for product in itertools.product(composer_python_versions, composer_versions, cuda_options):
diff --git a/tests/trainer/test_fsdp.py b/tests/trainer/test_fsdp.py
index 9dab386324..491589c557 100644
--- a/tests/trainer/test_fsdp.py
+++ b/tests/trainer/test_fsdp.py
@@ -324,32 +324,6 @@ def test_fsdp_automicrobatching_sync_hooks(world_size: int):
     mock_readd_hooks.assert_called_once()
 
 
-@pytest.mark.gpu
-@world_size(2)
-@pytest.mark.filterwarnings('ignore:Instantiating FSDP with custom process groups.*:UserWarning')
-@pytest.mark.filterwarnings('ignore:Composer is instantiating custom process groups.*:UserWarning')
-@pytest.mark.filterwarnings('ignore:.*process_group and device_mesh are set for FSDP.*.:UserWarning')
-def test_fsdp_process_group(world_size: int):
-    model = SimpleModel()
-    model.fc1._fsdp_wrap = True  # pyright: ignore[reportGeneralTypeIssues]
-    model.fc2._fsdp_wrap = True  # pyright: ignore[reportGeneralTypeIssues]
-    dataset = RandomClassificationDataset(size=10)
-    dataloader = DataLoader(dataset, sampler=dist.get_sampler(dataset))
-
-    trainer = Trainer(
-        model=model,
-        train_dataloader=dataloader,
-        parallelism_config={
-            'fsdp': {
-                'process_group': 'mod1',  # all ranks
-            },
-        },
-        max_duration='3ba',
-    )
-
-    trainer.fit()
-
-
 @pytest.mark.gpu
 @world_size(2)
 @pytest.mark.skipif(
@@ -577,24 +551,6 @@ def oom_hook(module, grad_input, grad_ouput):
     assert torch.equal(output_1, output_2)
 
 
-@pytest.mark.gpu
-@world_size(2)
-def test_fsdp_device_mesh(world_size: int):
-    model = SimpleModel()
-    model.fc1._fsdp_wrap = True  # pyright: ignore[reportGeneralTypeIssues]
-    model.fc2._fsdp_wrap = True  # pyright: ignore[reportGeneralTypeIssues]
-
-    # Expect warning via pytest
-    with pytest.warns(DeprecationWarning):
-        Trainer(
-            model=model,
-            parallelism_config={'fsdp': {
-                'device_mesh': [2],
-            }},
-            max_duration='3ba',
-        )
-
-
 @pytest.mark.gpu
 @world_size(2)
 def test_fsdp_shard(world_size: int):
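Migration note (not part of the patch above): with `create_fsdp_config` removed, a dict passed as `parallelism_config['fsdp']` is now splatted directly into `FSDPConfig(**...)`, so the deprecated `process_group` and `device_mesh` keys are no longer remapped for you. A minimal sketch of the replacement, based on the remapping logic deleted from `composer/utils/parallelism.py` (the degree values are illustrative):

```python
from composer.utils import FSDPConfig

# create_fsdp_config used to translate the deprecated device_mesh key:
#   {'device_mesh': [4]}    -> data_parallel_shard_degree=4
#   {'device_mesh': [2, 4]} -> data_parallel_replicate_degree=2,
#                              data_parallel_shard_degree=4
# Set the new fields directly instead:
fsdp_config = FSDPConfig(
    data_parallel_replicate_degree=2,  # was device_mesh[0] for a 2D mesh
    data_parallel_shard_degree=4,  # was device_mesh[1] (or device_mesh[0] for a 1D mesh)
)

# The equivalent dict form is now passed straight through by the Trainer:
# Trainer(..., parallelism_config={'fsdp': {'data_parallel_replicate_degree': 2,
#                                           'data_parallel_shard_degree': 4}})
```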