
Commit 3372ec0: style more

eitanturok committed Sep 25, 2024
1 parent 6d65a29 commit 3372ec0
Showing 4 changed files with 22 additions and 14 deletions.
7 changes: 5 additions & 2 deletions llmfoundry/command_utils/train.py
@@ -19,8 +19,11 @@
     TraceHandler,
     cyclic_schedule,
 )
-from composer.utils import (FSDPConfig, ParallelismConfig, TPConfig, dist,
-                            get_device, reproducibility,)
+from composer.utils import (
+    dist,
+    get_device,
+    reproducibility,
+)
 from omegaconf import DictConfig
 from omegaconf import OmegaConf as om
 
13 changes: 7 additions & 6 deletions llmfoundry/models/utils/tp_strategy.py
@@ -1,23 +1,24 @@
-
 # Copyright 2024 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
 
 from composer.models import ComposerModel
 from torch.distributed._tensor import Replicate, Shard
-from torch.distributed.tensor.parallel import (ColwiseParallel,
-                                               PrepareModuleInput,
-                                               RowwiseParallel,)
+from torch.distributed.tensor.parallel import (
+    ColwiseParallel,
+    PrepareModuleInput,
+    RowwiseParallel,
+)
 from torch.distributed.tensor.parallel.style import ParallelStyle
 
 
 def ffn_tp_strategy(model: ComposerModel) -> dict[str, ParallelStyle]:
     TP_LAYERS = {'up_proj', 'down_proj'}
 
     # validate that all TP_LAYERS are in model
-    tp_layers_in_model = set([
+    tp_layers_in_model = {
         layer for layer in TP_LAYERS for name, _ in model.named_modules()
         if layer in name
-    ])
+    }
     assert tp_layers_in_model == TP_LAYERS, f'The FFN tensor parallelism strategy requires `model` to have layers {TP_LAYERS}. But `model` is missing layers {TP_LAYERS - tp_layers_in_model}.'
 
     # generate layer plan
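
A note on the set-comprehension change above: the new {...} literal builds the same set as the old set([...]) call, just without the intermediate list. A minimal standalone sketch of the validation step, using a hypothetical list of module names in place of a real ComposerModel, shows how missing TP layers would be detected:

# Sketch of the TP-layer validation above, with a hypothetical list of
# module names standing in for model.named_modules().
TP_LAYERS = {'up_proj', 'down_proj'}
module_names = [
    'model.blocks.0.ffn.up_proj',
    'model.blocks.0.ffn.down_proj',
    'model.blocks.0.attn.Wqkv',
]

# Same shape as the comprehension in ffn_tp_strategy: keep every TP layer
# name that appears as a substring of some module name.
tp_layers_in_model = {
    layer for layer in TP_LAYERS for name in module_names if layer in name
}
assert tp_layers_in_model == TP_LAYERS  # remove 'down_proj' above to see it fail
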
5 changes: 3 additions & 2 deletions llmfoundry/utils/config_utils.py
@@ -503,8 +503,9 @@ def update_batch_size_info(cfg: dict[str, Any]) -> dict[str, Any]:
 
 
 def process_init_device(
-    model_cfg: dict[str, Any], fsdp_config: Optional[dict],
-    tp_config: Optional[dict]
+    model_cfg: dict[str, Any],
+    fsdp_config: Optional[dict],
+    tp_config: Optional[dict],
 ):
     # Restrict model init_device to 'meta' and 'cpu',
     # using 'cuda' vs. 'cuda:id' is tricky and can lead to common user errors
11 changes: 7 additions & 4 deletions tests/models/utils/test_tp_strategy.py
@@ -2,9 +2,11 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from torch.distributed._tensor import Replicate, Shard
-from torch.distributed.tensor.parallel import (ColwiseParallel,
-                                               PrepareModuleInput,
-                                               RowwiseParallel,)
+from torch.distributed.tensor.parallel import (
+    ColwiseParallel,
+    PrepareModuleInput,
+    RowwiseParallel,
+)
 
 from llmfoundry.models.mpt.modeling_mpt import ComposerMPTCausalLM
 from llmfoundry.utils.builders import build_tp_strategy
@@ -58,7 +60,8 @@ def test_ffn_tp_strategy_layer_plan():
 
     # Compare expected and actual layer plans
     for (n1, lp1), (n2, lp2) in zip(
-        sorted(expected_layer_plan.items()), sorted(layer_plan.items())
+        sorted(expected_layer_plan.items()),
+        sorted(layer_plan.items()),
     ):
         assert n1 == n2
         assert type(lp1) == type(lp2)
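
For context, the reformatted comparison in the test still walks the two layer plans entry by entry after sorting their items by key. A small sketch with placeholder dicts, using hypothetical string values in place of real ParallelStyle objects, illustrates the pattern:

# Sketch of the comparison pattern in test_ffn_tp_strategy_layer_plan, with
# hypothetical placeholder values instead of real ParallelStyle instances.
expected_layer_plan = {'ffn.up_proj': 'colwise', 'ffn.down_proj': 'rowwise'}
layer_plan = {'ffn.down_proj': 'rowwise', 'ffn.up_proj': 'colwise'}

for (n1, lp1), (n2, lp2) in zip(
    sorted(expected_layer_plan.items()),
    sorted(layer_plan.items()),
):
    assert n1 == n2  # keys match after sorting
    assert type(lp1) == type(lp2)  # paired values have the same type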
