Skip to content

Commit

Permalink
it works!
Browse files Browse the repository at this point in the history
  • Loading branch information
eitanturok committed Sep 25, 2024
1 parent 7b73db5 commit 19f5477
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 2 deletions.
2 changes: 2 additions & 0 deletions llmfoundry/command_utils/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@
ic.configureOutput(includeContext=True)
install()

ic.disable()

def validate_config(train_config: TrainConfig):
"""Validates compatible model and dataloader selection."""
# Validate the rest of the config
Expand Down
2 changes: 1 addition & 1 deletion llmfoundry/models/layers/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def forward(
m = self.norm_2(x)

n = self.apply_ffn(attention_mask, m)
ic(x.shape, x.device, n.shape, n.device)
ic(x.shape, x.device, m.shape, m.device, n.shape, n.device)
# In the following line we move the `x` tensor to the same device as the output of the ffn layer. This operation should be a no-op during training.
# This is done to fix pipeline parallel generation using hf.generate. Please see this comment for details: https://github.com/mosaicml/llm-foundry/pull/1332#issue-2386827204
x = x.to(device=n.device,
Expand Down
3 changes: 2 additions & 1 deletion llmfoundry/models/utils/tp_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ def ffn_tp_strategy(model: ComposerModel) -> dict[str, ParallelStyle]:
elif name.split('.')[-2:] == ['ffn', 'down_proj']:
layer_plan[name] = RowwiseParallel(
input_layouts = Shard(-1),
output_layouts = Replicate(),
# output_layouts = Replicate(),
output_layouts = Shard(0),
)
elif name.split('.')[-1] == 'ffn':
layer_plan[name] = PrepareModuleInput(
Expand Down

0 comments on commit 19f5477

Please sign in to comment.