Added Automatic DP computation
TJ-Solergibert committed Jul 18, 2024
1 parent 50da275 commit 9a7d4a3
Showing 2 changed files with 7 additions and 3 deletions.
4 changes: 2 additions & 2 deletions examples/config_nanoset.yaml
@@ -88,12 +88,12 @@ optimizer:
   weight_decay: 0.01
   zero_stage: 0
 parallelism:
-  dp: 1
+  dp: -1
   expert_parallel_size: 1
   pp: 1
   pp_engine: 1f1b
   tp: 1
-  tp_linear_async_communication: true
+  tp_linear_async_communication: false
   tp_mode: REDUCE_SCATTER
 profiler: null
 tokenizer:
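With dp: -1, the example config no longer hard-codes the data-parallel size: it is derived at startup from the total number of launched ranks, as implemented in the Python change below (a worked sketch of the computation follows that diff).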
6 changes: 5 additions & 1 deletion src/nanotron/config/parallelism_config.py
@@ -1,3 +1,4 @@
+import os
 from dataclasses import dataclass
 from typing import Optional
 
@@ -16,7 +17,7 @@ class ParallelismArgs:
     """Arguments related to TP/PP/DP
     Args:
-        dp: Number of DP replicas
+        dp: Number of DP replicas. Set to -1 to automatically compute DP size after dividing the model w/ PP & TP
         pp: Number of PP stages
         tp: Number of TP replicas
         expert_parallel_size: Number of expert parallel replicas (used only for MoEs)
@@ -47,3 +48,6 @@ def __post_init__(self):
             self.pp_engine = cast_str_to_pipeline_engine(self.pp_engine)
         if isinstance(self.tp_mode, str):
            self.tp_mode = TensorParallelLinearMode[self.tp_mode.upper()]
+
+        if self.dp == -1:
+            self.dp = int(os.environ["WORLD_SIZE"]) // (self.tp * self.pp)
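For intuition, here is a minimal, self-contained sketch of the resolution logic this commit adds. The WORLD_SIZE environment variable matches the convention of torchrun-style launchers; the standalone resolve_dp helper is illustrative only and not part of nanotron:

    import os

    def resolve_dp(dp: int, tp: int, pp: int) -> int:
        """Mirror of the __post_init__ logic above: dp == -1 means
        'use whatever ranks remain after carving out TP and PP'."""
        if dp == -1:
            # torchrun/torch.distributed launchers export WORLD_SIZE
            # as the total number of ranks in the job.
            return int(os.environ["WORLD_SIZE"]) // (tp * pp)
        return dp

    # Example: an 8-GPU job with tp=2 and pp=2 leaves 8 // (2 * 2) = 2
    # data-parallel replicas.
    os.environ["WORLD_SIZE"] = "8"
    assert resolve_dp(-1, tp=2, pp=2) == 2
    # An explicit dp is passed through untouched.
    assert resolve_dp(4, tp=2, pp=2) == 4

Note the floor division: if WORLD_SIZE is not a multiple of tp * pp, the computed dp silently rounds down, and the mismatch would presumably surface later when the process grid is built.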
