Skip to content

Commit

Permalink
Add an argument to enable normalization of routing weights (#58)
Browse files Browse the repository at this point in the history
* Add an argument to enable normalization of routing weights

* pr comment
  • Loading branch information
vchiley authored Dec 13, 2023
1 parent 5897cd6 commit a6ac3ce
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 0 deletions.
1 change: 1 addition & 0 deletions megablocks/layers/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class Arguments:
moe_num_experts : int = 1
moe_top_k : int = 1
moe_capacity_factor : int = 1
moe_normalize_expert_weights: bool = False
moe_loss_weight : float = 0.1
moe_jitter_eps : Optional[float] = None
moe_lbl_in_fp32 : bool = False
Expand Down
2 changes: 2 additions & 0 deletions megablocks/layers/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ def forward(self, x):

scores = self.layer(x.view(-1, x.shape[-1])).softmax(dim=-1)
expert_weights, expert_indices = self._top_k(scores)
if self.args.moe_normalize_expert_weights:
expert_weights /= expert_weights.sum(dim=-1, keepdim=True)

expert_indices = (
_uniform_expert_assignment(expert_indices, self.args.moe_num_experts)
Expand Down

0 comments on commit a6ac3ce

Please sign in to comment.