From ba3a463307cc4cffd309e3957621513066de406f Mon Sep 17 00:00:00 2001
From: Vitaliy Chiley
Date: Wed, 13 Dec 2023 09:17:05 -0800
Subject: [PATCH] remove superfluous return; add doc str

---
 llmfoundry/models/layers/ffn.py            | 22 ++++++++++++++++------
 llmfoundry/models/mpt/configuration_mpt.py |  2 +-
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/llmfoundry/models/layers/ffn.py b/llmfoundry/models/layers/ffn.py
index 8f37b39306..d5453dec5f 100644
--- a/llmfoundry/models/layers/ffn.py
+++ b/llmfoundry/models/layers/ffn.py
@@ -19,11 +19,21 @@
 log = logging.getLogger(__name__)
 
 
-def _resolve_ffn_hidden_and_exp_ratio(
+def resolve_ffn_hidden_and_exp_ratio(
     d_model: int,
     expansion_ratio: Union[int, float],
     ffn_hidden_size: Optional[int] = None,
-) -> tuple[Union[int, float], int]:
+) -> int:
+    """Resolve the hidden size of the feed-forward network.
+
+    Args:
+        d_model (int): The dimension of the input and output of the feed-forward network.
+        expansion_ratio (Union[int, float]): The expansion ratio of the feed-forward network.
+        ffn_hidden_size (Optional[int]): The hidden size of the feed-forward network.
+
+    Returns:
+        int: The hidden size of the feed-forward network.
+    """
     if ffn_hidden_size is not None:
         log.info(
             f'`expansion_ratio` (={expansion_ratio}) ignored when `ffn_hidden_size` (={ffn_hidden_size}) is specified.'
@@ -32,9 +42,9 @@
         ffn_hidden_size = int(d_model * expansion_ratio)
         if ffn_hidden_size != d_model * expansion_ratio:
             raise ValueError(
-                f'`d_model * expansion_ratio` ({ffn_hidden_size}) must be an integer.'
+                f'`d_model * expansion_ratio` (={d_model * expansion_ratio}) must be an integer.'
             )
-    return expansion_ratio, ffn_hidden_size
+    return ffn_hidden_size
 
 
 class MPTMLP(nn.Module):
@@ -49,7 +59,7 @@ def __init__(
         bias: bool = True,
     ):
         super().__init__()
-        expansion_ratio, ffn_hidden_size = _resolve_ffn_hidden_and_exp_ratio(
+        ffn_hidden_size = resolve_ffn_hidden_and_exp_ratio(
             d_model, expansion_ratio, ffn_hidden_size)
         self.fc_kwargs: dict[str, Any] = {
             'bias': bias,
@@ -138,7 +148,7 @@ def build_ffn(
         )
     elif ffn_type == 'te_ln_mlp':
         assert te is not None
-        _, ffn_hidden_size = _resolve_ffn_hidden_and_exp_ratio(
+        ffn_hidden_size = resolve_ffn_hidden_and_exp_ratio(
             d_model, expansion_ratio, ffn_hidden_size)
         return te.LayerNormMLP(
             hidden_size=d_model,
diff --git a/llmfoundry/models/mpt/configuration_mpt.py b/llmfoundry/models/mpt/configuration_mpt.py
index b9b4929ad0..2ecc726aa3 100644
--- a/llmfoundry/models/mpt/configuration_mpt.py
+++ b/llmfoundry/models/mpt/configuration_mpt.py
@@ -70,7 +70,7 @@ def __init__(
             d_model (int): The size of the embedding dimension of the model.
             n_heads (int): The number of attention heads.
             n_layers (int): The number of layers in the model.
-            expansion_ratio (int, float): The ratio of the up/down scale in the ffn.
+            expansion_ratio (Union[int, float]): The ratio of the up/down scale in the ffn.
             max_seq_len (int): The maximum sequence length of the model.
             vocab_size (int): The size of the vocabulary.
             resid_pdrop (float): The dropout probability applied to the attention output before combining with residual.
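
For reference, a minimal usage sketch of the renamed helper (not part of the commit; it assumes `llmfoundry` is installed with this patch applied, so `resolve_ffn_hidden_and_exp_ratio` is importable from the module changed above):

# A minimal sketch, assuming llmfoundry is importable and this patch is applied.
from llmfoundry.models.layers.ffn import resolve_ffn_hidden_and_exp_ratio

# With no explicit ffn_hidden_size, the hidden size is d_model * expansion_ratio.
assert resolve_ffn_hidden_and_exp_ratio(d_model=768, expansion_ratio=4) == 3072

# An explicit ffn_hidden_size takes precedence; expansion_ratio is ignored (and logged).
assert resolve_ffn_hidden_and_exp_ratio(768, 4, ffn_hidden_size=2048) == 2048

# A fractional ratio is allowed only if the product is still an integer.
assert resolve_ffn_hidden_and_exp_ratio(d_model=512, expansion_ratio=2.5) == 1280
try:
    resolve_ffn_hidden_and_exp_ratio(d_model=3, expansion_ratio=2.5)
except ValueError:
    pass  # 3 * 2.5 = 7.5 is not an integer, so the helper raises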