From c0ac73aba04c02e77d5edb404b2a46727ab27203 Mon Sep 17 00:00:00 2001
From: Daniel King
Date: Thu, 11 Apr 2024 19:45:56 -0700
Subject: [PATCH] debug

---
 llmfoundry/models/layers/blocks.py    | 3 ++-
 llmfoundry/models/mpt/modeling_mpt.py | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/llmfoundry/models/layers/blocks.py b/llmfoundry/models/layers/blocks.py
index 2b94d35d64..5f6fba1803 100644
--- a/llmfoundry/models/layers/blocks.py
+++ b/llmfoundry/models/layers/blocks.py
@@ -61,6 +61,7 @@ def __init__(
         use_pad_tok_in_ffn: bool = True,
         **kwargs: Any,
     ):
+        print(ffn_config)
         if attn_config is None:
             attn_config = attn_config_defaults
 
@@ -89,7 +90,7 @@ def __init__(
                 d_model=d_model,
                 n_heads=n_heads,
                 attn_config=attn_config,
-                ffn_config=ffn_config,
+                ffn_type=ffn_type,
                 fc_type=fc_type,
                 resid_pdrop=resid_pdrop,
                 norm_type=norm_type,
diff --git a/llmfoundry/models/mpt/modeling_mpt.py b/llmfoundry/models/mpt/modeling_mpt.py
index aca2350051..3baab79469 100644
--- a/llmfoundry/models/mpt/modeling_mpt.py
+++ b/llmfoundry/models/mpt/modeling_mpt.py
@@ -325,6 +325,7 @@ def __init__(self, config: MPTConfig):
         self.emb_drop = nn.Dropout(config.emb_pdrop)
         self.mb_args = None
         block_args = config.to_dict()
+        print(block_args['ffn_config'], block_args['ffn_config']['ffn_type'])
         if block_args['ffn_config']['ffn_type'] in ffns_with_megablocks:
             block_args['ffn_config'] = config_moe_args(
                 block_args['ffn_config'],
@@ -333,6 +334,8 @@ def __init__(self, config: MPTConfig):
                 config.n_layers,
             )
             self.mb_args = block_args['ffn_config'].get('args')
+        print(block_args['ffn_config'], block_args['ffn_config']['ffn_type'])
+
         self.blocks = nn.ModuleList([
             MPTBlock(
                 device=config.init_device,