From b9a5abc36ec74c783c87dd6373d0b951f1f91933 Mon Sep 17 00:00:00 2001
From: Daniel King
Date: Fri, 12 Apr 2024 05:02:14 +0000
Subject: [PATCH] logs

---
 scripts/train/train.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/scripts/train/train.py b/scripts/train/train.py
index 5bcce0038e..96226e8c7c 100644
--- a/scripts/train/train.py
+++ b/scripts/train/train.py
@@ -102,12 +102,18 @@ def validate_config(cfg: DictConfig):
             '`load_in_8bit` is only supported for evaluation rather than training.'
         )
 
+    print('in validate')
+    print(cfg.model.get('ffn_config', {}).get('ffn_type', 'mptmlp'))
+    print(cfg.model.get('ffn_config', {}).get('ffn_type', 'mptmlp') in ffns_with_megablocks)
     if cfg.model.get('ffn_config', {}).get('ffn_type', 'mptmlp') in ffns_with_megablocks:
+        print('inside')
         moe_world_size = cfg.model.get('ffn_config', {}).get('moe_world_size', 1)
+        print(moe_world_size)
         use_orig_params = cfg.get('fsdp_config', {}).get('use_orig_params', True)
+        print(use_orig_params)
         if moe_world_size > 1 and not use_orig_params:
             raise ValueError(
                 f'MoEs with expert parallelism (moe_world_size {moe_world_size} > 1) require `use_orig_params=True`.'
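
Note (not part of the patch): below is a minimal standalone sketch of the check these prints instrument, using the logging module rather than bare print calls. It assumes cfg behaves like a plain nested dict (the real config is an omegaconf DictConfig accessed as cfg.model) and uses a stand-in value for ffns_with_megablocks; both are assumptions for illustration, not taken from the patch.

import logging

log = logging.getLogger(__name__)

# Assumed stand-in for the registry of megablocks-backed ffn types.
ffns_with_megablocks = {'mb_moe', 'mb_dmoe'}

def check_moe_fsdp_compat(cfg: dict) -> None:
    """Sketch of the validated condition, with debug logging instead of print."""
    ffn_type = cfg.get('model', {}).get('ffn_config', {}).get('ffn_type', 'mptmlp')
    log.debug('ffn_type=%s, uses_megablocks=%s', ffn_type,
              ffn_type in ffns_with_megablocks)
    if ffn_type in ffns_with_megablocks:
        moe_world_size = cfg.get('model', {}).get('ffn_config', {}).get('moe_world_size', 1)
        use_orig_params = cfg.get('fsdp_config', {}).get('use_orig_params', True)
        log.debug('moe_world_size=%s, use_orig_params=%s', moe_world_size, use_orig_params)
        if moe_world_size > 1 and not use_orig_params:
            raise ValueError(
                f'MoEs with expert parallelism (moe_world_size {moe_world_size} > 1) '
                'require `use_orig_params=True`.'
            )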