From 648b1bd410f511eb57ec3834904d6257b05a3aae Mon Sep 17 00:00:00 2001 From: Daniel King <43149077+dakinggg@users.noreply.github.com> Date: Sun, 24 Mar 2024 12:37:55 -0700 Subject: [PATCH] Fix multi model eval (#1055) --- llmfoundry/utils/__init__.py | 15 ++- ...gger_utils.py => mosaicml_logger_utils.py} | 97 ++++++++----------- scripts/eval/eval.py | 8 +- scripts/train/train.py | 6 +- 4 files changed, 57 insertions(+), 69 deletions(-) rename llmfoundry/utils/{mosaicmllogger_utils.py => mosaicml_logger_utils.py} (58%) diff --git a/llmfoundry/utils/__init__.py b/llmfoundry/utils/__init__.py index 86e6ea2c6b..e8c90a6007 100644 --- a/llmfoundry/utils/__init__.py +++ b/llmfoundry/utils/__init__.py @@ -18,10 +18,15 @@ from llmfoundry.utils.logging_utils import SpecificWarningFilter from llmfoundry.utils.model_download_utils import ( download_from_hf_hub, download_from_http_fileserver, download_from_oras) -from llmfoundry.utils.mosaicmllogger_utils import (create_mosaicml_logger, - find_mosaicml_logger, - log_eval_analytics, - log_train_analytics) + +# isort: off +from llmfoundry.utils.mosaicml_logger_utils import ( + find_mosaicml_logger, + log_eval_analytics, + log_train_analytics, + maybe_create_mosaicml_logger, +) +# isort: on from llmfoundry.utils.prompt_files import load_prompts, load_prompts_from_file from llmfoundry.utils.registry_utils import (TypedRegistry, construct_from_registry, @@ -60,5 +65,5 @@ 'find_mosaicml_logger', 'log_eval_analytics', 'log_train_analytics', - 'create_mosaicml_logger', + 'maybe_create_mosaicml_logger', ] diff --git a/llmfoundry/utils/mosaicmllogger_utils.py b/llmfoundry/utils/mosaicml_logger_utils.py similarity index 58% rename from llmfoundry/utils/mosaicmllogger_utils.py rename to llmfoundry/utils/mosaicml_logger_utils.py index 182a7eef80..e54f11ce32 100644 --- a/llmfoundry/utils/mosaicmllogger_utils.py +++ b/llmfoundry/utils/mosaicml_logger_utils.py @@ -10,18 +10,27 @@ MOSAICML_PLATFORM_ENV_VAR) from omegaconf import DictConfig, ListConfig - -def create_mosaicml_logger() -> Union[MosaicMLLogger, None]: +_MODEL_KEYS_TO_LOG = [ + 'pretrained_model_name_or_path', + 'pretrained', + 'vocab_size', + 'd_model', + 'n_heads', + 'n_layers', + 'expansion_ratio', + 'max_seq_len', +] + + +def maybe_create_mosaicml_logger() -> Optional[MosaicMLLogger]: """Creates a MosaicMLLogger if the run was sent from the Mosaic platform.""" if os.environ.get(MOSAICML_PLATFORM_ENV_VAR, 'false').lower( ) == 'true' and os.environ.get(MOSAICML_ACCESS_TOKEN_ENV_VAR): - # Adds mosaicml logger to composer if the run was sent from Mosaic platform, - # access token is set, and mosaic logger wasn't previously added return MosaicMLLogger() def find_mosaicml_logger( - loggers: List[LoggerDestination]) -> Union[MosaicMLLogger, None]: + loggers: List[LoggerDestination]) -> Optional[MosaicMLLogger]: """Returns the first MosaicMLLogger from a list, and None otherwise.""" return next( (logger for logger in loggers if isinstance(logger, MosaicMLLogger)), @@ -37,31 +46,22 @@ def log_eval_analytics(mosaicml_logger: MosaicMLLogger, 'llmfoundry/script': 'eval', } - if eval_gauntlet_config is not None: - metrics['llmfoundry/gauntlet_configured'] = True - else: - metrics['llmfoundry/gauntlet_configured'] = False - - if isinstance(icl_tasks, str): - metrics['llmfoundry/icl_configured'] = True - elif len(icl_tasks) > 0: - metrics['llmfoundry/icl_configured'] = True - else: - metrics['llmfoundry/icl_configured'] = False + metrics['llmfoundry/gauntlet_configured'] = eval_gauntlet_config is not None + metrics['llmfoundry/icl_configured'] = isinstance(icl_tasks, + str) or len(icl_tasks) > 0 metrics['llmfoundry/model_configs'] = [] for model_config in model_configs: + nested_model_config = model_config.get('model', {}) model_config_data = {} - if model_config.get('vocab_size', None) is not None: - model_config_data['vocab_size'] = model_config.get('vocab_size') - if model_config.get('d_model', None) is not None: - model_config_data['d_model'] = model_config.get('d_model') - if model_config.get('n_heads', None) is not None: - model_config_data['n_heads'] = model_config.get('n_heads') + for key in _MODEL_KEYS_TO_LOG: + if nested_model_config.get(key, None) is not None: + model_config_data[key] = nested_model_config.get(key) if len(model_config_data) > 0: metrics['llmfoundry/model_configs'].append( json.dumps(model_config_data, sort_keys=True)) + mosaicml_logger.log_metrics(metrics) mosaicml_logger._flush_metadata(force_flush=True) @@ -69,22 +69,18 @@ def log_eval_analytics(mosaicml_logger: MosaicMLLogger, def log_train_analytics(mosaicml_logger: MosaicMLLogger, model_config: DictConfig, train_loader_config: DictConfig, - eval_loader_config: Union[DictConfig, ListConfig, None], - callback_configs: Union[DictConfig, None], - tokenizer_name: str, load_path: Union[str, None], + eval_loader_config: Optional[Union[DictConfig, + ListConfig]], + callback_configs: Optional[DictConfig], + tokenizer_name: str, load_path: Optional[str], icl_tasks_config: Optional[Union[ListConfig, str]], eval_gauntlet: Optional[Union[DictConfig, str]]): """Logs analytics for runs using the `train.py` script.""" train_loader_dataset = train_loader_config.get('dataset', {}) metrics: Dict[str, Any] = { - 'llmfoundry/tokenizer_name': - tokenizer_name, - 'llmfoundry/script': - 'train', - 'llmfoundry/train_loader_name': - train_loader_config.get('name'), - 'llmfoundry/train_loader_workers': - train_loader_dataset.get('num_workers'), + 'llmfoundry/tokenizer_name': tokenizer_name, + 'llmfoundry/script': 'train', + 'llmfoundry/train_loader_name': train_loader_config.get('name'), } if callback_configs is not None: @@ -92,20 +88,9 @@ def log_train_analytics(mosaicml_logger: MosaicMLLogger, name for name, _ in callback_configs.items() ] - if eval_gauntlet is not None: - metrics['llmfoundry/gauntlet_configured'] = True - else: - metrics['llmfoundry/gauntlet_configured'] = False - - if icl_tasks_config is not None: - if isinstance(icl_tasks_config, str): - metrics['llmfoundry/icl_configured'] = True - elif len(icl_tasks_config) > 0: - metrics['llmfoundry/icl_configured'] = True - else: - metrics['llmfoundry/icl_configured'] = False - else: - metrics['llmfoundry/icl_configured'] = False + metrics['llmfoundry/gauntlet_configured'] = eval_gauntlet is not None + metrics['llmfoundry/icl_configured'] = (icl_tasks_config is not None and ( + (isinstance(icl_tasks_config, str) or len(icl_tasks_config) > 0))) if train_loader_dataset.get('hf_name', None) is not None: metrics['llmfoundry/train_dataset_hf_name'] = train_loader_dataset.get( @@ -130,8 +115,6 @@ def log_train_analytics(mosaicml_logger: MosaicMLLogger, eval_loader_info = {} eval_loader_dataset = loader_config.get('dataset', {}) eval_loader_info['name'] = loader_config.get('name') - eval_loader_info['num_workers'] = eval_loader_dataset.get( - 'num_workers', None) if eval_loader_dataset.get('hf_name', None) is not None: eval_loader_info['dataset_hf_name'] = eval_loader_dataset.get( 'hf_name') @@ -140,15 +123,13 @@ def log_train_analytics(mosaicml_logger: MosaicMLLogger, metrics['llmfoundry/eval_loaders'].append( json.dumps(eval_loader_info, sort_keys=True)) - if model_config['name'] == 'hf_casual_lm': - metrics['llmfoundry/model_name'] = model_config.get( - 'pretrained_model_name_or_path') - if model_config.get('vocab_size', None) is not None: - metrics['llmfoundry/vocab_size'] = model_config.get('vocab_size'), - if model_config.get('d_model', None) is not None: - metrics['llmfoundry/d_model'] = model_config.get('d_model') - if model_config.get('n_heads', None) is not None: - metrics['llmfoundry/n_heads'] = model_config.get('n_heads') + model_config_data = {} + for key in _MODEL_KEYS_TO_LOG: + if model_config.get(key, None) is not None: + model_config_data[f'llmfoundry/{key}'] = model_config.get(key) + + if len(model_config_data) > 0: + metrics.update(model_config_data) mosaicml_logger.log_metrics(metrics) mosaicml_logger._flush_metadata(force_flush=True) diff --git a/scripts/eval/eval.py b/scripts/eval/eval.py index c1267ab1dd..24394ad906 100644 --- a/scripts/eval/eval.py +++ b/scripts/eval/eval.py @@ -20,8 +20,8 @@ from rich.traceback import install from transformers import PreTrainedTokenizerBase -from llmfoundry.utils import (create_mosaicml_logger, find_mosaicml_logger, - log_eval_analytics) +from llmfoundry.utils import (find_mosaicml_logger, log_eval_analytics, + maybe_create_mosaicml_logger) install() from llmfoundry.models.model_registry import COMPOSER_MODEL_REGISTRY @@ -295,7 +295,7 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]: mosaicml_logger = find_mosaicml_logger(loggers) if mosaicml_logger is None: - mosaicml_logger = create_mosaicml_logger() + mosaicml_logger = maybe_create_mosaicml_logger() # mosaicml_logger will be None if run isn't on MosaicML platform if mosaicml_logger is not None: loggers.append(mosaicml_logger) @@ -367,6 +367,8 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]: assert models_df is not None print(models_df.to_markdown(index=False)) + trainer.close() + return trainers, eval_gauntlet_df diff --git a/scripts/train/train.py b/scripts/train/train.py index c608dfb619..e1d91c3cdb 100644 --- a/scripts/train/train.py +++ b/scripts/train/train.py @@ -20,8 +20,8 @@ from omegaconf import OmegaConf as om from rich.traceback import install -from llmfoundry.utils import (create_mosaicml_logger, find_mosaicml_logger, - log_train_analytics) +from llmfoundry.utils import (find_mosaicml_logger, log_train_analytics, + maybe_create_mosaicml_logger) install() @@ -451,7 +451,7 @@ def main(cfg: DictConfig) -> Trainer: mosaicml_logger = find_mosaicml_logger(loggers) if mosaicml_logger is None: - mosaicml_logger = create_mosaicml_logger() + mosaicml_logger = maybe_create_mosaicml_logger() if mosaicml_logger is not None: # mosaicml_logger will be None if run isn't on MosaicML platform loggers.append(mosaicml_logger)