From 7a8a1564827cbcbc281a6bdc4a11bc8f584142bd Mon Sep 17 00:00:00 2001 From: Max Marion Date: Thu, 28 Mar 2024 14:38:33 -0700 Subject: [PATCH] Output eval logging batch (#961) * Skip flaky lion8b test (#598) * relax atol and add retries to reduce flakiness in lion8b timing test * add eval output logging * add back tasks * foo * add rlhf prompts * add rlhf prompts * add rlhf prompts * add rlhf prompts * add rlhf prompts * fix prompt * fix prompt * modify mcli * test * test * fix * fix merge * wip * merge * reset files, wip commit * rm small changes * reduce changes * reduce changes * . * wip * rm batch keys * revert init device * linting * add import * fix import * add eval_output_logging to registry * readd import * pyright + linting --------- Co-authored-by: dblalock Co-authored-by: Jeremy Dohmann --- llmfoundry/callbacks/__init__.py | 7 ++++--- scripts/eval/eval.py | 19 ++++++++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/llmfoundry/callbacks/__init__.py b/llmfoundry/callbacks/__init__.py index aaba90eeec..d9bb3c24a7 100644 --- a/llmfoundry/callbacks/__init__.py +++ b/llmfoundry/callbacks/__init__.py @@ -1,9 +1,9 @@ # Copyright 2022 MosaicML LLM Foundry authors # SPDX-License-Identifier: Apache-2.0 -from composer.callbacks import (EarlyStopper, Generate, LRMonitor, - MemoryMonitor, MemorySnapshot, OOMObserver, - OptimizerMonitor, RuntimeEstimator, +from composer.callbacks import (EarlyStopper, EvalOutputLogging, Generate, + LRMonitor, MemoryMonitor, MemorySnapshot, + OOMObserver, OptimizerMonitor, RuntimeEstimator, SpeedMonitor) from llmfoundry.callbacks.async_eval_callback import AsyncEval @@ -33,6 +33,7 @@ callbacks.register('mono_checkpoint_saver', func=MonolithicCheckpointSaver) callbacks.register('scheduled_gc', func=ScheduledGarbageCollector) callbacks.register('oom_observer', func=OOMObserver) +callbacks.register('eval_output_logging', func=EvalOutputLogging) callbacks_with_config.register('async_eval', func=AsyncEval) callbacks_with_config.register('curriculum_learning', func=CurriculumLearning) diff --git a/scripts/eval/eval.py b/scripts/eval/eval.py index 961b50e254..22108d4c75 100644 --- a/scripts/eval/eval.py +++ b/scripts/eval/eval.py @@ -11,6 +11,7 @@ import pandas as pd import torch +from composer.core import Callback from composer.loggers.logger_destination import LoggerDestination from composer.trainer import Trainer from composer.utils import dist, get_device, reproducibility @@ -23,8 +24,9 @@ install() from llmfoundry.utils.builders import (add_metrics_to_eval_loaders, - build_composer_model, build_evaluators, - build_logger, build_tokenizer) + build_callback, build_composer_model, + build_evaluators, build_logger, + build_tokenizer) from llmfoundry.utils.config_utils import (log_config, pop_config, process_init_device) from llmfoundry.utils.registry_utils import import_file @@ -49,6 +51,7 @@ def evaluate_model( eval_gauntlet_df: Optional[pd.DataFrame], eval_subset_num_batches: int, icl_subset_num_batches: Optional[int], + callback_configs: Optional[DictConfig], metadata: Optional[Dict[str, str]], logged_config: DictConfig, should_log_config: bool = True, @@ -73,7 +76,12 @@ def evaluate_model( icl_subset_num_batches=icl_subset_num_batches, ) - callbacks = [] + # Callbacks + callbacks: List[Callback] = [ + build_callback(str(name), callback_cfg) + for name, callback_cfg in callback_configs.items() + ] if callback_configs else [] + if eval_gauntlet_callback is not None: callbacks.append(eval_gauntlet_callback) @@ -238,6 +246,10 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]: # Pop out interpolation variables. pop_config(cfg, 'model_name_or_path', must_exist=False, default_value=None) + callback_configs: Optional[DictConfig] = pop_config(cfg, + 'callbacks', + must_exist=False, + default_value=None) # Warn for unused parameters for key in cfg: @@ -296,6 +308,7 @@ def main(cfg: DictConfig) -> Tuple[List[Trainer], pd.DataFrame]: python_log_level=python_log_level, precision=precision, eval_gauntlet_df=eval_gauntlet_df, + callback_configs=callback_configs, eval_subset_num_batches=eval_subset_num_batches, icl_subset_num_batches=icl_subset_num_batches, metadata=metadata,