From 59a5a04229c169ea35b1feea419ea1eff7c9c17b Mon Sep 17 00:00:00 2001 From: Cheng Li Date: Thu, 1 Feb 2024 13:40:05 -0800 Subject: [PATCH] add oom observer callback --- llmfoundry/utils/builders.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llmfoundry/utils/builders.py b/llmfoundry/utils/builders.py index 457f146986..1c90ff9e25 100644 --- a/llmfoundry/utils/builders.py +++ b/llmfoundry/utils/builders.py @@ -12,7 +12,7 @@ import torch from composer import algorithms from composer.callbacks import (EarlyStopper, Generate, LRMonitor, - MemoryMonitor, OptimizerMonitor, + MemoryMonitor, OOMObserver, OptimizerMonitor, RuntimeEstimator, SpeedMonitor) from composer.core import Algorithm, Callback, Evaluator from composer.datasets.in_context_learning_evaluation import \ @@ -166,6 +166,8 @@ def build_callback( return LRMonitor() elif name == 'memory_monitor': return MemoryMonitor() + elif name == 'oom_observer': + return OOMObserver(**kwargs) elif name == 'speed_monitor': return SpeedMonitor(window_size=kwargs.get('window_size', 1), gpu_flops_available=kwargs.get(