Add eval loader to eval script #742

Merged: 14 commits, Nov 30, 2023
32 changes: 12 additions & 20 deletions llmfoundry/data/dataloader.py
@@ -11,6 +11,12 @@
from llmfoundry.data.finetuning.dataloader import build_finetuning_dataloader
from llmfoundry.data.text_data import build_text_dataloader

LOADER_NAME_TO_FUNCTION = {
'text': build_text_dataloader,
'text_denoising': build_text_denoising_dataloader,
'finetuning': build_finetuning_dataloader,
}


def build_dataloader(cfg: DictConfig, tokenizer: PreTrainedTokenizerBase,
device_batch_size: int) -> DataSpec:
@@ -22,23 +28,9 @@ def build_dataloader(cfg: DictConfig, tokenizer: PreTrainedTokenizerBase,
device_batch_size (int): The size of the batches (number of examples)
that the dataloader will produce.
"""
if cfg.name == 'text':
return build_text_dataloader(
cfg,
tokenizer,
device_batch_size,
)
elif cfg.name == 'text_denoising':
return build_text_denoising_dataloader(
cfg,
tokenizer,
device_batch_size,
)
elif cfg.name == 'finetuning':
return build_finetuning_dataloader(
cfg,
tokenizer,
device_batch_size,
)
else:
raise ValueError(f'Not sure how to build dataloader with config: {cfg}')
if cfg.name not in LOADER_NAME_TO_FUNCTION:
allowed = ', '.join(LOADER_NAME_TO_FUNCTION.keys())
raise ValueError(f'Expected dataloader name to be one of {allowed}' +
f' but found name "{cfg.name}" in config: {cfg}')

return LOADER_NAME_TO_FUNCTION[cfg.name](cfg, tokenizer, device_batch_size)
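
For context, here is a minimal standalone sketch of the dispatch-table pattern this hunk introduces. The stub builders below are placeholders, not the real llmfoundry functions:

from typing import Any, Callable, Dict

def build_text_dataloader(cfg: Dict[str, Any], tokenizer: Any, batch_size: int) -> str:
    # Stand-in for llmfoundry.data.text_data.build_text_dataloader
    return f'text loader, batch_size={batch_size}'

def build_finetuning_dataloader(cfg: Dict[str, Any], tokenizer: Any, batch_size: int) -> str:
    # Stand-in for llmfoundry.data.finetuning.dataloader.build_finetuning_dataloader
    return f'finetuning loader, batch_size={batch_size}'

LOADER_NAME_TO_FUNCTION: Dict[str, Callable[..., str]] = {
    'text': build_text_dataloader,
    'finetuning': build_finetuning_dataloader,
}

def build_dataloader(cfg: Dict[str, Any], tokenizer: Any, batch_size: int) -> str:
    name = cfg['name']
    if name not in LOADER_NAME_TO_FUNCTION:
        allowed = ', '.join(LOADER_NAME_TO_FUNCTION)
        raise ValueError(f'Expected dataloader name to be one of {allowed}'
                         f' but found name "{name}" in config: {cfg}')
    return LOADER_NAME_TO_FUNCTION[name](cfg, tokenizer, batch_size)

print(build_dataloader({'name': 'text'}, tokenizer=None, batch_size=8))  # text loader, batch_size=8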
30 changes: 30 additions & 0 deletions llmfoundry/utils/builders.py
@@ -31,6 +31,7 @@
HuggingFaceCheckpointer, LayerFreezing,
MonolithicCheckpointSaver,
ScheduledGarbageCollector)
from llmfoundry.data.dataloader import build_dataloader
from llmfoundry.optim import (DecoupledAdaLRLion, DecoupledClipLion,
DecoupledLionW, DecoupledLionW_8bit)
from llmfoundry.optim.scheduler import InverseSquareRootWithWarmupScheduler
@@ -39,6 +40,35 @@
log = logging.getLogger(__name__)


def build_eval_loader(
eval_loader_config: Union[DictConfig, ListConfig],
model: Any,
tokenizer: PreTrainedTokenizerBase,
device_eval_batch_size: int,
) -> List[Evaluator]:
assert model.train_metrics is not None
eval_metric_names = list(model.train_metrics.keys())

evaluators: List[Evaluator] = []
if isinstance(eval_loader_config, ListConfig):
eval_configs: ListConfig = eval_loader_config
is_multi_eval = True
else:
eval_configs = ListConfig([eval_loader_config])
is_multi_eval = False

for eval_config in eval_configs:
eval_dataloader = build_dataloader(eval_config, tokenizer,
device_eval_batch_size)
eval_loader: Evaluator = Evaluator(
label=f'eval/{eval_config.label}' if is_multi_eval else 'eval',
dataloader=eval_dataloader,
metric_names=eval_metric_names,
)
evaluators.append(eval_loader)
return evaluators


def build_icl_data_and_gauntlet(
icl_tasks_config: Union[str, ListConfig],
eval_gauntlet_config: Optional[Union[str, DictConfig]],
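The new build_eval_loader above accepts either a single DictConfig or a ListConfig of loader configs. A hedged sketch of the two shapes it distinguishes (keys are illustrative; real configs also carry a full dataset section):

from omegaconf import DictConfig, ListConfig, OmegaConf

# Single eval loader -> one Evaluator labeled 'eval'
single = OmegaConf.create({'name': 'finetuning'})

# Multiple eval loaders -> Evaluators labeled 'eval/val_small' and 'eval/val_large'
multi = OmegaConf.create([
    {'name': 'finetuning', 'label': 'val_small'},
    {'name': 'finetuning', 'label': 'val_large'},
])

assert isinstance(single, DictConfig) and isinstance(multi, ListConfig)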
39 changes: 31 additions & 8 deletions scripts/eval/eval.py
@@ -21,13 +21,14 @@

from llmfoundry.models import MPTForCausalLM
from llmfoundry.models.model_registry import COMPOSER_MODEL_REGISTRY
from llmfoundry.utils.builders import (build_icl_data_and_gauntlet,
from llmfoundry.utils.builders import (build_eval_loader,
build_icl_data_and_gauntlet,
build_logger, build_tokenizer)
from llmfoundry.utils.config_utils import pop_config, process_init_device


def load_peft_model(model_cfg: DictConfig, tokenizer: PreTrainedTokenizerBase,
num_retries: int) -> Optional[ComposerModel]:
num_retries: int) -> ComposerModel:
try:
from peft import PeftModel
except ImportError as e:
@@ -43,7 +44,8 @@ def load_peft_model(model_cfg: DictConfig, tokenizer: PreTrainedTokenizerBase,
}

retries = 0
while retries < num_retries:
composer_model_wrapper = None
while retries < num_retries and composer_model_wrapper is None:
try:
trust_remote_code = model_cfg.get('trust_remote_code', True)
use_auth_token = model_cfg.get('use_auth_token', False)
@@ -58,7 +60,6 @@ def load_peft_model(model_cfg: DictConfig, tokenizer: PreTrainedTokenizerBase,

composer_model_wrapper = COMPOSER_MODEL_REGISTRY[model_cfg.name](
peft_model, tokenizer)
return composer_model_wrapper
except Exception as e:
retries += 1
if retries >= num_retries:
@@ -68,19 +69,21 @@ def load_peft_model(model_cfg: DictConfig, tokenizer: PreTrainedTokenizerBase,
f'Got exception {str(e)} while loading model {model_cfg.name}. {num_retries-retries} retries remaining'
)

assert composer_model_wrapper is not None
return composer_model_wrapper


def load_model(model_cfg: DictConfig, tokenizer: PreTrainedTokenizerBase,
fsdp_config: Optional[Dict],
num_retries: int) -> Optional[ComposerModel]:
fsdp_config: Optional[Dict], num_retries: int) -> ComposerModel:
init_context = process_init_device(model_cfg, fsdp_config)

retries = 0
composer_model = None
with init_context:
while retries < num_retries:
while retries < num_retries and composer_model is None:
try:
composer_model = COMPOSER_MODEL_REGISTRY[model_cfg.name](
model_cfg, tokenizer)
return composer_model
except Exception as e:
retries += 1
if retries >= num_retries:
@@ -90,6 +93,9 @@ def load_model(model_cfg: DictConfig, tokenizer: PreTrainedTokenizerBase,
f'Got exception {str(e)} while loading model {model_cfg.name}. {num_retries-retries} retries remaining'
)

assert composer_model is not None
return composer_model
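
Both loaders now follow the same retry shape: loop until the object is built or retries run out, then assert non-None so the declared (non-Optional) return type holds. A standalone sketch of that pattern, with a made-up flaky builder:

import random
from typing import Optional

def flaky_build() -> str:
    # Made-up stand-in for a model build that sometimes fails transiently.
    if random.random() < 0.5:
        raise RuntimeError('transient failure')
    return 'model'

def build_with_retries(num_retries: int = 3) -> str:
    result: Optional[str] = None
    retries = 0
    while retries < num_retries and result is None:
        try:
            result = flaky_build()
        except RuntimeError as e:
            retries += 1
            if retries >= num_retries:
                raise e
            print(f'Got exception {e}; {num_retries - retries} retries remaining')
    assert result is not None  # satisfies the non-Optional return type
    return result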


def evaluate_model(
model_cfg: DictConfig,
@@ -100,6 +106,7 @@
max_seq_len: int,
device_eval_batch_size: int,
eval_gauntlet_config: Optional[Union[str, DictConfig]],
eval_loader_config: Optional[Union[DictConfig, ListConfig]],
fsdp_config: Optional[Dict],
num_retries: int,
loggers_cfg: Dict[str, Any],
@@ -143,6 +150,15 @@
composer_model = load_model(model_cfg.model, tokenizer, fsdp_config,
num_retries)

if eval_loader_config is not None:
loader_evaluators = build_eval_loader(
eval_loader_config,
composer_model,
tokenizer,
device_eval_batch_size,
)
evaluators.extend(loader_evaluators)

if eval_gauntlet_df is None and eval_gauntlet_callback is not None:
eval_gauntlet_df = pd.DataFrame(
columns=['model_name'] +
@@ -228,6 +244,8 @@ def main(cfg: DictConfig):
default_value='debug')

# Optional Evaluation Parameters with default values
eval_loader_config: Optional[Union[DictConfig, ListConfig]] = pop_config(
cfg, 'eval_loader', must_exist=False, default_value=None)
seed: int = pop_config(cfg, 'seed', must_exist=False, default_value=17)
dist_timeout: Union[float, int] = pop_config(cfg,
'dist_timeout',
@@ -274,6 +292,7 @@
eval_gauntlet_df = None
models_df = None
composite_scores = None
trainers = []
for model_cfg in model_configs:
(trainer, logger_keys, eval_gauntlet_callback,
eval_gauntlet_df) = evaluate_model(
@@ -285,13 +304,15 @@
max_seq_len=max_seq_len,
device_eval_batch_size=device_eval_batch_size,
eval_gauntlet_config=eval_gauntlet_config,
eval_loader_config=eval_loader_config,
fsdp_config=fsdp_config,
num_retries=num_retries,
loggers_cfg=loggers_cfg,
python_log_level=python_log_level,
precision=precision,
eval_gauntlet_df=eval_gauntlet_df,
icl_subset_num_batches=icl_subset_num_batches)
trainers.append(trainer)

if eval_gauntlet_callback is not None:
composite_scores = eval_gauntlet_callback.eval_after_all(
@@ -330,6 +351,8 @@ def main(cfg: DictConfig):
assert models_df is not None
print(models_df.to_markdown(index=False))

return trainers, eval_gauntlet_df


def calculate_markdown_results(logger_keys: List[str], trainer: Trainer,
benchmark_to_taxonomy: Dict[str, str],
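With main() now returning its trainers and the gauntlet dataframe, eval runs can be driven (and tested) programmatically. A hypothetical sketch; the import path, config path, and the 'models' key are assumptions, not part of this PR:

from omegaconf import OmegaConf

from scripts.eval.eval import main  # import path assumed; adjust to your checkout

cfg = OmegaConf.load('eval/yamls/hf_eval.yaml')  # hypothetical eval config
num_models = len(cfg.models)  # hypothetical key; read before main() pops config entries
trainers, eval_gauntlet_df = main(cfg)
assert len(trainers) == num_models  # one Trainer per evaluated model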
59 changes: 21 additions & 38 deletions scripts/train/train.py
@@ -11,7 +11,6 @@

import torch
from composer import Trainer
from composer.core import Evaluator
from composer.core.callback import Callback
from composer.loggers import MosaicMLLogger
from composer.loggers.mosaicml_logger import (MOSAICML_ACCESS_TOKEN_ENV_VAR,
@@ -27,6 +26,7 @@
MPTForCausalLM)
from llmfoundry.data.dataloader import build_dataloader
from llmfoundry.utils.builders import (build_algorithm, build_callback,
build_eval_loader,
build_icl_data_and_gauntlet,
build_logger, build_optimizer,
build_scheduler, build_tokenizer)
@@ -524,37 +524,6 @@ def main(cfg: DictConfig) -> Trainer:
if mosaicml_logger is not None:
mosaicml_logger.log_metrics({'data_validated': time.time()})

## Evaluation
print('Building eval loader...')
evaluators = []
eval_loaders = []
if eval_loader_config is not None:
is_multi_eval = isinstance(eval_loader_config, ListConfig)
eval_configs = eval_loader_config if is_multi_eval else [
eval_loader_config
]
for eval_config in eval_configs:
eval_dataloader = build_dataloader(eval_config, tokenizer,
device_eval_batch_size)
eval_loader = Evaluator(
label=f'eval/{eval_config.label}' if is_multi_eval else 'eval',
dataloader=eval_dataloader,
metric_names=[], # we will add these after model is created
)
eval_loaders.append(eval_loader)

eval_gauntlet_callback = None

if icl_tasks_config is not None:
icl_evaluators, _, eval_gauntlet_callback = build_icl_data_and_gauntlet(
icl_tasks_config, eval_gauntlet_config, tokenizer,
device_eval_batch_size, icl_seq_len if icl_seq_len else max_seq_len,
icl_subset_num_batches)
evaluators.extend(icl_evaluators)

if eval_gauntlet_callback is not None:
callbacks.append(eval_gauntlet_callback)

# Build Model
print('Initializing model...')
with init_context:
@@ -579,13 +548,27 @@
optimizer_name: str = optimizer_config.pop('name')
optimizer = build_optimizer(model, optimizer_name, optimizer_config)

# Now add the eval metrics
## Evaluation
print('Building eval loader...')
evaluators = []
if eval_loader_config is not None:
assert model.train_metrics is not None
eval_metric_names = list(model.train_metrics.keys())
for eval_loader in eval_loaders:
eval_loader.metric_names = eval_metric_names
evaluators.insert(0, eval_loader) # Put the base eval_loaders first
evaluators = build_eval_loader(
eval_loader_config,
model,
tokenizer,
device_eval_batch_size,
)

eval_gauntlet_callback = None
if icl_tasks_config is not None:
icl_evaluators, _, eval_gauntlet_callback = build_icl_data_and_gauntlet(
icl_tasks_config, eval_gauntlet_config, tokenizer,
device_eval_batch_size, icl_seq_len if icl_seq_len else max_seq_len,
icl_subset_num_batches)
evaluators.extend(icl_evaluators)

if eval_gauntlet_callback is not None:
callbacks.append(eval_gauntlet_callback)

# Build the Trainer
print('Building trainer...')
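In train.py the eval-loader build moves to after model construction because the Evaluator metric names are read off the built model's train_metrics. A toy illustration of that ordering constraint (classes and metric names are illustrative only, not llm-foundry code):

from typing import Dict, List

class ToyMetric:
    pass

class ToyModel:
    def __init__(self) -> None:
        # Populated during model construction, so it only exists post-build.
        self.train_metrics: Dict[str, ToyMetric] = {
            'LanguageCrossEntropy': ToyMetric(),
            'LanguagePerplexity': ToyMetric(),
        }

model = ToyModel()
eval_metric_names: List[str] = list(model.train_metrics.keys())
print(eval_metric_names)  # ['LanguageCrossEntropy', 'LanguagePerplexity']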