diff --git a/llmfoundry/metrics/__init__.py b/llmfoundry/metrics/__init__.py
index db4beba80e..116b6dd08c 100644
--- a/llmfoundry/metrics/__init__.py
+++ b/llmfoundry/metrics/__init__.py
@@ -1,8 +1,71 @@
 # Copyright 2024 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
 
+from composer.metrics import (InContextLearningCodeEvalAccuracy,
+                              InContextLearningLMAccuracy,
+                              InContextLearningLMExpectedCalibrationError,
+                              InContextLearningMCExpectedCalibrationError,
+                              InContextLearningMultipleChoiceAccuracy,
+                              InContextLearningQAAccuracy, MaskedAccuracy)
+from composer.metrics.nlp import LanguageCrossEntropy, LanguagePerplexity
+
 from llmfoundry.metrics.token_acc import TokenAccuracy
+from llmfoundry.registry import metrics
+
+metrics.register('token_accuracy', func=TokenAccuracy)
+metrics.register('lm_accuracy', func=InContextLearningLMAccuracy)
+metrics.register('lm_expected_calibration_error',
+                 func=InContextLearningLMExpectedCalibrationError)
+metrics.register('mc_expected_calibration_error',
+                 func=InContextLearningMCExpectedCalibrationError)
+metrics.register('mc_accuracy', func=InContextLearningMultipleChoiceAccuracy)
+metrics.register('qa_accuracy', func=InContextLearningQAAccuracy)
+metrics.register('code_eval_accuracy', func=InContextLearningCodeEvalAccuracy)
+metrics.register('language_cross_entropy', func=LanguageCrossEntropy)
+metrics.register('language_perplexity', func=LanguagePerplexity)
+metrics.register('masked_accuracy', func=MaskedAccuracy)
+
+DEFAULT_CAUSAL_LM_TRAIN_METRICS = [
+    'language_cross_entropy',
+    'language_perplexity',
+    'token_accuracy',
+]
+
+DEFAULT_CAUSAL_LM_EVAL_METRICS = [
+    'language_cross_entropy',
+    'language_perplexity',
+    'token_accuracy',
+    'lm_accuracy',
+    'lm_expected_calibration_error',
+    'mc_expected_calibration_error',
+    'mc_accuracy',
+    'qa_accuracy',
+    'code_eval_accuracy',
+]
+
+DEFAULT_PREFIX_LM_METRICS = [
+    'language_cross_entropy',
+    'masked_accuracy',
+]
+
+DEFAULT_ENC_DEC_METRICS = [
+    'language_cross_entropy',
+    'masked_accuracy',
+]
 
 __all__ = [
     'TokenAccuracy',
+    'InContextLearningLMAccuracy',
+    'InContextLearningLMExpectedCalibrationError',
+    'InContextLearningMCExpectedCalibrationError',
+    'InContextLearningMultipleChoiceAccuracy',
+    'InContextLearningQAAccuracy',
+    'InContextLearningCodeEvalAccuracy',
+    'LanguageCrossEntropy',
+    'LanguagePerplexity',
+    'MaskedAccuracy',
+    'DEFAULT_CAUSAL_LM_TRAIN_METRICS',
+    'DEFAULT_CAUSAL_LM_EVAL_METRICS',
+    'DEFAULT_PREFIX_LM_METRICS',
+    'DEFAULT_ENC_DEC_METRICS',
 ]
diff --git a/llmfoundry/models/hf/hf_causal_lm.py b/llmfoundry/models/hf/hf_causal_lm.py
index d250239931..7640a7b8a8 100644
--- a/llmfoundry/models/hf/hf_causal_lm.py
+++ b/llmfoundry/models/hf/hf_causal_lm.py
@@ -8,21 +8,14 @@
 import warnings
 from typing import TYPE_CHECKING, Any, Dict, Mapping
 
-# required for loading a python model into composer
-from composer.metrics.nlp import (InContextLearningCodeEvalAccuracy,
-                                  InContextLearningLMAccuracy,
-                                  InContextLearningLMExpectedCalibrationError,
-                                  InContextLearningMCExpectedCalibrationError,
-                                  InContextLearningMultipleChoiceAccuracy,
-                                  InContextLearningQAAccuracy,
-                                  LanguageCrossEntropy, LanguagePerplexity)
 from composer.models.huggingface import peft_installed
 from composer.utils import dist
 from omegaconf import DictConfig
 from transformers import (AutoConfig, AutoModelForCausalLM, PreTrainedModel,
                           PreTrainedTokenizerBase)
 
-from llmfoundry.metrics import TokenAccuracy
+from llmfoundry.metrics import (DEFAULT_CAUSAL_LM_EVAL_METRICS,
+                                DEFAULT_CAUSAL_LM_TRAIN_METRICS)
 from llmfoundry.models.hf.hf_fsdp import hf_get_init_device
 from llmfoundry.models.hf.model_wrapper import HuggingFaceModelWithZLoss
 from llmfoundry.models.layers.attention import is_flash_v2_installed
@@ -72,6 +65,8 @@ class ComposerHFCausalLM(HuggingFaceModelWithZLoss):
 
     def __init__(self, om_model_config: DictConfig,
                  tokenizer: PreTrainedTokenizerBase):
+        from llmfoundry.utils.builders import build_metric
+
         pretrained_model_name_or_path = om_model_config.pretrained_model_name_or_path
         pretrained_lora_id_or_path = om_model_config.get(
             'pretrained_lora_id_or_path', None)
@@ -123,25 +118,17 @@ def __init__(self, om_model_config: DictConfig,
                 'PEFT is not installed, but peft_config was passed. Please install LLM Foundry with the peft extra to use peft_config.'
             )
 
-        # Set up training and eval metrics
+        use_train_metrics = om_model_config.get('use_train_metrics', True)
+        train_metric_names = DEFAULT_CAUSAL_LM_TRAIN_METRICS + om_model_config.get(
+            'additional_train_metrics', [])
         train_metrics = [
-            LanguageCrossEntropy(),
-            LanguagePerplexity(),
-            TokenAccuracy()
-        ]
+            build_metric(metric, {}) for metric in train_metric_names
+        ] if use_train_metrics else []
+        eval_metric_names = DEFAULT_CAUSAL_LM_EVAL_METRICS + om_model_config.get(
+            'additional_eval_metrics', [])
         eval_metrics = [
-            LanguageCrossEntropy(),
-            LanguagePerplexity(),
-            TokenAccuracy(),
-            InContextLearningLMAccuracy(),
-            InContextLearningMultipleChoiceAccuracy(),
-            InContextLearningQAAccuracy(),
-            InContextLearningCodeEvalAccuracy(),
-            InContextLearningLMExpectedCalibrationError(),
-            InContextLearningMCExpectedCalibrationError()
+            build_metric(metric, {}) for metric in eval_metric_names
         ]
-        if not om_model_config.get('use_train_metrics', True):
-            train_metrics = []
 
         # Construct the Hugging Face config to use
         config = AutoConfig.from_pretrained(
diff --git a/llmfoundry/models/hf/hf_prefix_lm.py b/llmfoundry/models/hf/hf_prefix_lm.py
index 7152dfae70..67060a02b8 100644
--- a/llmfoundry/models/hf/hf_prefix_lm.py
+++ b/llmfoundry/models/hf/hf_prefix_lm.py
@@ -7,12 +7,12 @@
 
 from typing import Mapping, MutableMapping
 
-from composer.metrics.nlp import LanguageCrossEntropy, MaskedAccuracy
 from composer.utils import dist
 from omegaconf import DictConfig
 from transformers import (AutoConfig, AutoModelForCausalLM,
                           PreTrainedTokenizerBase)
 
+from llmfoundry.metrics import DEFAULT_PREFIX_LM_METRICS
 from llmfoundry.models.hf.hf_fsdp import hf_get_init_device
 from llmfoundry.models.hf.model_wrapper import HuggingFaceModelWithZLoss
 from llmfoundry.models.utils import (adapt_tokenizer_for_denoising,
@@ -22,9 +22,6 @@
 
 __all__ = ['ComposerHFPrefixLM']
 
-# HuggingFace hardcodes the ignore index to -100
-_HF_IGNORE_INDEX = -100
-
 
 class ComposerHFPrefixLM(HuggingFaceModelWithZLoss):
     """Configures a :class:`.HuggingFaceModel` around a Prefix LM.
@@ -68,6 +65,8 @@ class ComposerHFPrefixLM(HuggingFaceModelWithZLoss):
 
     def __init__(self, om_model_config: DictConfig,
                  tokenizer: PreTrainedTokenizerBase):
+        from llmfoundry.utils.builders import build_metric
+
         config = AutoConfig.from_pretrained(
             om_model_config.pretrained_model_name_or_path,
             trust_remote_code=om_model_config.get('trust_remote_code', True),
@@ -130,8 +129,8 @@ def __init__(self, om_model_config: DictConfig,
         model = convert_hf_causal_lm_to_prefix_lm(model)
 
         metrics = [
-            LanguageCrossEntropy(ignore_index=_HF_IGNORE_INDEX),
-            MaskedAccuracy(ignore_index=_HF_IGNORE_INDEX)
+            build_metric(metric, {}) for metric in DEFAULT_PREFIX_LM_METRICS +
+            om_model_config.get('additional_train_metrics', [])
         ]
 
         composer_model = super().__init__(model=model,
diff --git a/llmfoundry/models/hf/hf_t5.py b/llmfoundry/models/hf/hf_t5.py
index b7ed766e07..d059f54f85 100644
--- a/llmfoundry/models/hf/hf_t5.py
+++ b/llmfoundry/models/hf/hf_t5.py
@@ -7,12 +7,12 @@
 
 from typing import Mapping
 
-from composer.metrics.nlp import LanguageCrossEntropy, MaskedAccuracy
 from composer.utils import dist
 from omegaconf import DictConfig
 from transformers import (AutoConfig, PreTrainedTokenizerBase,
                           T5ForConditionalGeneration)
 
+from llmfoundry.metrics import DEFAULT_ENC_DEC_METRICS
 from llmfoundry.models.hf.hf_fsdp import hf_get_init_device
 from llmfoundry.models.hf.model_wrapper import HuggingFaceModelWithZLoss
 from llmfoundry.models.utils import (adapt_tokenizer_for_denoising,
@@ -21,9 +21,6 @@
 
 __all__ = ['ComposerHFT5']
 
-# HuggingFace hardcodes the ignore index to -100
-_HF_IGNORE_INDEX = -100
-
 
 @experimental('ComposerHFT5')
 class ComposerHFT5(HuggingFaceModelWithZLoss):
@@ -59,6 +56,8 @@ class ComposerHFT5(HuggingFaceModelWithZLoss):
 
     def __init__(self, om_model_config: DictConfig,
                  tokenizer: PreTrainedTokenizerBase):
+        from llmfoundry.utils.builders import build_metric
+
         config = AutoConfig.from_pretrained(
             om_model_config.pretrained_model_name_or_path,
             trust_remote_code=om_model_config.get('trust_remote_code', True),
@@ -122,8 +121,8 @@ def __init__(self, om_model_config: DictConfig,
             f'init_device="{init_device}" must be either "cpu" or "meta".')
 
         metrics = [
-            LanguageCrossEntropy(ignore_index=_HF_IGNORE_INDEX),
-            MaskedAccuracy(ignore_index=_HF_IGNORE_INDEX)
+            build_metric(metric, {}) for metric in DEFAULT_ENC_DEC_METRICS +
+            om_model_config.get('additional_train_metrics', [])
         ]
 
         composer_model = super().__init__(model=model,
diff --git a/llmfoundry/models/inference_api_wrapper/interface.py b/llmfoundry/models/inference_api_wrapper/interface.py
index 13ea4ff017..3ee57d2f46 100644
--- a/llmfoundry/models/inference_api_wrapper/interface.py
+++ b/llmfoundry/models/inference_api_wrapper/interface.py
@@ -6,31 +6,22 @@
 import torch
 from composer.core.types import Batch
 from composer.metrics import InContextLearningMetric
-from composer.metrics.nlp import (InContextLearningLMAccuracy,
-                                  InContextLearningLMExpectedCalibrationError,
-                                  InContextLearningMCExpectedCalibrationError,
-                                  InContextLearningMultipleChoiceAccuracy,
-                                  InContextLearningQAAccuracy,
-                                  LanguageCrossEntropy, LanguagePerplexity)
 from composer.models import ComposerModel
 from torchmetrics import Metric
 from transformers import AutoTokenizer
 
+from llmfoundry.metrics import DEFAULT_CAUSAL_LM_EVAL_METRICS
+from llmfoundry.utils.builders import build_metric
+
 
 class InferenceAPIEvalWrapper(ComposerModel):
 
     def __init__(self, model_cfg: Dict, tokenizer: AutoTokenizer):
         self.tokenizer = tokenizer
         self.labels = None
-        # set up training and eval metrics
         eval_metrics = [
-            LanguageCrossEntropy(),
-            LanguagePerplexity(),
-            InContextLearningLMAccuracy(),
-            InContextLearningMultipleChoiceAccuracy(),
-            InContextLearningQAAccuracy(),
-            InContextLearningLMExpectedCalibrationError(),
-            InContextLearningMCExpectedCalibrationError()
+            build_metric(metric, {})
+            for metric in DEFAULT_CAUSAL_LM_EVAL_METRICS
         ]
         self.eval_metrics = {
             metric.__class__.__name__: metric for metric in eval_metrics
diff --git a/llmfoundry/models/mpt/modeling_mpt.py b/llmfoundry/models/mpt/modeling_mpt.py
index 0a5e2ac13d..bf93bd71f0 100644
--- a/llmfoundry/models/mpt/modeling_mpt.py
+++ b/llmfoundry/models/mpt/modeling_mpt.py
@@ -16,17 +16,9 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from composer.metrics import (InContextLearningCodeEvalAccuracy,
-                              InContextLearningLMAccuracy,
-                              InContextLearningLMExpectedCalibrationError,
-                              InContextLearningMCExpectedCalibrationError,
-                              InContextLearningMultipleChoiceAccuracy,
-                              InContextLearningQAAccuracy)
-from composer.metrics.nlp import LanguageCrossEntropy, LanguagePerplexity
 from composer.models import HuggingFaceModel
 from composer.utils import dist
 
-from llmfoundry.metrics import TokenAccuracy
 from llmfoundry.models.layers.attention import is_flash_v2_installed
 from llmfoundry.models.layers.norm import NORM_CLASS_REGISTRY
@@ -1034,27 +1026,27 @@ def __init__(
         om_model_config: DictConfig,
         tokenizer: Optional[PreTrainedTokenizerBase] = None,
     ):
+        from llmfoundry.metrics import (DEFAULT_CAUSAL_LM_EVAL_METRICS,
+                                        DEFAULT_CAUSAL_LM_TRAIN_METRICS)
+        from llmfoundry.utils.builders import build_metric
+
         resolved_om_model_config = om.to_container(om_model_config,
                                                    resolve=True)
+        assert isinstance(resolved_om_model_config, dict)
+
         hf_config = MPTConfig.from_dict(resolved_om_model_config)
         model = MPTForCausalLM(hf_config)
 
         use_train_metrics = om_model_config.get('use_train_metrics', True)
+        train_metric_names = DEFAULT_CAUSAL_LM_TRAIN_METRICS + resolved_om_model_config.get(
+            'additional_train_metrics', [])
         train_metrics = [
-            LanguageCrossEntropy(),
-            LanguagePerplexity(),
-            TokenAccuracy()
+            build_metric(metric, {}) for metric in train_metric_names
         ] if use_train_metrics else []
+        eval_metric_names = DEFAULT_CAUSAL_LM_EVAL_METRICS + resolved_om_model_config.get(
+            'additional_eval_metrics', [])
         eval_metrics = [
-            LanguageCrossEntropy(),
-            LanguagePerplexity(),
-            TokenAccuracy(),
-            InContextLearningLMAccuracy(),
-            InContextLearningMultipleChoiceAccuracy(),
-            InContextLearningQAAccuracy(),
-            InContextLearningCodeEvalAccuracy(),
-            InContextLearningLMExpectedCalibrationError(),
-            InContextLearningMCExpectedCalibrationError(),
+            build_metric(metric, {}) for metric in eval_metric_names
         ]
 
         super().__init__(
diff --git a/llmfoundry/registry.py b/llmfoundry/registry.py
index cb4ae447df..86dc3513b6 100644
--- a/llmfoundry/registry.py
+++ b/llmfoundry/registry.py
@@ -6,6 +6,7 @@
 from composer.loggers import LoggerDestination
 from composer.optim import ComposerScheduler
 from torch.optim import Optimizer
+from torchmetrics import Metric
 
 from llmfoundry.interfaces import CallbackWithConfig
 from llmfoundry.utils.registry_utils import create_registry
@@ -64,6 +65,13 @@
                              entry_points=True,
                              description=_schedulers_description)
 
+_metrics_description = """The metrics registry is used to register classes that implement the torchmetrics.Metric interface."""
+metrics = create_registry('llmfoundry',
+                          'metrics',
+                          generic_type=Type[Metric],
+                          entry_points=True,
+                          description=_metrics_description)
+
 __all__ = [
     'loggers',
     'callbacks',
@@ -71,4 +79,5 @@
     'optimizers',
     'algorithms',
     'schedulers',
+    'metrics',
 ]
diff --git a/llmfoundry/utils/__init__.py b/llmfoundry/utils/__init__.py
index ab646936f9..86e6ea2c6b 100644
--- a/llmfoundry/utils/__init__.py
+++ b/llmfoundry/utils/__init__.py
@@ -2,11 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from llmfoundry.utils.builders import (build_algorithm, build_callback,
-                                       build_evaluators,
-                                       build_icl_data_and_gauntlet,
-                                       build_icl_evaluators, build_logger,
-                                       build_optimizer, build_scheduler,
-                                       build_tokenizer)
+                                       build_logger, build_optimizer,
+                                       build_scheduler, build_tokenizer)
 from llmfoundry.utils.checkpoint_conversion_helpers import (
     convert_and_save_ft_weights, get_hf_tokenizer_from_composer_state_dict,
     load_tokenizer)
@@ -34,9 +31,6 @@
 __all__ = [
     'build_algorithm',
     'build_callback',
-    'build_evaluators',
-    'build_icl_data_and_gauntlet',
-    'build_icl_evaluators',
     'build_logger',
     'build_optimizer',
     'build_scheduler',
diff --git a/llmfoundry/utils/builders.py b/llmfoundry/utils/builders.py
index a6354c3dbe..b064b00759 100644
--- a/llmfoundry/utils/builders.py
+++ b/llmfoundry/utils/builders.py
@@ -18,6 +18,7 @@
 from omegaconf import DictConfig, ListConfig
 from omegaconf import OmegaConf as om
 from torch.optim.optimizer import Optimizer
+from torchmetrics import Metric
 from transformers import AutoTokenizer, PreTrainedTokenizerBase
 
 from llmfoundry import registry
@@ -38,6 +39,7 @@
     'build_optimizer',
     'build_scheduler',
     'build_tokenizer',
+    'build_metric',
 ]
 
 
@@ -155,12 +157,14 @@ def build_icl_data_and_gauntlet(
 
 def build_callback(
     name: str,
-    kwargs: Dict[str, Any],
+    kwargs: Optional[Dict[str, Any]] = None,
     config: Any = None,
 ) -> Callback:
     """Builds a callback from the registry."""
     registry_to_use = registry.callbacks
     if name in registry.callbacks_with_config:
+        if kwargs is None:
+            kwargs = {}
         if 'config' in kwargs:
             raise ValueError(
                 f'`config` is a reserved keyword for callbacks with config. Please remove it from the kwargs.'
@@ -176,7 +180,8 @@ def build_callback(
                                    kwargs=kwargs)
 
 
-def build_logger(name: str, kwargs: Dict[str, Any]) -> LoggerDestination:
+def build_logger(name: str,
+                 kwargs: Optional[Dict[str, Any]] = None) -> LoggerDestination:
     """Builds a logger from the registry."""
     return construct_from_registry(name=name,
                                    registry=registry.loggers,
@@ -186,7 +191,8 @@ def build_logger(name: str, kwargs: Dict[str, Any]) -> LoggerDestination:
                                    kwargs=kwargs)
 
 
-def build_algorithm(name: str, kwargs: Dict[str, Any]) -> Algorithm:
+def build_algorithm(name: str,
+                    kwargs: Optional[Dict[str, Any]] = None) -> Algorithm:
     """Builds an algorithm from the registry."""
     return construct_from_registry(name=name,
                                    registry=registry.algorithms,
@@ -196,9 +202,19 @@ def build_algorithm(name: str, kwargs: Dict[str, Any]) -> Algorithm:
                                    kwargs=kwargs)
 
 
+def build_metric(name: str, kwargs: Optional[Dict[str, Any]] = None) -> Metric:
+    """Builds a metric from the registry."""
+    return construct_from_registry(name=name,
+                                   registry=registry.metrics,
+                                   partial_function=True,
+                                   pre_validation_function=Metric,
+                                   post_validation_function=None,
+                                   kwargs=kwargs)
+
+
 def _extract_param_groups(
     model: torch.nn.Module,
-    optimizer_config: Dict[str, Any],
+    optimizer_config: Optional[Dict[str, Any]] = None,
 ) -> Union[Iterable[torch.Tensor], Iterable[Dict[str, Any]]]:
     """Extracts parameter groups defined in the optimizer config.
 
@@ -260,6 +276,9 @@ def _extract_param_groups(
             torch.Tensor's or dict's. Specifies what Tensors should be
             optimized and their param groupings.
""" + if optimizer_config is None: + return model.parameters() + if 'disable_grad' in optimizer_config.keys(): str_matches = optimizer_config.pop('disable_grad') if isinstance(str_matches, str): @@ -296,11 +315,16 @@ def _extract_param_groups( return model.parameters() -def build_optimizer(model: torch.nn.Module, name: str, - optimizer_config: Dict[str, Any]) -> Optimizer: +def build_optimizer( + model: torch.nn.Module, + name: str, + optimizer_config: Optional[Dict[str, Any]] = None) -> Optimizer: params = _extract_param_groups(model, optimizer_config) kwargs = optimizer_config + + if kwargs is None: + kwargs = {} if 'params' in kwargs: raise ValueError( 'The `params` will be automatically extracted from the model and ' + @@ -316,8 +340,9 @@ def build_optimizer(model: torch.nn.Module, name: str, kwargs=kwargs) -def build_scheduler(name: str, - scheduler_config: Dict[str, Any]) -> ComposerScheduler: +def build_scheduler( + name: str, + scheduler_config: Optional[Dict[str, Any]] = None) -> ComposerScheduler: return construct_from_registry( name=name, registry=registry.schedulers, diff --git a/llmfoundry/utils/registry_utils.py b/llmfoundry/utils/registry_utils.py index 3c8cca276c..7089996a13 100644 --- a/llmfoundry/utils/registry_utils.py +++ b/llmfoundry/utils/registry_utils.py @@ -17,7 +17,10 @@ class TypedRegistry(catalogue.Registry, Generic[T]): - """A thin wrapper around catalogue.Registry to add static typing.""" + """A thin wrapper around catalogue.Registry to add static typing and. + + descriptions. + """ def __init__(self, namespace: Sequence[str], diff --git a/tests/test_registry.py b/tests/test_registry.py index 293e61a011..df8fda8d9f 100644 --- a/tests/test_registry.py +++ b/tests/test_registry.py @@ -21,8 +21,13 @@ def test_expected_registries_exist(): if isinstance(getattr(registry, name), registry_utils.TypedRegistry) } expected_registry_names = { - 'loggers', 'optimizers', 'schedulers', 'callbacks', 'algorithms', - 'callbacks_with_config' + 'loggers', + 'optimizers', + 'schedulers', + 'callbacks', + 'algorithms', + 'callbacks_with_config', + 'metrics', } assert existing_registries == expected_registry_names