diff --git a/llmfoundry/models/hf/hf_causal_lm.py b/llmfoundry/models/hf/hf_causal_lm.py index d5ef2435f9..13857e9bb9 100644 --- a/llmfoundry/models/hf/hf_causal_lm.py +++ b/llmfoundry/models/hf/hf_causal_lm.py @@ -65,10 +65,7 @@ def __init__(self, om_model_config: Union[DictConfig, nn.Module], tokenizer: PreTrainedTokenizerBase): # set up training and eval metrics - train_metrics = [ - LanguageCrossEntropy(), - LanguagePerplexity(), - ] + train_metrics = [LanguageCrossEntropy(), LanguagePerplexity()] eval_metrics = [ LanguageCrossEntropy(), LanguagePerplexity(), @@ -92,6 +89,9 @@ def __init__(self, om_model_config: Union[DictConfig, 'which is not significantly slower and not compatible with the LLM foundry training code, rather than the code release by MosaicML.' ) + if not om_model_config.get('use_train_metrics', True): + train_metrics = [] + # load the model config trust_remote_code = om_model_config.get('trust_remote_code', True) use_auth_token = om_model_config.get('use_auth_token', False) @@ -109,6 +109,7 @@ def __init__(self, om_model_config: Union[DictConfig, ) attr = getattr(config, k) + # attempt to disallow typos in nested configs if isinstance(attr, Mapping): extra_keys = [ _k for _k in v.keys() if _k not in attr.keys() @@ -120,6 +121,10 @@ def __init__(self, om_model_config: Union[DictConfig, f'Expected (a subset of) keys: {list(attr.keys())}.' 
) getattr(config, k).update(v) + # necessary case to allow for rope_scaling to be overridden in llama config + elif attr is None and isinstance(v, Mapping): + setattr(config, k, {}) + getattr(config, k).update(v) else: setattr(config, k, v) diff --git a/llmfoundry/models/mpt/modeling_mpt.py b/llmfoundry/models/mpt/modeling_mpt.py index b1dff15398..cd162195b6 100644 --- a/llmfoundry/models/mpt/modeling_mpt.py +++ b/llmfoundry/models/mpt/modeling_mpt.py @@ -694,7 +694,9 @@ def __init__( hf_config = MPTConfig.from_dict(resolved_om_model_config) model = MPTForCausalLM(hf_config) - train_metrics = [LanguageCrossEntropy(), LanguagePerplexity()] + use_train_metrics = om_model_config.get('use_train_metrics', True) + train_metrics = [LanguageCrossEntropy(), + LanguagePerplexity()] if use_train_metrics else [] eval_metrics = [ LanguageCrossEntropy(), LanguagePerplexity(), diff --git a/mcli/mcli-llama2-finetune.yaml b/mcli/mcli-llama2-finetune.yaml index 89c9c0cd9c..ae8f57abb6 100644 --- a/mcli/mcli-llama2-finetune.yaml +++ b/mcli/mcli-llama2-finetune.yaml @@ -2,6 +2,7 @@ integrations: - integration_type: git_repo git_repo: mosaicml/llm-foundry git_branch: v0.3.0 + # git_commit: # OR use your commit hash pip_install: -e .[gpu] ssh_clone: false # Should be true if using a private repo diff --git a/mcli/mcli-openai-eval.yaml b/mcli/mcli-openai-eval.yaml index 6275d9d578..0b770626b9 100644 --- a/mcli/mcli-openai-eval.yaml +++ b/mcli/mcli-openai-eval.yaml @@ -1,8 +1,8 @@ integrations: - integration_type: git_repo git_repo: mosaicml/llm-foundry - git_branch: # use your branch - # git_commit: 29d65cc26853c09f6de7542978056ddb0b07e98c # OR use your commit hash + git_branch: v0.3.0 + # git_commit: # OR use your commit hash pip_install: -e ".[gpu,openai]" ssh_clone: false # Should be true if using a private repo diff --git a/mcli/mcli-pretokenize-oci-upload.yaml b/mcli/mcli-pretokenize-oci-upload.yaml index 8163d8c3bd..b585b5f5f2 100644 --- a/mcli/mcli-pretokenize-oci-upload.yaml +++ 
b/mcli/mcli-pretokenize-oci-upload.yaml @@ -14,7 +14,7 @@ integrations: - oci-cli==3.23.2 - integration_type: git_repo git_repo: mosaicml/llm-foundry - git_branch: v0.2.0 + git_branch: v0.3.0 # git_commit: # OR use your commit hash pip_install: '.' ssh_clone: false # Should be true if using a private repo diff --git a/tests/test_hf_config.py b/tests/test_hf_config.py index 99d01f309f..5b3bb3d150 100644 --- a/tests/test_hf_config.py +++ b/tests/test_hf_config.py @@ -1,6 +1,7 @@ # Copyright 2022 MosaicML LLM Foundry authors # SPDX-License-Identifier: Apache-2.0 +import os import tempfile from copy import deepcopy from pathlib import Path @@ -139,3 +140,30 @@ def test_hf_config_override( assert getattr(hf_model.config, k)[_k] == _v else: assert getattr(hf_model.config, k) == v + + +@pytest.mark.skipif('HUGGING_FACE_HUB_TOKEN' not in os.environ, + reason='CI does not have access to llama2') +def test_rope_scaling_override(): + model_cfg = { + 'name': 'hf_causal_lm', + 'pretrained_model_name_or_path': 'meta-llama/Llama-2-7b-hf', + 'config_overrides': { + 'num_hidden_layers': 2, + 'hidden_size': 32, + 'intermediate_size': 64, + 'rope_scaling': { + 'type': 'dynamic', + 'factor': 0.5 + } + }, + 'use_auth_token': True, + 'pretrained': False, + 'init_device': 'cpu', + } + model_cfg = om.create(model_cfg) + + model = COMPOSER_MODEL_REGISTRY[model_cfg.name](model_cfg, tokenizer=None) + # This would error if the config isn't parsed into a proper dictionary + model.get_metadata() + assert model.config.rope_scaling == {'type': 'dynamic', 'factor': 0.5}