Commit 15455bc

Merge branch 'main' into mlflow-log-model

dakinggg authored Oct 4, 2023
2 parents: 5b5f039 + cb1d94a
Showing 6 changed files with 44 additions and 8 deletions.
13 changes: 9 additions & 4 deletions llmfoundry/models/hf/hf_causal_lm.py
@@ -65,10 +65,7 @@ def __init__(self, om_model_config: Union[DictConfig,
                                                nn.Module],
                  tokenizer: PreTrainedTokenizerBase):
         # set up training and eval metrics
-        train_metrics = [
-            LanguageCrossEntropy(),
-            LanguagePerplexity(),
-        ]
+        train_metrics = [LanguageCrossEntropy(), LanguagePerplexity()]
         eval_metrics = [
             LanguageCrossEntropy(),
             LanguagePerplexity(),
@@ -92,6 +89,9 @@ def __init__(self, om_model_config: Union[DictConfig,
                 'which is not significantly slower and not compatible with the LLM foundry training code, rather than the code release by MosaicML.'
             )

+        if not om_model_config.get('use_train_metrics', True):
+            train_metrics = []
+
         # load the model config
         trust_remote_code = om_model_config.get('trust_remote_code', True)
         use_auth_token = om_model_config.get('use_auth_token', False)
@@ -109,6 +109,7 @@ def __init__(self, om_model_config: Union[DictConfig,
                     )

                 attr = getattr(config, k)
+                # attempt to disallow typos in nested configs
                 if isinstance(attr, Mapping):
                     extra_keys = [
                         _k for _k in v.keys() if _k not in attr.keys()
@@ -120,6 +121,10 @@ def __init__(self, om_model_config: Union[DictConfig,
                             f'Expected (a subset of) keys: {list(attr.keys())}.'
                         )
                     getattr(config, k).update(v)
+                # necessary case to allow for rope_scaling to be overridden in llama config
+                elif attr is None and isinstance(v, Mapping):
+                    setattr(config, k, {})
+                    getattr(config, k).update(v)
                 else:
                     setattr(config, k, v)

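The new elif branch covers Hugging Face config fields that default to None, such as rope_scaling on LlamaConfig: since there is no existing mapping to update in place, the loop now seeds an empty dict before applying the nested override. Below is a minimal, runnable sketch of that behavior; it uses a SimpleNamespace stand-in rather than a real PretrainedConfig and omits the extra-keys validation, so the names are illustrative only, not llm-foundry's API.

```python
from collections.abc import Mapping
from types import SimpleNamespace

# Stand-in for a HF PretrainedConfig; rope_scaling defaults to None on LlamaConfig.
config = SimpleNamespace(num_hidden_layers=32, rope_scaling=None)

config_overrides = {
    'num_hidden_layers': 2,
    'rope_scaling': {'type': 'dynamic', 'factor': 0.5},
}

for k, v in config_overrides.items():
    attr = getattr(config, k)
    if isinstance(attr, Mapping):
        # nested override onto an existing dict: update it in place
        getattr(config, k).update(v)
    elif attr is None and isinstance(v, Mapping):
        # field defaults to None: seed an empty dict so the nested
        # override has something to update (the new case in this commit)
        setattr(config, k, {})
        getattr(config, k).update(v)
    else:
        # scalar override: plain attribute assignment
        setattr(config, k, v)

assert config.num_hidden_layers == 2
assert config.rope_scaling == {'type': 'dynamic', 'factor': 0.5}
```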
4 changes: 3 additions & 1 deletion llmfoundry/models/mpt/modeling_mpt.py
@@ -694,7 +694,9 @@ def __init__(
         hf_config = MPTConfig.from_dict(resolved_om_model_config)
         model = MPTForCausalLM(hf_config)

-        train_metrics = [LanguageCrossEntropy(), LanguagePerplexity()]
+        use_train_metrics = om_model_config.get('use_train_metrics', True)
+        train_metrics = [LanguageCrossEntropy(),
+                         LanguagePerplexity()] if use_train_metrics else []
         eval_metrics = [
             LanguageCrossEntropy(),
             LanguagePerplexity(),
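Both the HF and MPT wrappers now read the same use_train_metrics flag, defaulting to True so existing configs are unaffected; when it is false, no training metrics are constructed (eval metrics are untouched). A minimal sketch of the flag's effect, assuming composer and omegaconf are installed and that the flag lives in the model section of the YAML:

```python
from composer.metrics import LanguageCrossEntropy, LanguagePerplexity
from omegaconf import OmegaConf

# e.g. a model section containing `use_train_metrics: false`
om_model_config = OmegaConf.create({'use_train_metrics': False})

# Mirrors the diff: default to True so existing configs keep their metrics.
use_train_metrics = om_model_config.get('use_train_metrics', True)
train_metrics = [LanguageCrossEntropy(),
                 LanguagePerplexity()] if use_train_metrics else []

assert train_metrics == []  # no per-batch metric computation during training
```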
1 change: 1 addition & 0 deletions mcli/mcli-llama2-finetune.yaml
@@ -2,6 +2,7 @@ integrations:
 - integration_type: git_repo
   git_repo: mosaicml/llm-foundry
   git_branch: v0.3.0
+  # git_commit: # OR use your commit hash
   pip_install: -e .[gpu]
   ssh_clone: false # Should be true if using a private repo

4 changes: 2 additions & 2 deletions mcli/mcli-openai-eval.yaml
@@ -1,8 +1,8 @@
 integrations:
 - integration_type: git_repo
   git_repo: mosaicml/llm-foundry
-  git_branch: # use your branch
-  # git_commit: 29d65cc26853c09f6de7542978056ddb0b07e98c # OR use your commit hash
+  git_branch: v0.3.0
+  # git_commit: # OR use your commit hash
   pip_install: -e ".[gpu,openai]"
   ssh_clone: false # Should be true if using a private repo

2 changes: 1 addition & 1 deletion mcli/mcli-pretokenize-oci-upload.yaml
@@ -14,7 +14,7 @@ integrations:
   - oci-cli==3.23.2
 - integration_type: git_repo
   git_repo: mosaicml/llm-foundry
-  git_branch: v0.2.0
+  git_branch: v0.3.0
   # git_commit: # OR use your commit hash
   pip_install: '.'
   ssh_clone: false # Should be true if using a private repo

28 changes: 28 additions & 0 deletions tests/test_hf_config.py
@@ -1,6 +1,7 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0

+import os
 import tempfile
 from copy import deepcopy
 from pathlib import Path
@@ -139,3 +140,30 @@ def test_hf_config_override(
             assert getattr(hf_model.config, k)[_k] == _v
         else:
             assert getattr(hf_model.config, k) == v
+
+
+@pytest.mark.skipif('HUGGING_FACE_HUB_TOKEN' not in os.environ,
+                    reason='CI does not have access to llama2')
+def test_rope_scaling_override():
+    model_cfg = {
+        'name': 'hf_causal_lm',
+        'pretrained_model_name_or_path': 'meta-llama/Llama-2-7b-hf',
+        'config_overrides': {
+            'num_hidden_layers': 2,
+            'hidden_size': 32,
+            'intermediate_size': 64,
+            'rope_scaling': {
+                'type': 'dynamic',
+                'factor': 0.5
+            }
+        },
+        'use_auth_token': True,
+        'pretrained': False,
+        'init_device': 'cpu',
+    }
+    model_cfg = om.create(model_cfg)
+
+    model = COMPOSER_MODEL_REGISTRY[model_cfg.name](model_cfg, tokenizer=None)
+    # This would error if the config isn't parsed into a proper dictionary
+    model.get_metadata()
+    assert model.config.rope_scaling == {'type': 'dynamic', 'factor': 0.5}

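The new test is skipped unless HUGGING_FACE_HUB_TOKEN is set because the Llama 2 checkpoint is gated; with a token it can be run with, for example, pytest tests/test_hf_config.py -k rope_scaling_override. The get_metadata() call is the interesting part: the override arrives as an omegaconf container, and the final assertion checks that it ended up on the HF config as a plain dict. A small illustration of that distinction, assuming only omegaconf is installed (background for the test, not llm-foundry code):

```python
from omegaconf import OmegaConf

overrides = OmegaConf.create(
    {'rope_scaling': {'type': 'dynamic', 'factor': 0.5}})

# Nested values come back as DictConfig, not dict; storing one directly on
# a HF config can break later serialization (e.g. config.to_dict()).
nested = overrides.rope_scaling
assert not isinstance(nested, dict)

# Converting to primitive containers yields what the test asserts.
assert OmegaConf.to_container(nested) == {'type': 'dynamic', 'factor': 0.5}
```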