From 9f62ce1d63badb38863d02517baa0782a5173f19 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Mon, 2 Oct 2023 18:04:12 -0700 Subject: [PATCH 1/6] fix rope scaling override --- llmfoundry/models/hf/hf_causal_lm.py | 5 +++++ tests/test_hf_config.py | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/llmfoundry/models/hf/hf_causal_lm.py b/llmfoundry/models/hf/hf_causal_lm.py index d5ef2435f9..78a5637e0e 100644 --- a/llmfoundry/models/hf/hf_causal_lm.py +++ b/llmfoundry/models/hf/hf_causal_lm.py @@ -109,6 +109,7 @@ def __init__(self, om_model_config: Union[DictConfig, ) attr = getattr(config, k) + # attempt to disallow typos in nested configs if isinstance(attr, Mapping): extra_keys = [ _k for _k in v.keys() if _k not in attr.keys() @@ -120,6 +121,10 @@ def __init__(self, om_model_config: Union[DictConfig, f'Expected (a subset of) keys: {list(attr.keys())}.' ) getattr(config, k).update(v) + # necessary case to allow for rope_scaling to be overridden in llama config + elif attr is None and isinstance(v, Mapping): + setattr(config, k, {}) + getattr(config, k).update(v) else: setattr(config, k, v) diff --git a/tests/test_hf_config.py b/tests/test_hf_config.py index 99d01f309f..35670b1566 100644 --- a/tests/test_hf_config.py +++ b/tests/test_hf_config.py @@ -139,3 +139,25 @@ def test_hf_config_override( assert getattr(hf_model.config, k)[_k] == _v else: assert getattr(hf_model.config, k) == v + +def test_rope_scaling_override(): + model_cfg = { + 'name': 'hf_causal_lm', + 'pretrained_model_name_or_path': 'meta-llama/Llama-2-7b-hf', + 'config_overrides': { + 'num_hidden_layers': 2, + 'hidden_size': 32, + 'intermediate_size': 64, + 'rope_scaling': { + "type": 'dynamic', "factor": 0.5 + } + }, + 'use_auth_token': True, + 'pretrained': False, + 'init_device': 'cpu', + } + model_cfg = om.create(model_cfg) + + model = COMPOSER_MODEL_REGISTRY[model_cfg.name](model_cfg, tokenizer=None) + model.get_metadata() + assert model.config.rope_scaling == {"type": 
'dynamic', "factor": 0.5} \ No newline at end of file From 7beac1a960ca7521a866867e4af4537ba498086d Mon Sep 17 00:00:00 2001 From: Daniel King Date: Mon, 2 Oct 2023 18:04:46 -0700 Subject: [PATCH 2/6] precommit --- tests/test_hf_config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_hf_config.py b/tests/test_hf_config.py index 35670b1566..aaa76c51e6 100644 --- a/tests/test_hf_config.py +++ b/tests/test_hf_config.py @@ -140,6 +140,7 @@ def test_hf_config_override( else: assert getattr(hf_model.config, k) == v + def test_rope_scaling_override(): model_cfg = { 'name': 'hf_causal_lm', @@ -149,7 +150,8 @@ def test_rope_scaling_override(): 'hidden_size': 32, 'intermediate_size': 64, 'rope_scaling': { - "type": 'dynamic', "factor": 0.5 + 'type': 'dynamic', + 'factor': 0.5 } }, 'use_auth_token': True, @@ -160,4 +162,4 @@ def test_rope_scaling_override(): model = COMPOSER_MODEL_REGISTRY[model_cfg.name](model_cfg, tokenizer=None) model.get_metadata() - assert model.config.rope_scaling == {"type": 'dynamic', "factor": 0.5} \ No newline at end of file + assert model.config.rope_scaling == {'type': 'dynamic', 'factor': 0.5} From 35b6facb2532386ffea1dcca1f6eaaae02d81151 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Mon, 2 Oct 2023 18:05:48 -0700 Subject: [PATCH 3/6] add comment --- tests/test_hf_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_hf_config.py b/tests/test_hf_config.py index aaa76c51e6..0ce66a42c3 100644 --- a/tests/test_hf_config.py +++ b/tests/test_hf_config.py @@ -161,5 +161,6 @@ def test_rope_scaling_override(): model_cfg = om.create(model_cfg) model = COMPOSER_MODEL_REGISTRY[model_cfg.name](model_cfg, tokenizer=None) + # This would error if the config isn't parsed into a proper dictionary model.get_metadata() assert model.config.rope_scaling == {'type': 'dynamic', 'factor': 0.5} From 78ff5445189d1d6591bbe76604a226998bab087f Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 3 Oct 2023 09:45:22 
-0700 Subject: [PATCH 4/6] skip test --- tests/test_hf_config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_hf_config.py b/tests/test_hf_config.py index 0ce66a42c3..4e195f3442 100644 --- a/tests/test_hf_config.py +++ b/tests/test_hf_config.py @@ -3,6 +3,7 @@ import tempfile from copy import deepcopy +import os from pathlib import Path from typing import Any, Dict, Mapping @@ -141,6 +142,7 @@ def test_hf_config_override( assert getattr(hf_model.config, k) == v +@pytest.mark.skipif('HUGGING_FACE_HUB_TOKEN' not in os.environ) def test_rope_scaling_override(): model_cfg = { 'name': 'hf_causal_lm', From 575cb13919b43fc3622c15daad87ec9ee7ba1053 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 3 Oct 2023 09:46:36 -0700 Subject: [PATCH 5/6] fix --- tests/test_hf_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_hf_config.py b/tests/test_hf_config.py index 4e195f3442..1c7f255b35 100644 --- a/tests/test_hf_config.py +++ b/tests/test_hf_config.py @@ -142,7 +142,7 @@ def test_hf_config_override( assert getattr(hf_model.config, k) == v -@pytest.mark.skipif('HUGGING_FACE_HUB_TOKEN' not in os.environ) +@pytest.mark.skipif('HUGGING_FACE_HUB_TOKEN' not in os.environ, reason='CI does not have access to llama2') def test_rope_scaling_override(): model_cfg = { 'name': 'hf_causal_lm', From 87534309fe7dc8899edaca97cc4e7239800c9277 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 3 Oct 2023 09:47:07 -0700 Subject: [PATCH 6/6] precommit --- tests/test_hf_config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_hf_config.py b/tests/test_hf_config.py index 1c7f255b35..5b3bb3d150 100644 --- a/tests/test_hf_config.py +++ b/tests/test_hf_config.py @@ -1,9 +1,9 @@ # Copyright 2022 MosaicML LLM Foundry authors # SPDX-License-Identifier: Apache-2.0 +import os import tempfile from copy import deepcopy -import os from pathlib import Path from typing import Any, Dict, Mapping @@ -142,7 +142,8 @@ def 
test_hf_config_override( assert getattr(hf_model.config, k) == v -@pytest.mark.skipif('HUGGING_FACE_HUB_TOKEN' not in os.environ, reason='CI does not have access to llama2') +@pytest.mark.skipif('HUGGING_FACE_HUB_TOKEN' not in os.environ, + reason='CI does not have access to llama2') def test_rope_scaling_override(): model_cfg = { 'name': 'hf_causal_lm',