
Commit 1ace566: fixes
aspfohl committed Dec 1, 2023
1 parent cde6d48
Showing 5 changed files with 22 additions and 19 deletions.
2 changes: 1 addition & 1 deletion scripts/eval/yamls/test_eval.yaml
@@ -25,7 +25,7 @@ device_eval_batch_size: 4
 icl_subset_num_batches: 1
 icl_tasks:
 - label: lambada_openai
-  dataset_uri: scripts/eval/local_data/language_understanding/lambada_openai.jsonl
+  dataset_uri: eval/local_data/language_understanding/lambada_openai.jsonl
   num_fewshot: [0]
   icl_task_type: language_modeling
 eval_gauntlet:
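
The dropped scripts/ prefix works because relative dataset URIs are resolved against the process's working directory, and the eval tests (see the fixture added to tests/scripts/eval/test_eval.py below) now chdir into llm-foundry/scripts before running. A minimal sketch of that resolution, with the checkout location as a placeholder:

import os

# Assumed checkout location; substitute your own path.
repo_root = '/path/to/llm-foundry'

# The working directory that the set_correct_cwd fixture establishes.
cwd = os.path.join(repo_root, 'scripts')

# The URI as it now appears in test_eval.yaml.
uri = 'eval/local_data/language_understanding/lambada_openai.jsonl'

# A relative URI resolves against the working directory.
print(os.path.normpath(os.path.join(cwd, uri)))
# -> /path/to/llm-foundry/scripts/eval/local_data/language_understanding/lambada_openai.jsonl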
12 changes: 0 additions & 12 deletions tests/callbacks/test_eval_gauntlet_callback.py
@@ -1,7 +1,6 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
 
-import os
 from typing import Dict, List, Optional
 
 import omegaconf as om
@@ -15,17 +14,6 @@
 from llmfoundry.utils.builders import build_icl_data_and_gauntlet
 
 
-@pytest.fixture(autouse=True)
-def set_correct_cwd():
-    if not os.getcwd().endswith('llm-foundry/scripts'):
-        os.chdir('scripts')
-
-    yield
-
-    if os.getcwd().endswith('llm-foundry/scripts'):
-        os.chdir('..')
-
-
 class MockState(State):
 
     def __init__(self, logger_keys: List[str], accuracy: float = 0.25) -> None:
7 changes: 6 additions & 1 deletion tests/data_utils.py
@@ -1,6 +1,7 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
 
+import json
 import os
 import pathlib
 import shutil
@@ -113,10 +114,14 @@ def create_arxiv_dataset(path: pathlib.Path) -> str:
     arxiv_dir = os.path.join(path, f'my-copy-arxiv')
     downloaded_split = 'train'
 
+    arxiv_path = 'data_prep/example_data/arxiv.jsonl'
+    if not os.getcwd().endswith('scripts'):
+        arxiv_path = os.path.join('scripts', arxiv_path)
+
     main_json(
         Namespace(
             **{
-                'path': 'data_prep/example_data/arxiv.jsonl',
+                'path': arxiv_path,
                 'out_root': arxiv_dir,
                 'compression': None,
                 'split': downloaded_split,
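
The added guard makes create_arxiv_dataset work whether the suite runs from the repo root or from scripts/. A cwd-independent alternative, sketched here as an assumption rather than the repo's actual approach (the helper name is hypothetical), would anchor the path to the test file itself:

import pathlib

def example_arxiv_path() -> str:
    # Hypothetical helper: tests/data_utils.py sits one level below the
    # repo root, so the example data can be located without consulting
    # the working directory at all.
    repo_root = pathlib.Path(__file__).resolve().parent.parent
    return str(repo_root / 'scripts' / 'data_prep' / 'example_data' / 'arxiv.jsonl')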
11 changes: 11 additions & 0 deletions tests/scripts/eval/test_eval.py
@@ -18,6 +18,17 @@
     gpt_tiny_cfg)
 
 
+@pytest.fixture(autouse=True)
+def set_correct_cwd():
+    if not os.getcwd().endswith('llm-foundry/scripts'):
+        os.chdir('scripts')
+
+    yield
+
+    if os.getcwd().endswith('llm-foundry/scripts'):
+        os.chdir('..')
+
+
 @pytest.fixture
 def eval_cfg(foundry_dir: str) -> Union[om.ListConfig, om.DictConfig]:
     yaml_path = os.path.join(foundry_dir, 'scripts/eval/yamls/test_eval.yaml')
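
The fixture moved here is autouse, so every test in this module runs between its setup and teardown without requesting it by name; the code after yield is the teardown. A minimal standalone illustration of the pattern (not from this repo):

import os

import pytest

@pytest.fixture(autouse=True)
def in_tmp_dir(tmp_path):
    # Setup: remember the original directory, then enter a scratch one.
    original = os.getcwd()
    os.chdir(tmp_path)

    yield  # each test in the module executes here

    # Teardown: restore the working directory even if the test failed.
    os.chdir(original)

def test_runs_in_scratch_dir(tmp_path):
    # The fixture applied even though it is not a parameter of this test.
    assert os.path.samefile(os.getcwd(), tmp_path)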
9 changes: 4 additions & 5 deletions tests/scripts/train/test_train.py
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 import copy
 import pathlib
-from typing import Any, Optional
+from typing import Optional
 
 import pytest
 from composer.loggers import InMemoryLogger
@@ -18,8 +18,7 @@
 @pytest.mark.parametrize('averages', [{
     'core_average': ['language_understanding_lite']
 }, None])
-def test_train_gauntlet(averages: Optional[dict], set_correct_cwd: Any,
-                        tmp_path: pathlib.Path):
+def test_train_gauntlet(averages: Optional[dict], tmp_path: pathlib.Path):
     """Test training run with a small dataset."""
     dataset_name = create_c4_dataset_xxsmall(tmp_path)
     test_cfg = gpt_tiny_cfg(dataset_name, 'cpu')
@@ -28,7 +27,7 @@ def test_train_gauntlet(averages: Optional[dict], set_correct_cwd: Any,
         'label':
             'lambada_openai',
         'dataset_uri':
-            'eval/local_data/language_understanding/lambada_openai_small.jsonl',
+            'scripts/eval/local_data/language_understanding/lambada_openai_small.jsonl',
         'num_fewshot': [0],
         'icl_task_type':
             'language_modeling'
@@ -98,7 +97,7 @@ def test_train_gauntlet(averages: Optional[dict], set_correct_cwd: Any,
         -1][-1] == 0
 
 
-def test_train_multi_eval(set_correct_cwd: Any, tmp_path: pathlib.Path):
+def test_train_multi_eval(tmp_path: pathlib.Path):
     """Test training run with multiple eval datasets."""
     c4_dataset_name = create_c4_dataset_xxsmall(tmp_path)
     test_cfg = gpt_tiny_cfg(c4_dataset_name, 'cpu')
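
With set_correct_cwd gone from these signatures, the train tests run from the repo root, which is why the lambada URI regains its scripts/ prefix here. As context for the decorated signature above, pytest.mark.parametrize runs the test once per listed value, so averages is exercised both as a dict and as None; a minimal standalone sketch of that pattern (build_gauntlet_cfg is a hypothetical helper, not the repo's code):

from typing import Optional

import pytest

def build_gauntlet_cfg(averages: Optional[dict]) -> dict:
    # Hypothetical helper: attach an averages section only when provided.
    cfg = {'weighting': 'EQUAL'}
    if averages is not None:
        cfg['averages'] = averages
    return cfg

@pytest.mark.parametrize('averages', [{
    'core_average': ['language_understanding_lite']
}, None])
def test_averages_handled(averages: Optional[dict]):
    cfg = build_gauntlet_cfg(averages)
    # One parametrized run covers the dict case, the other the None case.
    assert ('averages' in cfg) == (averages is not None)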
