Skip to content

Commit

Permalink
fix relative paths
Browse files Browse the repository at this point in the history
  • Loading branch information
aspfohl committed Nov 30, 2023
1 parent 2cef8cc commit 49adcac
Show file tree
Hide file tree
Showing 28 changed files with 54 additions and 119 deletions.
2 changes: 1 addition & 1 deletion scripts/eval/yamls/test_eval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ device_eval_batch_size: 4
icl_subset_num_batches: 1
icl_tasks:
- label: lambada_openai
dataset_uri: eval/local_data/language_understanding/lambada_openai.jsonl
dataset_uri: scripts/eval/local_data/language_understanding/lambada_openai.jsonl
num_fewshot: [0]
icl_task_type: language_modeling
eval_gauntlet:
Expand Down
1 change: 0 additions & 1 deletion tests/callbacks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

5 changes: 0 additions & 5 deletions tests/data/test_dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import pathlib
import random
import shutil
import sys
import tempfile
from argparse import Namespace
from typing import Literal, Optional, Union
Expand All @@ -26,10 +25,6 @@
build_text_dataloader,
get_tokens_per_batch_func)
from llmfoundry.utils.builders import build_tokenizer

# Add repo root to path so we can import scripts and test it
repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(repo_dir)
from scripts.data_prep.convert_dataset_hf import main as main_hf
from tests.data_utils import make_tiny_ft_dataset

Expand Down
2 changes: 1 addition & 1 deletion tests/data/test_icl_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from llmfoundry.utils.builders import build_icl_evaluators


def load_icl_config(conf_path: str = 'tests/test_tasks.yaml'):
def load_icl_config(conf_path: str = 'tests/data/test_tasks.yaml'):
    """Load the ICL task configuration used by the dataset tests.

    Args:
        conf_path: Path to the YAML config file. A relative path (including
            the default) is resolved against the current working directory.

    Returns:
        The object produced by ``om.load`` for the opened file.
    """
    # Pin the encoding so reading the YAML does not depend on the platform's
    # default locale encoding.
    with open(conf_path, encoding='utf-8') as f:
        return om.load(f)
Expand Down
File renamed without changes.
15 changes: 6 additions & 9 deletions tests/data_utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

import os
import sys

# Add repo root to path so we can import scripts and test it
repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(repo_dir)

import json
import os
import pathlib
import shutil
from argparse import Namespace
Expand Down Expand Up @@ -139,8 +133,11 @@ def create_arxiv_dataset(path: pathlib.Path) -> str:

def gpt_tiny_cfg(dataset_name: str, device: str):
"""Create gpt tiny cfg."""
conf_path: str = os.path.join(repo_dir,
'scripts/train/yamls/pretrain/testing.yaml')
from tests.fixtures.autouse import REPO_DIR
conf_path: str = os.path.join(
REPO_DIR,
'scripts/train/yamls/pretrain/testing.yaml',
)
with open(conf_path) as f:
test_cfg = om.load(f)
assert isinstance(test_cfg, DictConfig)
Expand Down
11 changes: 11 additions & 0 deletions tests/fixtures/autouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@
# SPDX-License-Identifier: Apache-2.0

import gc
import os
import sys

import pytest
import torch
from composer.utils import dist, get_device, reproducibility

# Two levels above this file's directory is the llm-foundry repo root; put it
# on sys.path so the tests can import the top-level `scripts` package.
REPO_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
sys.path.append(REPO_DIR)


@pytest.fixture(autouse=True)
def initialize_dist(request: pytest.FixtureRequest):
Expand All @@ -33,6 +39,11 @@ def random_seed() -> int:
return 17


@pytest.fixture
def foundry_dir() -> str:
    """Return ``REPO_DIR``, the module-level repository root path."""
    return REPO_DIR


@pytest.fixture(autouse=True)
def seed_all(random_seed: int):
"""Sets the seed for reproducibility."""
Expand Down
1 change: 0 additions & 1 deletion tests/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

1 change: 0 additions & 1 deletion tests/models/hf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

1 change: 0 additions & 1 deletion tests/models/inference_api_wrapper/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

1 change: 0 additions & 1 deletion tests/models/layers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

1 change: 0 additions & 1 deletion tests/models/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

1 change: 0 additions & 1 deletion tests/optim/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

1 change: 0 additions & 1 deletion tests/scripts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

1 change: 0 additions & 1 deletion tests/scripts/data_prep/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

4 changes: 0 additions & 4 deletions tests/scripts/data_prep/test_convert_dataset_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,9 @@
# SPDX-License-Identifier: Apache-2.0

import os
import sys
from argparse import Namespace
from pathlib import Path

# Add repo root to path so we can import scripts and test it
repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(repo_dir)
from scripts.data_prep.convert_dataset_hf import main as main_hf


Expand Down
5 changes: 0 additions & 5 deletions tests/scripts/data_prep/test_convert_dataset_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,9 @@
# SPDX-License-Identifier: Apache-2.0

import os
import sys
from argparse import Namespace
from pathlib import Path

# Add repo root to path so we can import scripts and test it
repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(repo_dir)

from scripts.data_prep.convert_dataset_json import main as main_json


Expand Down
8 changes: 1 addition & 7 deletions tests/scripts/data_prep/test_convert_text_to_mds.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,14 @@
# SPDX-License-Identifier: Apache-2.0

import os
import sys

import pytest

# Add repo root to path so we can import scripts and test it
repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(repo_dir)
import pathlib
from concurrent.futures import ProcessPoolExecutor
from glob import glob
from typing import Callable, Iterable, List
from unittest.mock import Mock, patch

import numpy as np
import pytest
from streaming import StreamingDataset
from transformers import AutoTokenizer

Expand Down
1 change: 0 additions & 1 deletion tests/scripts/eval/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

41 changes: 14 additions & 27 deletions tests/scripts/eval/test_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
import copy
import os
import pathlib
import sys
from typing import Any
from typing import Any, Union

import omegaconf as om
import pytest
Expand All @@ -14,32 +13,21 @@

from llmfoundry import COMPOSER_MODEL_REGISTRY
from llmfoundry.utils import build_tokenizer
from scripts.eval.eval import main # noqa: E402
from tests.data_utils import (create_arxiv_dataset, create_c4_dataset_xxsmall,
gpt_tiny_cfg)

# Add repo root to path so we can import scripts and test it
repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(repo_dir)

from scripts.eval.eval import main # noqa: E402


@pytest.fixture(autouse=True)
def set_correct_cwd():
if not os.getcwd().endswith('llm-foundry/scripts'):
os.chdir('scripts')

yield

if os.getcwd().endswith('llm-foundry/scripts'):
os.chdir('..')
@pytest.fixture
def eval_cfg(foundry_dir: str) -> Union[om.ListConfig, om.DictConfig]:
    """Load ``scripts/eval/yamls/test_eval.yaml`` as an OmegaConf config.

    Args:
        foundry_dir: Directory that the YAML path is joined onto; supplied
            by the pytest fixture of the same name.

    Returns:
        The config object returned by ``om.OmegaConf.load``.
    """
    config_file = os.path.join(foundry_dir,
                               'scripts/eval/yamls/test_eval.yaml')
    with open(config_file, 'r', encoding='utf-8') as stream:
        return om.OmegaConf.load(stream)


@pytest.fixture()
def mock_saved_model_path():
# load the eval and model config
with open('eval/yamls/test_eval.yaml', 'r', encoding='utf-8') as f:
eval_cfg = om.OmegaConf.load(f)
def mock_saved_model_path(eval_cfg: Union[om.ListConfig, om.DictConfig]):
model_cfg = eval_cfg.models[0]
# set device to cpu
device = 'cpu'
Expand All @@ -60,12 +48,11 @@ def mock_saved_model_path():
os.remove(saved_model_path)


def test_icl_eval(capfd: Any, mock_saved_model_path: Any):
with open('eval/yamls/test_eval.yaml', 'r', encoding='utf-8') as f:
test_cfg = om.OmegaConf.load(f)
test_cfg.models[0].load_path = mock_saved_model_path
assert isinstance(test_cfg, om.DictConfig)
main(test_cfg)
def test_icl_eval(eval_cfg: Union[om.ListConfig, om.DictConfig], capfd: Any,
mock_saved_model_path: Any):
eval_cfg.models[0].load_path = mock_saved_model_path
assert isinstance(eval_cfg, om.DictConfig)
main(eval_cfg)
out, _ = capfd.readouterr()
expected_results = '| Category | Benchmark | Subtask | Accuracy | Number few shot | Model |\n|:----------------------------|:---------------|:----------|-----------:|:------------------|:---------|\n| language_understanding_lite | lambada_openai | | 0 | 0-shot | tiny_mpt |'
assert expected_results in out
Expand Down
23 changes: 11 additions & 12 deletions tests/scripts/eval/test_eval_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,26 @@
# SPDX-License-Identifier: Apache-2.0
import copy
import os
import sys
import warnings

import omegaconf
import pytest
from omegaconf import DictConfig
from omegaconf import OmegaConf as om

# Add repo root to path so we can import scripts and test it
repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(repo_dir)

from scripts.eval.eval import main # noqa: E402


class TestHuggingFaceEvalYAMLInputs:
"""Validate and tests error handling for the input YAML file."""

@pytest.fixture
def cfg(self) -> DictConfig:
def cfg(self, foundry_dir: str) -> DictConfig:
"""Create YAML cfg fixture for testing purposes."""
conf_path: str = os.path.join(repo_dir,
'scripts/eval/yamls/hf_eval.yaml')
conf_path: str = os.path.join(
foundry_dir,
'scripts/eval/yamls/hf_eval.yaml',
)
with open(conf_path, 'r', encoding='utf-8') as config:
test_cfg = om.load(config)
assert isinstance(test_cfg, DictConfig)
Expand Down Expand Up @@ -78,15 +75,17 @@ def test_optional_mispelled_params_raise_warning(self,
class TestMPTEvalYAMLInputs:

@pytest.fixture
def cfg(self) -> DictConfig:
def cfg(self, foundry_dir: str) -> DictConfig:
"""Create YAML cfg fixture for testing purposes."""
conf_path: str = os.path.join(repo_dir,
'scripts/eval/yamls/mpt_eval.yaml')
conf_path: str = os.path.join(
foundry_dir,
'scripts/eval/yamls/mpt_eval.yaml',
)
with open(conf_path, 'r', encoding='utf-8') as config:
test_cfg = om.load(config)

test_cfg.icl_tasks[0].dataset_uri = os.path.join(
repo_dir, 'scripts', test_cfg.icl_tasks[0].dataset_uri)
foundry_dir, 'scripts', test_cfg.icl_tasks[0].dataset_uri)

# make tests use cpu initialized transformer models only
test_cfg.models[0].model.init_device = 'cpu'
Expand Down
1 change: 0 additions & 1 deletion tests/scripts/inference/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

22 changes: 7 additions & 15 deletions tests/scripts/inference/test_convert_composer_to_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,26 @@
import math
import os
import pathlib
import sys
from typing import Callable
from unittest.mock import ANY, MagicMock, patch

from composer import Trainer
from composer.loggers import MLFlowLogger
from composer.utils import dist, get_device, using_torch_2

from llmfoundry.callbacks import HuggingFaceCheckpointer
from llmfoundry.models.mpt.modeling_mpt import ComposerMPTCausalLM

# Add repo root to path so we can import scripts and test it
repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(repo_dir)
import shutil
from argparse import Namespace
from typing import Optional, cast
from typing import Callable, Optional, cast
from unittest.mock import ANY, MagicMock, patch

import pytest
import torch
import transformers
from composer import Trainer
from composer.loggers import MLFlowLogger
from composer.utils import dist, get_device, using_torch_2
from omegaconf import DictConfig
from omegaconf import OmegaConf as om
from torch.utils.data import DataLoader
from transformers import PreTrainedModel, PreTrainedTokenizerBase

from llmfoundry import COMPOSER_MODEL_REGISTRY
from llmfoundry.callbacks import HuggingFaceCheckpointer
from llmfoundry.data.finetuning import build_finetuning_dataloader
from llmfoundry.models.mpt.modeling_mpt import ComposerMPTCausalLM
from llmfoundry.utils.builders import build_optimizer, build_tokenizer
from scripts.inference.convert_composer_to_hf import convert_composer_to_hf
from tests.data_utils import make_tiny_ft_dataset
Expand Down
1 change: 0 additions & 1 deletion tests/scripts/train/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

12 changes: 0 additions & 12 deletions tests/scripts/train/test_train.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0
import copy
import os
import pathlib
from typing import Any, Optional

Expand All @@ -16,17 +15,6 @@
gpt_tiny_cfg)


@pytest.fixture(autouse=False)
def set_correct_cwd():
if not os.getcwd().endswith('llm-foundry/scripts'):
os.chdir('scripts')

yield

if os.getcwd().endswith('llm-foundry/scripts'):
os.chdir('..')


@pytest.mark.parametrize('averages', [{
'core_average': ['language_understanding_lite']
}, None])
Expand Down
Loading

0 comments on commit 49adcac

Please sign in to comment.