fix relative paths

mosaicml · Nov 30, 2023 · 49adcac
1 parent 2cef8cc
commit 49adcac
Show file tree

Hide file tree

Showing 28 changed files with 54 additions and 119 deletions.
diff --git a/scripts/eval/yamls/test_eval.yaml b/scripts/eval/yamls/test_eval.yaml
@@ -25,7 +25,7 @@ device_eval_batch_size: 4
 icl_subset_num_batches: 1
 icl_tasks:
 - label: lambada_openai
-  dataset_uri: eval/local_data/language_understanding/lambada_openai.jsonl
+  dataset_uri: scripts/eval/local_data/language_understanding/lambada_openai.jsonl
   num_fewshot: [0]
   icl_task_type: language_modeling
 eval_gauntlet:

diff --git a/tests/callbacks/__init__.py b/tests/callbacks/__init__.py
@@ -1,3 +1,2 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
-
diff --git a/tests/data/test_dataloader.py b/tests/data/test_dataloader.py
@@ -5,7 +5,6 @@
 import pathlib
 import random
 import shutil
-import sys
 import tempfile
 from argparse import Namespace
 from typing import Literal, Optional, Union
@@ -26,10 +25,6 @@
                                        build_text_dataloader,
                                        get_tokens_per_batch_func)
 from llmfoundry.utils.builders import build_tokenizer
-
-# Add repo root to path so we can import scripts and test it
-repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-sys.path.append(repo_dir)
 from scripts.data_prep.convert_dataset_hf import main as main_hf
 from tests.data_utils import make_tiny_ft_dataset
 

diff --git a/tests/data/test_icl_datasets.py b/tests/data/test_icl_datasets.py
@@ -10,7 +10,7 @@
 from llmfoundry.utils.builders import build_icl_evaluators
 
 
-def load_icl_config(conf_path: str = 'tests/test_tasks.yaml'):
+def load_icl_config(conf_path: str = 'tests/data/test_tasks.yaml'):
     with open(conf_path) as f:
         test_cfg = om.load(f)
     return test_cfg

diff --git a/tests/test_tasks.yaml b/tests/data/test_tasks.yaml
rename from tests/test_tasks.yaml
rename to tests/data/test_tasks.yaml
diff --git a/tests/data_utils.py b/tests/data_utils.py
@@ -1,14 +1,8 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
 
-import os
-import sys
-
-# Add repo root to path so we can import scripts and test it
-repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-sys.path.append(repo_dir)
-
 import json
+import os
 import pathlib
 import shutil
 from argparse import Namespace
@@ -139,8 +133,11 @@ def create_arxiv_dataset(path: pathlib.Path) -> str:
 
 def gpt_tiny_cfg(dataset_name: str, device: str):
     """Create gpt tiny cfg."""
-    conf_path: str = os.path.join(repo_dir,
-                                  'scripts/train/yamls/pretrain/testing.yaml')
+    from tests.fixtures.autouse import REPO_DIR
+    conf_path: str = os.path.join(
+        REPO_DIR,
+        'scripts/train/yamls/pretrain/testing.yaml',
+    )
     with open(conf_path) as f:
         test_cfg = om.load(f)
     assert isinstance(test_cfg, DictConfig)

diff --git a/tests/fixtures/autouse.py b/tests/fixtures/autouse.py
@@ -2,11 +2,17 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import gc
+import os
+import sys
 
 import pytest
 import torch
 from composer.utils import dist, get_device, reproducibility
 
+# Add llm-foundry repo root to path so we can import scripts in the tests
+REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
+sys.path.append(REPO_DIR)
+
 
 @pytest.fixture(autouse=True)
 def initialize_dist(request: pytest.FixtureRequest):
@@ -33,6 +39,11 @@ def random_seed() -> int:
     return 17
 
 
+@pytest.fixture
+def foundry_dir() -> str:
+    return REPO_DIR
+
+
 @pytest.fixture(autouse=True)
 def seed_all(random_seed: int):
     """Sets the seed for reproducibility."""

diff --git a/tests/models/__init__.py b/tests/models/__init__.py
@@ -1,3 +1,2 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
-
diff --git a/tests/models/hf/__init__.py b/tests/models/hf/__init__.py
@@ -1,3 +1,2 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
-
diff --git a/tests/models/inference_api_wrapper/__init__.py b/tests/models/inference_api_wrapper/__init__.py
@@ -1,3 +1,2 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
-
diff --git a/tests/models/layers/__init__.py b/tests/models/layers/__init__.py
@@ -1,3 +1,2 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
-
diff --git a/tests/models/utils/__init__.py b/tests/models/utils/__init__.py
@@ -1,3 +1,2 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
-
diff --git a/tests/optim/__init__.py b/tests/optim/__init__.py
@@ -1,3 +1,2 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
-
diff --git a/tests/scripts/__init__.py b/tests/scripts/__init__.py
@@ -1,3 +1,2 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
-
diff --git a/tests/scripts/data_prep/__init__.py b/tests/scripts/data_prep/__init__.py
@@ -1,3 +1,2 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
-
diff --git a/tests/scripts/data_prep/test_convert_dataset_hf.py b/tests/scripts/data_prep/test_convert_dataset_hf.py
@@ -2,13 +2,9 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import os
-import sys
 from argparse import Namespace
 from pathlib import Path
 
-# Add repo root to path so we can import scripts and test it
-repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-sys.path.append(repo_dir)
 from scripts.data_prep.convert_dataset_hf import main as main_hf
 
 

diff --git a/tests/scripts/data_prep/test_convert_dataset_json.py b/tests/scripts/data_prep/test_convert_dataset_json.py
@@ -2,14 +2,9 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import os
-import sys
 from argparse import Namespace
 from pathlib import Path
 
-# Add repo root to path so we can import scripts and test it
-repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-sys.path.append(repo_dir)
-
 from scripts.data_prep.convert_dataset_json import main as main_json
 
 

diff --git a/tests/scripts/data_prep/test_convert_text_to_mds.py b/tests/scripts/data_prep/test_convert_text_to_mds.py
@@ -2,20 +2,14 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import os
-import sys
-
-import pytest
-
-# Add repo root to path so we can import scripts and test it
-repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-sys.path.append(repo_dir)
 import pathlib
 from concurrent.futures import ProcessPoolExecutor
 from glob import glob
 from typing import Callable, Iterable, List
 from unittest.mock import Mock, patch
 
 import numpy as np
+import pytest
 from streaming import StreamingDataset
 from transformers import AutoTokenizer
 

diff --git a/tests/scripts/eval/__init__.py b/tests/scripts/eval/__init__.py
@@ -1,3 +1,2 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
-
diff --git a/tests/scripts/eval/test_eval.py b/tests/scripts/eval/test_eval.py
@@ -4,8 +4,7 @@
 import copy
 import os
 import pathlib
-import sys
-from typing import Any
+from typing import Any, Union
 
 import omegaconf as om
 import pytest
@@ -14,32 +13,21 @@
 
 from llmfoundry import COMPOSER_MODEL_REGISTRY
 from llmfoundry.utils import build_tokenizer
+from scripts.eval.eval import main  # noqa: E402
 from tests.data_utils import (create_arxiv_dataset, create_c4_dataset_xxsmall,
                               gpt_tiny_cfg)
 
-# Add repo root to path so we can import scripts and test it
-repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-sys.path.append(repo_dir)
-
-from scripts.eval.eval import main  # noqa: E402
-
 
-@pytest.fixture(autouse=True)
-def set_correct_cwd():
-    if not os.getcwd().endswith('llm-foundry/scripts'):
-        os.chdir('scripts')
-
-    yield
-
-    if os.getcwd().endswith('llm-foundry/scripts'):
-        os.chdir('..')
+@pytest.fixture
+def eval_cfg(foundry_dir: str) -> Union[om.ListConfig, om.DictConfig]:
+    yaml_path = os.path.join(foundry_dir, 'scripts/eval/yamls/test_eval.yaml')
+    with open(yaml_path, 'r', encoding='utf-8') as f:
+        eval_cfg = om.OmegaConf.load(f)
+    return eval_cfg
 
 
 @pytest.fixture()
-def mock_saved_model_path():
-    # load the eval and model config
-    with open('eval/yamls/test_eval.yaml', 'r', encoding='utf-8') as f:
-        eval_cfg = om.OmegaConf.load(f)
+def mock_saved_model_path(eval_cfg: Union[om.ListConfig, om.DictConfig]):
     model_cfg = eval_cfg.models[0]
     # set device to cpu
     device = 'cpu'
@@ -60,12 +48,11 @@ def mock_saved_model_path():
     os.remove(saved_model_path)
 
 
-def test_icl_eval(capfd: Any, mock_saved_model_path: Any):
-    with open('eval/yamls/test_eval.yaml', 'r', encoding='utf-8') as f:
-        test_cfg = om.OmegaConf.load(f)
-    test_cfg.models[0].load_path = mock_saved_model_path
-    assert isinstance(test_cfg, om.DictConfig)
-    main(test_cfg)
+def test_icl_eval(eval_cfg: Union[om.ListConfig, om.DictConfig], capfd: Any,
+                  mock_saved_model_path: Any):
+    eval_cfg.models[0].load_path = mock_saved_model_path
+    assert isinstance(eval_cfg, om.DictConfig)
+    main(eval_cfg)
     out, _ = capfd.readouterr()
     expected_results = '| Category                    | Benchmark      | Subtask   |   Accuracy | Number few shot   | Model    |\n|:----------------------------|:---------------|:----------|-----------:|:------------------|:---------|\n| language_understanding_lite | lambada_openai |           |          0 | 0-shot            | tiny_mpt |'
     assert expected_results in out

diff --git a/tests/scripts/eval/test_eval_inputs.py b/tests/scripts/eval/test_eval_inputs.py
@@ -2,29 +2,26 @@
 # SPDX-License-Identifier: Apache-2.0
 import copy
 import os
-import sys
 import warnings
 
 import omegaconf
 import pytest
 from omegaconf import DictConfig
 from omegaconf import OmegaConf as om
 
-# Add repo root to path so we can import scripts and test it
-repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-sys.path.append(repo_dir)
-
 from scripts.eval.eval import main  # noqa: E402
 
 
 class TestHuggingFaceEvalYAMLInputs:
     """Validate and tests error handling for the input YAML file."""
 
     @pytest.fixture
-    def cfg(self) -> DictConfig:
+    def cfg(self, foundry_dir: str) -> DictConfig:
         """Create YAML cfg fixture for testing purposes."""
-        conf_path: str = os.path.join(repo_dir,
-                                      'scripts/eval/yamls/hf_eval.yaml')
+        conf_path: str = os.path.join(
+            foundry_dir,
+            'scripts/eval/yamls/hf_eval.yaml',
+        )
         with open(conf_path, 'r', encoding='utf-8') as config:
             test_cfg = om.load(config)
         assert isinstance(test_cfg, DictConfig)
@@ -78,15 +75,17 @@ def test_optional_mispelled_params_raise_warning(self,
 class TestMPTEvalYAMLInputs:
 
     @pytest.fixture
-    def cfg(self) -> DictConfig:
+    def cfg(self, foundry_dir: str) -> DictConfig:
         """Create YAML cfg fixture for testing purposes."""
-        conf_path: str = os.path.join(repo_dir,
-                                      'scripts/eval/yamls/mpt_eval.yaml')
+        conf_path: str = os.path.join(
+            foundry_dir,
+            'scripts/eval/yamls/mpt_eval.yaml',
+        )
         with open(conf_path, 'r', encoding='utf-8') as config:
             test_cfg = om.load(config)
 
         test_cfg.icl_tasks[0].dataset_uri = os.path.join(
-            repo_dir, 'scripts', test_cfg.icl_tasks[0].dataset_uri)
+            foundry_dir, 'scripts', test_cfg.icl_tasks[0].dataset_uri)
 
         # make tests use cpu initialized transformer models only
         test_cfg.models[0].model.init_device = 'cpu'

diff --git a/tests/scripts/inference/__init__.py b/tests/scripts/inference/__init__.py
@@ -1,3 +1,2 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
-
diff --git a/tests/scripts/inference/test_convert_composer_to_hf.py b/tests/scripts/inference/test_convert_composer_to_hf.py
@@ -4,34 +4,26 @@
 import math
 import os
 import pathlib
-import sys
-from typing import Callable
-from unittest.mock import ANY, MagicMock, patch
-
-from composer import Trainer
-from composer.loggers import MLFlowLogger
-from composer.utils import dist, get_device, using_torch_2
-
-from llmfoundry.callbacks import HuggingFaceCheckpointer
-from llmfoundry.models.mpt.modeling_mpt import ComposerMPTCausalLM
-
-# Add repo root to path so we can import scripts and test it
-repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-sys.path.append(repo_dir)
 import shutil
 from argparse import Namespace
-from typing import Optional, cast
+from typing import Callable, Optional, cast
+from unittest.mock import ANY, MagicMock, patch
 
 import pytest
 import torch
 import transformers
+from composer import Trainer
+from composer.loggers import MLFlowLogger
+from composer.utils import dist, get_device, using_torch_2
 from omegaconf import DictConfig
 from omegaconf import OmegaConf as om
 from torch.utils.data import DataLoader
 from transformers import PreTrainedModel, PreTrainedTokenizerBase
 
 from llmfoundry import COMPOSER_MODEL_REGISTRY
+from llmfoundry.callbacks import HuggingFaceCheckpointer
 from llmfoundry.data.finetuning import build_finetuning_dataloader
+from llmfoundry.models.mpt.modeling_mpt import ComposerMPTCausalLM
 from llmfoundry.utils.builders import build_optimizer, build_tokenizer
 from scripts.inference.convert_composer_to_hf import convert_composer_to_hf
 from tests.data_utils import make_tiny_ft_dataset

diff --git a/tests/scripts/train/__init__.py b/tests/scripts/train/__init__.py
@@ -1,3 +1,2 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
-
diff --git a/tests/scripts/train/test_train.py b/tests/scripts/train/test_train.py
@@ -1,7 +1,6 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
 import copy
-import os
 import pathlib
 from typing import Any, Optional
 
@@ -16,17 +15,6 @@
                               gpt_tiny_cfg)
 
 
-@pytest.fixture(autouse=False)
-def set_correct_cwd():
-    if not os.getcwd().endswith('llm-foundry/scripts'):
-        os.chdir('scripts')
-
-    yield
-
-    if os.getcwd().endswith('llm-foundry/scripts'):
-        os.chdir('..')
-
-
 @pytest.mark.parametrize('averages', [{
     'core_average': ['language_understanding_lite']
 }, None])
Original file line number	Diff line number	Diff line change
		@@ -1,3 +1,2 @@
		# Copyright 2022 MosaicML LLM Foundry authors
		# SPDX-License-Identifier: Apache-2.0