From dba1104161436f58af4edfcd463ac5620db51a47 Mon Sep 17 00:00:00 2001 From: Irene Dea Date: Tue, 24 Oct 2023 02:16:08 +0000 Subject: [PATCH] Remove os.path.join --- tests/fixtures/data.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/fixtures/data.py b/tests/fixtures/data.py index 393bd420bd..c44c01ef95 100644 --- a/tests/fixtures/data.py +++ b/tests/fixtures/data.py @@ -1,7 +1,6 @@ # Copyright 2022 MosaicML LLM Foundry authors # SPDX-License-Identifier: Apache-2.0 -import os from pathlib import Path from omegaconf import DictConfig @@ -14,24 +13,24 @@ @fixture -def tiny_ft_dataset_path(tmp_path: Path, dataset_size: int = 4) -> str: +def tiny_ft_dataset_path(tmp_path: Path, dataset_size: int = 4) -> Path: """Creates a tiny dataset and returns the path.""" - tiny_dataset_path = os.path.join(tmp_path, 'test-ift-data-small') - os.mkdir(tiny_dataset_path) - tiny_dataset_file = os.path.join(tiny_dataset_path, 'train.jsonl') - make_tiny_ft_dataset(path=tiny_dataset_file, size=dataset_size) + tiny_dataset_path = tmp_path / 'test-ift-data-small' + tiny_dataset_path.mkdir(exist_ok=True) + tiny_dataset_file = tiny_dataset_path / 'train.jsonl' + make_tiny_ft_dataset(path=str(tiny_dataset_file), size=dataset_size) return tiny_dataset_path @fixture -def tiny_ft_dataloader(tiny_ft_dataset_path: str, +def tiny_ft_dataloader(tiny_ft_dataset_path: Path, mpt_tokenizer: PreTrainedTokenizerBase, max_seq_len: int = 128, device_batch_size: int = 1) -> DataLoader: dataloader_cfg = DictConfig({ 'name': 'finetuning', 'dataset': { - 'hf_name': tiny_ft_dataset_path, + 'hf_name': str(tiny_ft_dataset_path), 'split': 'train', 'max_seq_len': max_seq_len, 'decoder_only_format': True,