From dba1104161436f58af4edfcd463ac5620db51a47 Mon Sep 17 00:00:00 2001
From: Irene Dea <deaairene@gmail.com>
Date: Tue, 24 Oct 2023 02:16:08 +0000
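Subject: [PATCH] Replace os.path usage with pathlib in test data fixtures

The tiny_ft_dataset_path fixture now builds its paths with pathlib
(replacing os.path.join and os.mkdir) and returns a Path instead of a
str. tiny_ft_dataloader converts that Path back to str for the
'hf_name' config entry. The `import os` line is removed.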

---
 tests/fixtures/data.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/tests/fixtures/data.py b/tests/fixtures/data.py
index 393bd420bd..c44c01ef95 100644
--- a/tests/fixtures/data.py
+++ b/tests/fixtures/data.py
@@ -1,7 +1,6 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
 
-import os
 from pathlib import Path
 
 from omegaconf import DictConfig
@@ -14,24 +13,24 @@
 
 
 @fixture
-def tiny_ft_dataset_path(tmp_path: Path, dataset_size: int = 4) -> str:
+def tiny_ft_dataset_path(tmp_path: Path, dataset_size: int = 4) -> Path:
     """Creates a tiny dataset and returns the path."""
-    tiny_dataset_path = os.path.join(tmp_path, 'test-ift-data-small')
-    os.mkdir(tiny_dataset_path)
-    tiny_dataset_file = os.path.join(tiny_dataset_path, 'train.jsonl')
-    make_tiny_ft_dataset(path=tiny_dataset_file, size=dataset_size)
+    tiny_dataset_path = tmp_path / 'test-ift-data-small'
+    tiny_dataset_path.mkdir(exist_ok=True)
+    tiny_dataset_file = tiny_dataset_path / 'train.jsonl'
+    make_tiny_ft_dataset(path=str(tiny_dataset_file), size=dataset_size)
     return tiny_dataset_path
 
 
 @fixture
-def tiny_ft_dataloader(tiny_ft_dataset_path: str,
+def tiny_ft_dataloader(tiny_ft_dataset_path: Path,
                        mpt_tokenizer: PreTrainedTokenizerBase,
                        max_seq_len: int = 128,
                        device_batch_size: int = 1) -> DataLoader:
     dataloader_cfg = DictConfig({
         'name': 'finetuning',
         'dataset': {
-            'hf_name': tiny_ft_dataset_path,
+            'hf_name': str(tiny_ft_dataset_path),
             'split': 'train',
             'max_seq_len': max_seq_len,
             'decoder_only_format': True,