Skip to content

Commit

Permalink
Remove os.path.join
Browse files (browse the repository at this point in the history)
  • Loading branch information
irenedea committed Oct 24, 2023
1 parent e85fc95 commit dba1104
Showing 1 changed file with 7 additions and 8 deletions.
15 changes: 7 additions & 8 deletions tests/fixtures/data.py
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0

-import os
 from pathlib import Path

 from omegaconf import DictConfig
Expand All @@ -14,24 +13,24 @@


 @fixture
-def tiny_ft_dataset_path(tmp_path: Path, dataset_size: int = 4) -> str:
+def tiny_ft_dataset_path(tmp_path: Path, dataset_size: int = 4) -> Path:
     """Creates a tiny dataset and returns the path."""
-    tiny_dataset_path = os.path.join(tmp_path, 'test-ift-data-small')
-    os.mkdir(tiny_dataset_path)
-    tiny_dataset_file = os.path.join(tiny_dataset_path, 'train.jsonl')
-    make_tiny_ft_dataset(path=tiny_dataset_file, size=dataset_size)
+    tiny_dataset_path = tmp_path / 'test-ift-data-small'
+    tiny_dataset_path.mkdir(exist_ok=True)
+    tiny_dataset_file = tiny_dataset_path / 'train.jsonl'
+    make_tiny_ft_dataset(path=str(tiny_dataset_file), size=dataset_size)
     return tiny_dataset_path


 @fixture
-def tiny_ft_dataloader(tiny_ft_dataset_path: str,
+def tiny_ft_dataloader(tiny_ft_dataset_path: Path,
                        mpt_tokenizer: PreTrainedTokenizerBase,
                        max_seq_len: int = 128,
                        device_batch_size: int = 1) -> DataLoader:
     dataloader_cfg = DictConfig({
         'name': 'finetuning',
         'dataset': {
-            'hf_name': tiny_ft_dataset_path,
+            'hf_name': str(tiny_ft_dataset_path),
             'split': 'train',
             'max_seq_len': max_seq_len,
             'decoder_only_format': True,
Expand Down

0 comments on commit dba1104

Please sign in to comment.