Skip to content

Commit

Permalink
Use tempdir for finetuning dataset download
Browse files: browse the repository at this point in the history
  • Loading branch information
irenedea committed Oct 22, 2024
1 parent 6448e4e commit abc21f0
Showing 1 changed file with 2 additions and 9 deletions.
11 changes: 2 additions & 9 deletions llmfoundry/data/finetuning/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def preprocessing_fn(example: Dict) -> Dict[str, str]:
 import importlib
 import logging
 import os
+import tempfile
 import warnings
 from collections.abc import Mapping
 from functools import partial
Expand Down Expand Up @@ -107,15 +108,7 @@ def preprocessing_fn(example: Dict) -> Dict[str, str]:
 _ALLOWED_CONTENT_KEYS = {'content'}
 _ALLOWED_ROLES = {'user', 'assistant', 'system', 'tool'}
 _ALLOWED_LAST_MESSAGE_ROLES = {'assistant'}
-DOWNLOADED_FT_DATASETS_DIRPATH = os.path.abspath(
-os.path.join(
-os.path.realpath(__file__),
-os.pardir,
-os.pardir,
-os.pardir,
-'.downloaded_finetuning',
-),
-)
+DOWNLOADED_FT_DATASETS_DIRPATH = tempfile.mkdtemp()
 SUPPORTED_EXTENSIONS = ['.csv', '.json', '.jsonl', '.parquet']
 HUGGINGFACE_FOLDER_EXTENSIONS = ['.lock', '.metadata']
 DEFAULT_TARGET_RESPONSES = 'last'
Expand Down

0 comments on commit abc21f0

Please sign in to comment.