Skip to content

Commit

Permalink
Support loading datasets saved via save_to_disk
Browse files Browse the repository at this point in the history
  • Loading branch information
fozziethebeat committed Mar 22, 2024
1 parent 4e69aa4 commit e75de2b
Showing 1 changed file with 11 additions and 8 deletions.
19 changes: 11 additions & 8 deletions src/axolotl/utils/data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module containing data utilities"""

import functools
import hashlib
import logging
Expand Down Expand Up @@ -290,14 +291,16 @@ def for_d_in_datasets(dataset_configs):
local_path = Path(config_dataset.path)
if local_path.exists():
if local_path.is_dir():
# TODO dirs with arrow or parquet files could be loaded with `load_from_disk`
ds = load_dataset(
config_dataset.path,
name=config_dataset.name,
data_files=config_dataset.data_files,
streaming=False,
split=None,
)
if config_dataset.data_files:
ds = load_dataset(
config_dataset.path,
name=config_dataset.name,
data_files=config_dataset.data_files,
streaming=False,
split=None,
)
else:
ds = load_from_disk(config_dataset.path)
elif local_path.is_file():
ds_type = get_ds_type(config_dataset)

Expand Down

0 comments on commit e75de2b

Please sign in to comment.