diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py index 49b36202c0..5c41d16fe4 100644 --- a/src/axolotl/utils/data.py +++ b/src/axolotl/utils/data.py @@ -242,7 +242,14 @@ def for_d_in_datasets(dataset_configs): local_path = Path(config_dataset.path) if local_path.exists(): if local_path.is_dir(): - ds = load_from_disk(config_dataset.path) + # TODO dirs with arrow or parquet files could be loaded with `load_from_disk` + ds = load_dataset( + config_dataset.path, + name=config_dataset.name, + data_files=config_dataset.data_files, + streaming=False, + split=None, + ) elif local_path.is_file(): ds_type = get_ds_type(config_dataset)