diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py index eeac15d302..2713796774 100644 --- a/src/axolotl/utils/data.py +++ b/src/axolotl/utils/data.py @@ -205,11 +205,26 @@ def for_d_in_datasets(dataset_configs): use_auth_token=use_auth_token, ) else: - fp = hf_hub_download( - repo_id=d.path, - repo_type="dataset", - filename=d.data_files, - ) + if isinstance(d.data_files, str): + fp = hf_hub_download( + repo_id=d.path, + repo_type="dataset", + filename=d.data_files, + ) + elif isinstance(d.data_files, list): + fp = [] + for file in d.data_files: + fp.append( + hf_hub_download( + repo_id=d.path, + repo_type="dataset", + filename=file, + ) + ) + else: + raise ValueError( + "data_files must be either a string or list of strings" + ) ds = load_dataset( "json", name=d.name, data_files=fp, streaming=False, split=None )