Skip to content

Commit

Permalink
support local datasets for dpo
Browse files (browse the repository at this point in the history)
Co-authored-by: Agus <[email protected]>
  • Loading branch information
winglian and plaguss committed Jan 23, 2024
1 parent e9b368e commit 9c1c3c8
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions src/axolotl/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -846,10 +846,16 @@ def load_prepare_dpo_datasets(cfg):
def load_split(dataset_cfgs, _cfg):
split_datasets: List[Any] = []
for i, ds_cfg in enumerate(dataset_cfgs):
ds = load_dataset( # pylint: disable=invalid-name
ds_cfg["path"], split=ds_cfg["split"]
)
split_datasets.insert(i, ds)
if ds_cfg["ds_type"] == "json":
for data_file in ds_cfg["data_files"]:
data_files = {ds_cfg["split"]: data_file}
ds = load_dataset("json", data_files=data_files, split=ds_cfg["split"])
split_datasets.insert(i, ds)
else:
ds = load_dataset( # pylint: disable=invalid-name
ds_cfg["path"], split=ds_cfg["split"]
)
split_datasets.insert(i, ds)

for i, data_set in enumerate(split_datasets):
_type = dataset_cfgs[i]["type"]
Expand Down

0 comments on commit 9c1c3c8

Please sign in to comment.