Skip to content

Commit

Permalink
lint and fix broken code
Browse files: browse the repository at this point in the history
  • Loading branch information
winglian committed Oct 28, 2023
1 parent a898808 commit 8f11779
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 6 deletions.
1 change: 0 additions & 1 deletion gitbook/README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
# Page

10 changes: 5 additions & 5 deletions src/axolotl/utils/data.py
Original file line number Diff line number Diff line change
@@ -251,10 +251,10 @@ def for_d_in_datasets(dataset_configs):
raise ValueError("unhandled dataset load")
# support for using a subset of the data
if config_dataset.shards:
shard_split = d.split if d.split else split
shard_split = config_dataset.split if config_dataset.split else split
if shard_split in ds:
ds = ds.shuffle(seed=seed)[shard_split].shard(
num_shards=d.shards, index=0
num_shards=config_dataset.shards, index=0
)
else:
ds = ds.shuffle(seed=seed).shard(
@@ -268,13 +268,13 @@ def for_d_in_datasets(dataset_configs):
d_base_type = d_type_split[0]
d_prompt_style = d_type_split[1] if len(d_type_split) > 1 else None

if d.split and d.split in ds:
ds = ds[d.split]
if config_dataset.split and config_dataset.split in ds:
ds = ds[config_dataset.split]
elif split in ds:
ds = ds[split]
elif isinstance(ds, DatasetDict):
raise ValueError(
f"no {split} split found for dataset {d.path}, you may specify a split with 'split: ...`"
f"no {split} split found for dataset {config_dataset.path}, you may specify a split with 'split: ...`"
)

dataset_wrapper, dataset_prompter = get_dataset_wrapper(
Expand Down

0 comments on commit 8f11779

Please sign in to comment.