Skip to content

Commit

Permalink
whitespace
Browse files — browse the repository at this point in the history
  • Loading branch information
mapmeld committed Mar 30, 2024
1 parent cf1843b commit f95b7e0
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions src/axolotl/utils/data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -90,7 +90,7 @@ def prepare_dataset(cfg, tokenizer):
path = cfg.pretraining_dataset[0]["path"]
name = cfg.pretraining_dataset[0]["name"]
if "split" in cfg.pretraining_dataset[0]:
split = cfg.pretraining_dataset[0]["split"]
split = cfg.pretraining_dataset[0]["split"]

ds_wrapper_partial = functools.partial(
get_dataset_wrapper,
Expand Down Expand Up @@ -839,11 +839,11 @@ def wrap_pretraining_dataset(
# this is empty during streaming/pretraining
remove_columns = []
if dataset.features is None:
for first_row in dataset:
remove_columns = first_row.keys()
break
for first_row in dataset:
remove_columns = first_row.keys()
break
else:
remove_columns = dataset.features.keys()
remove_columns = dataset.features.keys()

dataset = dataset.map(
encode,
Expand Down

0 comments on commit f95b7e0

Please sign in to comment.