From f95b7e0b9a8473a2cb8628980ee5ce3f01af789c Mon Sep 17 00:00:00 2001 From: Nick Doiron Date: Sat, 30 Mar 2024 10:37:27 -0500 Subject: [PATCH] whitespace --- src/axolotl/utils/data.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py index b039779c24..6cc27fbdbd 100644 --- a/src/axolotl/utils/data.py +++ b/src/axolotl/utils/data.py @@ -90,7 +90,7 @@ def prepare_dataset(cfg, tokenizer): path = cfg.pretraining_dataset[0]["path"] name = cfg.pretraining_dataset[0]["name"] if "split" in cfg.pretraining_dataset[0]: - split = cfg.pretraining_dataset[0]["split"] + split = cfg.pretraining_dataset[0]["split"] ds_wrapper_partial = functools.partial( get_dataset_wrapper, @@ -839,11 +839,11 @@ def wrap_pretraining_dataset( # this is empty during streaming/pretraining remove_columns = [] if dataset.features is None: - for first_row in dataset: - remove_columns = first_row.keys() - break + for first_row in dataset: + remove_columns = first_row.keys() + break else: - remove_columns = dataset.features.keys() + remove_columns = dataset.features.keys() dataset = dataset.map( encode,