diff --git a/llmfoundry/data/finetuning/tasks.py b/llmfoundry/data/finetuning/tasks.py index 42a9c0da02..edbfcc28c7 100644 --- a/llmfoundry/data/finetuning/tasks.py +++ b/llmfoundry/data/finetuning/tasks.py @@ -347,7 +347,7 @@ def dataset_mapper(example: Dict): dataset_mapper, batched=False, remove_columns=columns_to_remove, - num_proc=num_cpus_to_use), + num_proc=num_cpus_to_use, ) prompt_length_filtered_dataset = tokenized_dataset.filter( lambda example: len(example['input_ids']) < max_seq_len,