From 52c40f2b948a09ced616737781c57f6aef84d149 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Mon, 30 Oct 2023 22:51:16 -0700 Subject: [PATCH] pr nit --- llmfoundry/data/finetuning/tasks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llmfoundry/data/finetuning/tasks.py b/llmfoundry/data/finetuning/tasks.py index ed9191a776..42a9c0da02 100644 --- a/llmfoundry/data/finetuning/tasks.py +++ b/llmfoundry/data/finetuning/tasks.py @@ -340,17 +340,18 @@ def dataset_mapper(example: Dict): return _tokenize_formatted_example(example, tokenizer) detected_cpu_count = os.cpu_count() or 1 + num_cpus_to_use = max(1, detected_cpu_count - 4) columns_to_remove = list(dataset[0].keys()) tokenized_dataset = dataset.map( dataset_mapper, batched=False, remove_columns=columns_to_remove, - num_proc=max(1, detected_cpu_count - 4), + num_proc=num_cpus_to_use, ) prompt_length_filtered_dataset = tokenized_dataset.filter( lambda example: len(example['input_ids']) < max_seq_len, - num_proc=max(1, detected_cpu_count - 4), + num_proc=num_cpus_to_use, ) examples_removed = len(tokenized_dataset) - len(