From 10b221f440fa474915b88e767685db41dc757f05 Mon Sep 17 00:00:00 2001 From: Irene Dea Date: Mon, 18 Sep 2023 14:46:49 -0700 Subject: [PATCH] Add default processes in text to mds conversion (#608) --- scripts/data_prep/convert_text_to_mds.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/data_prep/convert_text_to_mds.py b/scripts/data_prep/convert_text_to_mds.py index 5e37da639a..d28c38a704 100644 --- a/scripts/data_prep/convert_text_to_mds.py +++ b/scripts/data_prep/convert_text_to_mds.py @@ -10,6 +10,7 @@ from glob import glob from typing import Iterable, List, Tuple, cast +import psutil from composer.utils import (ObjectStore, maybe_create_object_store_from_uri, parse_uri) from streaming import MDSWriter @@ -87,7 +88,7 @@ def parse_args() -> Namespace: '--processes', type=int, required=False, - default=1, + default=min(max(psutil.cpu_count() - 2, 1), 32), help= 'The number of processes to use to download and convert the dataset', )