diff --git a/llmfoundry/command_utils/data_prep/convert_text_to_mds.py b/llmfoundry/command_utils/data_prep/convert_text_to_mds.py
index 14afe279fd..336c82a5e7 100644
--- a/llmfoundry/command_utils/data_prep/convert_text_to_mds.py
+++ b/llmfoundry/command_utils/data_prep/convert_text_to_mds.py
@@ -394,6 +394,13 @@ def convert_text_to_mds(
         reprocess (bool): Whether to always reprocess the given folder of text files
         trust_remote_code (bool): If true, allows custom code to be executed to load the tokenizer
     """
+    # Load the tokenizer once on the main process, discarding the result, so that its files are cached
+    # up front; this avoids race conditions in the Hugging Face loading code
+    AutoTokenizer.from_pretrained(
+        tokenizer_name,
+        trust_remote_code=trust_remote_code,
+    )
+
     is_remote_output = is_remote_path(output_folder)
     log.info(f'Output is remote: {is_remote_output}')