Skip to content

Commit

Permalink
Merge branch 'main' into chuck/aws-docker
Browse files (browse the repository at this point in the history)
  • Loading branch information
j316chuck authored Nov 10, 2023
2 parents 6379634 + c3f0cf9 commit be31d92
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions llmfoundry/utils/builders.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -190,9 +190,11 @@ def build_tokenizer(

     signal_file_path = f'.node_{dist.get_node_rank()}_local_rank0_completed_tokenizer_setup'

-    # Make sure the tokenizer files are downloaded and cached first by local rank 0
-    with dist.local_rank_zero_download_and_wait(signal_file_path):
-        pass
+    if dist.is_available() and dist.is_initialized(
+    ) and dist.get_world_size() > 1:
+        # Make sure the tokenizer files are downloaded and cached first by local rank 0
+        with dist.local_rank_zero_download_and_wait(signal_file_path):
+            pass

     if tokenizer_name.startswith('tiktoken'):
         tokenizer = TiktokenTokenizerWrapper(**tokenizer_kwargs)
Expand All @@ -208,14 +210,16 @@ def build_tokenizer(
int(1e30),
)

-    if dist.get_local_rank() == 0:
-        with open(signal_file_path, 'wb') as f:
-            f.write(b'local_rank0_completed_tokenizer_setup')
+    if dist.is_available() and dist.is_initialized(
+    ) and dist.get_world_size() > 1:
+        if dist.get_local_rank() == 0:
+            with open(signal_file_path, 'wb') as f:
+                f.write(b'local_rank0_completed_tokenizer_setup')

-    dist.barrier()
+        dist.barrier()

-    if dist.get_local_rank() == 0:
-        os.remove(signal_file_path)
+        if dist.get_local_rank() == 0:
+            os.remove(signal_file_path)

     return tokenizer

Expand Down

0 comments on commit be31d92

Please sign in to comment.