From ff3d9018027ad8f8e47f4f0c246364f8887b520d Mon Sep 17 00:00:00 2001
From: janEbert
Date: Thu, 5 Dec 2024 16:03:16 +0000
Subject: [PATCH] Expose `DistributedSampler` RNG seed argument (#1677)

Co-authored-by: Mihir Patel
Co-authored-by: Saaketh Narayan
---
 llmfoundry/data/finetuning/dataloader.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llmfoundry/data/finetuning/dataloader.py b/llmfoundry/data/finetuning/dataloader.py
index 661729ff8a..fce694f160 100644
--- a/llmfoundry/data/finetuning/dataloader.py
+++ b/llmfoundry/data/finetuning/dataloader.py
@@ -336,6 +336,7 @@ def build_finetuning_dataloader(
             replication_factor if replication_factor > 1 else None,
             rank=dist.get_global_rank() //
             replication_factor if replication_factor > 1 else None,
+            seed=dataset_cfg.get('shuffle_seed', 0),
         )
 
     assert streaming_dataset is not None  # for pyright