From dd15791818fa53ae792de66d3529d94e0dcb83d9 Mon Sep 17 00:00:00 2001 From: Daniel King <43149077+dakinggg@users.noreply.github.com> Date: Mon, 6 Nov 2023 23:19:54 -0800 Subject: [PATCH] Set persistent_workers = False for packing profiling (#718) --- llmfoundry/data/finetuning/dataloader.py | 7 +++++++ llmfoundry/data/packing.py | 1 + 2 files changed, 8 insertions(+) diff --git a/llmfoundry/data/finetuning/dataloader.py b/llmfoundry/data/finetuning/dataloader.py index 6e988ac149..44d6d345f5 100644 --- a/llmfoundry/data/finetuning/dataloader.py +++ b/llmfoundry/data/finetuning/dataloader.py @@ -400,6 +400,13 @@ def _build_collate_fn( packing_ratio = auto_packing_ratio(dataloader_cfg, tokenizer, device_batch_size) + if isinstance(packing_ratio, str): + raise ValueError( + 'dataset.packing_ratio must be a float or "auto", but it was set to ' + + f'{packing_ratio}.') + + log.info(f'Using packing ratio {packing_ratio}') + if packing_ratio == 1.0: return collate_fn, device_batch_size elif packing_ratio < 1.0: diff --git a/llmfoundry/data/packing.py b/llmfoundry/data/packing.py index 1ae9efcce5..45322c9b2f 100644 --- a/llmfoundry/data/packing.py +++ b/llmfoundry/data/packing.py @@ -348,6 +348,7 @@ def profile_packing( dataloader_cfg.drop_last = False dataloader_cfg.num_workers = 0 dataloader_cfg.prefetch_factor = None + dataloader_cfg.persistent_workers = False # Determine the packing_ratio values we'll try packing_ratios, raw_batch_sizes = [], []