From 334f02cd017ba5ac30aef33feb93b5a4eedb8f29 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 19 Jan 2024 23:32:24 -0500
Subject: [PATCH] warn about not pre-processing

---
 src/axolotl/utils/trainer.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index 9a9eeab4b9..6d2f08c8ec 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -107,6 +107,10 @@ def drop_long_seq(sample, sequence_len=2048):
 
 
 def process_datasets_for_packing(cfg, train_dataset, eval_dataset, tokenizer):
+    if cfg.is_preprocess:
+        LOG.warning(
+            "Processing datasets during training can lead to VRAM instability. Please pre-process your dataset"
+        )
     drop_long = partial(drop_long_seq, sequence_len=cfg.sequence_len)
     with zero_first(is_main_process()):
         if cfg.group_by_length: