def drop_long_seq(sample, sequence_len: int = 2048) -> bool:
    """Return True when the sample's token count is non-empty and within bounds.

    Args:
        sample: Mapping with an ``"input_ids"`` entry that supports ``len()``.
        sequence_len: Maximum allowed number of input ids (inclusive).

    Returns:
        True if ``0 < len(sample["input_ids"]) <= sequence_len``; False for
        empty rows and for rows longer than ``sequence_len``.
    """
    # NOTE(review): presumably used as a dataset filter predicate, so False
    # means "drop this row" - confirm against the call sites.
    # A single chained comparison rejects both empty and over-long rows.
    return 0 < len(sample["input_ids"]) <= sequence_len