diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py index 52002a7c35..2af85831ad 100644 --- a/src/axolotl/utils/data.py +++ b/src/axolotl/utils/data.py @@ -99,7 +99,12 @@ def load_tokenized_prepared_datasets( str(cfg.sequence_len) + "@" + "|".join( - sorted([f"{d.path}:{d.type}:{d.shards}" for d in cfg.datasets]) + sorted( + [ + f"{d.path}:{d.type}:{d.shards}:{d.conversation}" + for d in cfg.datasets + ] + ) ) + "|" + tokenizer_name