diff --git a/README.md b/README.md index 422185ed6a..80c0fedffc 100644 --- a/README.md +++ b/README.md @@ -607,6 +607,17 @@ datasets: # For `completion` datsets only, uses the provided field instead of `text` column field: +# A list of one or more datasets to evaluate the model with. +# You can use either test_datasets or val_set_size, but not both. +test_datasets: + - path: /workspace/data/eval.jsonl + ds_type: json + # You need to specify a split. For "json" datasets, the default split is called "train". + split: train + type: completion + data_files: + - /workspace/data/eval.jsonl + # use RL training: dpo, ipo, kto_pair rl: diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py index 63f9272acc..96054dc50f 100644 --- a/src/axolotl/core/trainer_builder.py +++ b/src/axolotl/core/trainer_builder.py @@ -735,7 +735,7 @@ def build(self, total_num_steps): elif self.cfg.sample_packing and self.cfg.eval_sample_packing is False: training_arguments_kwargs["dataloader_drop_last"] = True - if self.cfg.val_set_size == 0: + if not self.cfg.test_datasets and self.cfg.val_set_size == 0: # no eval set, so don't eval training_arguments_kwargs["evaluation_strategy"] = "no" elif self.cfg.eval_steps: @@ -822,6 +822,7 @@ def build(self, total_num_steps): self.cfg.load_best_model_at_end is not False or self.cfg.early_stopping_patience ) + and not self.cfg.test_datasets and self.cfg.val_set_size > 0 and self.cfg.save_steps and self.cfg.eval_steps diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py index d9a590bc3c..39766868ea 100644 --- a/src/axolotl/utils/data.py +++ b/src/axolotl/utils/data.py @@ -440,7 +440,7 @@ def load_prepare_datasets( split="train", ) -> Tuple[Dataset, Dataset, List[Prompter]]: dataset, prompters = load_tokenized_prepared_datasets( - tokenizer, cfg, default_dataset_prepared_path + tokenizer, cfg, default_dataset_prepared_path, split=split ) if cfg.dataset_shard_num and cfg.dataset_shard_idx is not None: