From 80cb137af10c62371779f9f263e0768613fa49f4 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Sun, 21 Jan 2024 06:21:38 -0500
Subject: [PATCH] fixes for smoke tests

---
 src/axolotl/utils/trainer.py                | 6 ++++++
 tests/e2e/patched/test_falcon_samplepack.py | 1 +
 2 files changed, 7 insertions(+)

diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index b8235d3cf8..2dec90eb79 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -124,6 +124,12 @@ def process_datasets_for_packing(cfg, train_dataset, eval_dataset, tokenizer):
         if eval_dataset:
             eval_dataset = eval_dataset.remove_columns("attention_mask")
 
+    if cfg.model_config_type == "falcon":
+        LOG.info("dropping token_type_ids column")
+        train_dataset = train_dataset.remove_columns("token_type_ids")
+        if eval_dataset:
+            eval_dataset = eval_dataset.remove_columns("token_type_ids")
+
     train_dataset = train_dataset.filter(
         drop_long,
         num_proc=cfg.dataset_processes,
diff --git a/tests/e2e/patched/test_falcon_samplepack.py b/tests/e2e/patched/test_falcon_samplepack.py
index 35f2c390df..ae6a497391 100644
--- a/tests/e2e/patched/test_falcon_samplepack.py
+++ b/tests/e2e/patched/test_falcon_samplepack.py
@@ -39,6 +39,7 @@ def test_qlora(self, temp_dir):
                 "lora_alpha": 32,
                 "lora_dropout": 0.1,
                 "lora_target_linear": True,
+                "lora_modules_to_save": ["word_embeddings", "lm_head"],
                 "val_set_size": 0.1,
                 "special_tokens": {
                     "bos_token": "<|endoftext|>",