From 2ab77ab838c01e28810524ce1482463547b1a800 Mon Sep 17 00:00:00 2001
From: Charles Goddard
Date: Sat, 30 Nov 2024 16:10:58 -0800
Subject: [PATCH] Fix bug in setting output vocab size in tokensurgeon

---
 mergekit/scripts/tokensurgeon.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mergekit/scripts/tokensurgeon.py b/mergekit/scripts/tokensurgeon.py
index a6715643..d5680283 100644
--- a/mergekit/scripts/tokensurgeon.py
+++ b/mergekit/scripts/tokensurgeon.py
@@ -190,7 +190,7 @@ def main(
     tokenizer.save_pretrained(out_path)
     cfg_out = arch_info.config
     try:
-        cfg_out.vocab_size = tokenizer.vocab_size
+        cfg_out.vocab_size = new_embed.shape[0]
     except AttributeError:
         LOG.error(
             "Could not set vocab size in config.json - you may need to update it manually."