diff --git a/mergekit/scripts/tokensurgeon.py b/mergekit/scripts/tokensurgeon.py index a6715643..d5680283 100644 --- a/mergekit/scripts/tokensurgeon.py +++ b/mergekit/scripts/tokensurgeon.py @@ -190,7 +190,7 @@ def main( tokenizer.save_pretrained(out_path) cfg_out = arch_info.config try: - cfg_out.vocab_size = tokenizer.vocab_size + cfg_out.vocab_size = new_embed.shape[0] except AttributeError: LOG.error( "Could not set vocab size in config.json - you may need to update it manually."