From cc60a21b1cb68af6adafdfc28763a934b93d79ac Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 2 Apr 2024 00:38:21 -0700 Subject: [PATCH] more tests --- tests/tokenizers/test_tiktoken.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/tokenizers/test_tiktoken.py b/tests/tokenizers/test_tiktoken.py index d7fc6dced6..bb936db617 100644 --- a/tests/tokenizers/test_tiktoken.py +++ b/tests/tokenizers/test_tiktoken.py @@ -372,6 +372,15 @@ def test_additional_special_tokens_len(): no_special = TiktokenTokenizerWrapper(model_name='gpt-4',) assert len(with_special.get_vocab()) == len(no_special.get_vocab()) + 1 + ret = with_special.add_special_tokens( + {'additional_special_tokens': ['<|im_start|>']}) + assert ret == 0 + + ret = with_special.add_special_tokens( + {'additional_special_tokens': ['<|im_end|>']}) + assert ret == 1 + assert len(with_special.get_vocab()) == len(no_special.get_vocab()) + 2 + @pytest.mark.parametrize('model_name,encoding_name', MODEL_ENCODING_NAME_PARAMETRIZATION)