Skip to content

Commit

Permalink
more tests
Browse files — browse the repository at this point in the history
  • Loading branch information
dakinggg committed Apr 2, 2024
1 parent c6e9f05 commit cc60a21
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions tests/tokenizers/test_tiktoken.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -372,6 +372,15 @@ def test_additional_special_tokens_len():
no_special = TiktokenTokenizerWrapper(model_name='gpt-4',)
assert len(with_special.get_vocab()) == len(no_special.get_vocab()) + 1

ret = with_special.add_special_tokens(
{'additional_special_tokens': ['<|im_start|>']})
assert ret == 0

ret = with_special.add_special_tokens(
{'additional_special_tokens': ['<|im_end|>']})
assert ret == 1
assert len(with_special.get_vocab()) == len(no_special.get_vocab()) + 2


@pytest.mark.parametrize('model_name,encoding_name',
MODEL_ENCODING_NAME_PARAMETRIZATION)
Expand Down

0 comments on commit cc60a21

Please sign in to comment.