From 10388a8daf593ca1e762c668b30feab373c6c42a Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Tue, 31 Oct 2023 13:21:20 +0900
Subject: [PATCH] fix(tokenizer): update log order after update (#806)

---
 src/axolotl/utils/models.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 5ce7de5e51..cc83840ba7 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -72,11 +72,6 @@ def load_tokenizer(cfg):
         # set a pad_token, but use eos_token so we don't add a new token
         tokenizer.pad_token = LLAMA_DEFAULT_EOS_TOKEN
 
-    LOG.debug(f"EOS: {tokenizer.eos_token_id} / {tokenizer.eos_token}")
-    LOG.debug(f"BOS: {tokenizer.bos_token_id} / {tokenizer.bos_token}")
-    LOG.debug(f"PAD: {tokenizer.pad_token_id} / {tokenizer.pad_token}")
-    LOG.debug(f"UNK: {tokenizer.unk_token_id} / {tokenizer.unk_token}")
-
     if tokenizer.__class__.__name__ == "GPTNeoXTokenizerFast":
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
         os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -98,6 +93,11 @@ def load_tokenizer(cfg):
             ]
         )
 
+    LOG.debug(f"EOS: {tokenizer.eos_token_id} / {tokenizer.eos_token}")
+    LOG.debug(f"BOS: {tokenizer.bos_token_id} / {tokenizer.bos_token}")
+    LOG.debug(f"PAD: {tokenizer.pad_token_id} / {tokenizer.pad_token}")
+    LOG.debug(f"UNK: {tokenizer.unk_token_id} / {tokenizer.unk_token}")
+
     return tokenizer
 
 
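
Note (not part of the patch): the four LOG.debug calls originally ran before load_tokenizer applied its special-token updates (the GPTNeoXTokenizerFast pad-token branch, and the tokenizer.add_tokens call visible in the second hunk's context), so the debug log could report stale or missing token ids. Moving the calls just before the return logs the tokenizer's final state. Below is a minimal standalone sketch of the difference, assuming only the transformers library; the GPT-NeoX checkpoint name is an arbitrary illustrative choice, not something the patch references.

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")

    # At the old log position: no pad token has been set yet,
    # so this reports "None / None".
    print(f"PAD before update: {tokenizer.pad_token_id} / {tokenizer.pad_token}")

    # The kind of update load_tokenizer performs after the old log site.
    tokenizer.add_special_tokens({"pad_token": "[PAD]"})

    # At the new log position: the final, correct values are reported.
    print(f"PAD after update: {tokenizer.pad_token_id} / {tokenizer.pad_token}")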