From 020b6d90e24f85fa18cbf8442c1925ad2912a52b Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 4 Oct 2024 18:32:02 +0100 Subject: [PATCH 01/33] Initial draft --- src/transformers/tokenization_utils_base.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 03df02d21ff..bafdb03e4c9 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -145,6 +145,7 @@ class EncodingFast: SPECIAL_TOKENS_MAP_FILE = "special_tokens_map.json" ADDED_TOKENS_FILE = "added_tokens.json" TOKENIZER_CONFIG_FILE = "tokenizer_config.json" +CHAT_TEMPLATE_FILE = "chat_template.jinja" # Fast tokenizers (provided by HuggingFace tokenizer's library) can be saved in a single file FULL_TOKENIZER_FILE = "tokenizer.json" @@ -1941,6 +1942,7 @@ def from_pretrained( "tokenizer_config_file": TOKENIZER_CONFIG_FILE, # tokenizer_file used to initialize a slow from a fast. Properly copy the `addedTokens` instead of adding in random orders "tokenizer_file": FULL_TOKENIZER_FILE, + "chat_template_file": CHAT_TEMPLATE_FILE, } vocab_files = {**cls.vocab_files_names, **additional_files_names} if "tokenizer_file" in vocab_files: @@ -2061,6 +2063,7 @@ def _from_pretrained( from_slow = kwargs.get("from_slow", False) gguf_file = kwargs.get("gguf_file", None) has_tokenizer_file = resolved_vocab_files.get("tokenizer_file", None) is not None + has_chat_template_file = resolved_vocab_files.get("chat_template_file", None) is not None # If one passes a GGUF file path to `gguf_file` there is no need for this check as the tokenizer will be # loaded directly from the GGUF file. @@ -2097,6 +2100,12 @@ def _from_pretrained( config_tokenizer_class = None init_kwargs = init_configuration + # If an independent chat template file exists, it takes priority over template entries in the tokenizer config + if has_chat_template_file: + with open(resolved_vocab_files["chat_template_file"]) as chat_template_handle: + chat_template = chat_template_handle.read() + init_kwargs["chat_template"] = chat_template # Clobbers any template in the config + if not _is_local: if "auto_map" in init_kwargs: # For backward compatibility with odl format. @@ -2259,6 +2268,7 @@ def _from_pretrained( if key != "additional_special_tokens": init_kwargs[key] = added_tokens_map.get(str(init_kwargs[key]), init_kwargs[key]) + # Instantiate the tokenizer. try: tokenizer = cls(*init_inputs, **init_kwargs) @@ -2396,6 +2406,9 @@ def save_pretrained( tokenizer_config_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + TOKENIZER_CONFIG_FILE ) + chat_template_file = os.path.join( + save_directory, (filename_prefix + "-" if filename_prefix else "") + CHAT_TEMPLATE_FILE + ) tokenizer_config = copy.deepcopy(self.init_kwargs) @@ -2418,7 +2431,13 @@ def save_pretrained( if isinstance(self.chat_template, dict): # Chat template dicts are saved to the config as lists of dicts with fixed key names. # They will be reconstructed as a single dict during loading. + # We're trying to discourage chat template dicts, and they are always + # saved in the config, never as single files. 
tokenizer_config["chat_template"] = [{"name": k, "template": v} for k, v in self.chat_template.items()] + elif kwargs.get("save_chat_template_file", False): + with open(chat_template_file, "w", encoding="utf-8") as f: + f.write(self.chat_template) + logger.info(f"chat template saved in {chat_template_file}") else: tokenizer_config["chat_template"] = self.chat_template From b8f9e0f15f8d27bdd427317d7c9972a9652b7ad1 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 7 Oct 2024 17:12:16 +0100 Subject: [PATCH 02/33] Add .jinja file loading for processors --- src/transformers/processing_utils.py | 30 +++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index b5b02f6a00a..00f9878eaac 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -44,7 +44,6 @@ TruncationStrategy, ) from .utils import ( - CHAT_TEMPLATE_NAME, PROCESSOR_NAME, PushToHubMixin, TensorType, @@ -527,7 +526,7 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs): # If we save using the predefined names, we can load using `from_pretrained` # plus we save chat_template in its own file output_processor_file = os.path.join(save_directory, PROCESSOR_NAME) - output_chat_template_file = os.path.join(save_directory, CHAT_TEMPLATE_NAME) + output_chat_template_file = os.path.join(save_directory, "chat_template.json") processor_dict = self.to_dict() # Save `chat_template` in its own file. We can't get it from `processor_dict` as we popped it in `to_dict` @@ -601,21 +600,23 @@ def get_processor_dict( is_local = os.path.isdir(pretrained_model_name_or_path) if os.path.isdir(pretrained_model_name_or_path): processor_file = os.path.join(pretrained_model_name_or_path, PROCESSOR_NAME) - chat_template_file = os.path.join(pretrained_model_name_or_path, "chat_template.json") if os.path.isfile(pretrained_model_name_or_path): resolved_processor_file = pretrained_model_name_or_path # cant't load chat-template when given a file as pretrained_model_name_or_path resolved_chat_template_file = None + resolved_naked_chat_template_file = None is_local = True elif is_remote_url(pretrained_model_name_or_path): processor_file = pretrained_model_name_or_path resolved_processor_file = download_url(pretrained_model_name_or_path) # can't load chat-template when given a file url as pretrained_model_name_or_path resolved_chat_template_file = None + resolved_naked_chat_template_file = None else: processor_file = PROCESSOR_NAME - chat_template_file = CHAT_TEMPLATE_NAME + chat_template_file = "chat_template.json" + naked_chat_template_file = "chat_template.jinja" try: # Load from local folder or from cache or download from model Hub and cache resolved_processor_file = cached_file( @@ -650,6 +651,21 @@ def get_processor_dict( subfolder=subfolder, _raise_exceptions_for_missing_entries=False, ) + + resolved_naked_chat_template_file = cached_file( + pretrained_model_name_or_path, + naked_chat_template_file, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + resume_download=resume_download, + local_files_only=local_files_only, + token=token, + user_agent=user_agent, + revision=revision, + subfolder=subfolder, + _raise_exceptions_for_missing_entries=False, + ) except EnvironmentError: # Raise any environment error raise by `cached_file`. It will have a helpful error message adapted to # the original exception. 
@@ -665,6 +681,10 @@ def get_processor_dict( # Add chat template as kwarg before returning because most models don't have processor config chat_template = None + if resolved_naked_chat_template_file is not None: + with open(resolved_chat_template_file, "r", encoding="utf-8") as reader: + chat_template = reader.read() + kwargs["chat_template"] = chat_template if resolved_chat_template_file is not None: with open(resolved_chat_template_file, "r", encoding="utf-8") as reader: text = reader.read() @@ -696,7 +716,7 @@ def get_processor_dict( if "chat_template" in processor_dict and processor_dict["chat_template"] is not None: logger.warning_once( - "Chat templates should be in a 'chat_template.json' file but found key='chat_template' " + "Chat templates should be in a 'chat_template.jinja' file but found key='chat_template' " "in the processor's config. Make sure to move your template to its own file." ) From ca101e6536f46bd37a965b7b75ab86176d4ed85b Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 7 Oct 2024 17:15:46 +0100 Subject: [PATCH 03/33] Add processor saving of naked chat template files --- src/transformers/processing_utils.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 00f9878eaac..731e253a8c2 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -526,18 +526,24 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs): # If we save using the predefined names, we can load using `from_pretrained` # plus we save chat_template in its own file output_processor_file = os.path.join(save_directory, PROCESSOR_NAME) + output_naked_chat_template_file = os.path.join(save_directory, "chat_template.jinja") output_chat_template_file = os.path.join(save_directory, "chat_template.json") processor_dict = self.to_dict() # Save `chat_template` in its own file. We can't get it from `processor_dict` as we popped it in `to_dict` # to avoid serializing chat template in json config file. So let's get it from `self` directly if self.chat_template is not None: - chat_template_json_string = ( - json.dumps({"chat_template": self.chat_template}, indent=2, sort_keys=True) + "\n" - ) - with open(output_chat_template_file, "w", encoding="utf-8") as writer: - writer.write(chat_template_json_string) - logger.info(f"chat template saved in {output_chat_template_file}") + if kwargs.get("save_naked_chat_template", False): + with open(output_naked_chat_template_file, "w", encoding="utf-8") as writer: + writer.write(self.chat_template) + logger.info(f"chat template saved in {output_naked_chat_template_file}") + else: + chat_template_json_string = ( + json.dumps({"chat_template": self.chat_template}, indent=2, sort_keys=True) + "\n" + ) + with open(output_chat_template_file, "w", encoding="utf-8") as writer: + writer.write(chat_template_json_string) + logger.info(f"chat template saved in {output_chat_template_file}") # For now, let's not save to `processor_config.json` if the processor doesn't have extra attributes and # `auto_map` is not specified. 
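Taken together, patches 01-03 sketch the full round trip: tokenizers and processors can save their chat template to a standalone Jinja file, and on load that file takes priority over any template embedded in the JSON config. A minimal usage sketch of the intended behaviour at this point in the series (the checkpoint and directory names below are only examples, and the `save_chat_template_file` / `save_naked_chat_template` kwargs are the names used in these early patches; later patches rename and harden them):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")  # example checkpoint, any tokenizer works
tok.chat_template = "{% for m in messages %}{{ m['role'] + ': ' + m['content'] }}{% endfor %}"

# Patch 01 (saving): write the template to chat_template.jinja instead of tokenizer_config.json
tok.save_pretrained("out_dir", save_chat_template_file=True)

# Patch 01 (loading): the standalone .jinja file is read back and clobbers any template in the config
reloaded = AutoTokenizer.from_pretrained("out_dir")
assert reloaded.chat_template == tok.chat_template
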
From 82c357e2766448f0b8c7d3c35778ce565793773c Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 7 Oct 2024 17:21:14 +0100 Subject: [PATCH 04/33] make fixup --- src/transformers/tokenization_utils_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index bafdb03e4c9..3bd824312e9 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2268,7 +2268,6 @@ def _from_pretrained( if key != "additional_special_tokens": init_kwargs[key] = added_tokens_map.get(str(init_kwargs[key]), init_kwargs[key]) - # Instantiate the tokenizer. try: tokenizer = cls(*init_inputs, **init_kwargs) From 3775e2b75720d5fa0507922aa8028115405e3c85 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 7 Oct 2024 17:41:38 +0100 Subject: [PATCH 05/33] Add save-load test for tokenizers --- tests/test_tokenization_common.py | 44 ++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index f04a4255556..3f342c7a24b 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -26,6 +26,7 @@ from collections import OrderedDict from itertools import takewhile from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union +from pathlib import Path from parameterized import parameterized @@ -1115,6 +1116,23 @@ def test_chat_template(self): # Check that no error raised tokenizer.apply_chat_template(dummy_conversation, tokenize=True, return_dict=False) + with tempfile.TemporaryDirectory() as tmp_dir_name: + tokenizer.save_pretrained(tmp_dir_name, save_chat_template_file=True) + chat_template_file = Path(tmp_dir_name) / "chat_template.jinja" + self.assertTrue(chat_template_file.is_file()) + self.assertEqual(chat_template_file.read_text(), dummy_template) + config_dict = json.loads((Path(tmp_dir_name) / "tokenizer_config.json").read_text()) + # Assert the chat template is not in the config when it's saved as a separate file + self.assertNotIn("chat_template", config_dict) + tokenizer = tokenizer.from_pretrained(tmp_dir_name) + + self.assertEqual(tokenizer.chat_template, dummy_template) # Test template has persisted + output = tokenizer.apply_chat_template(dummy_conversation, tokenize=False, return_dict=False) + self.assertEqual(output, expected_output) # Test output is the same after reloading + # Check that no error raised + tokenizer.apply_chat_template(dummy_conversation, tokenize=True, return_dict=False) + + @require_jinja def test_chat_template_batched(self): dummy_template = "{% for message in messages %}{{message['role'] + message['content']}}{% endfor %}" @@ -1526,18 +1544,20 @@ def test_chat_template_dict_saving(self): tokenizers = self.get_tokenizers() for tokenizer in tokenizers: with self.subTest(f"{tokenizer.__class__.__name__}"): - tokenizer.chat_template = {"template1": dummy_template_1, "template2": dummy_template_2} - with tempfile.TemporaryDirectory() as tmp_dir_name: - tokenizer.save_pretrained(tmp_dir_name) - config_dict = json.load(open(os.path.join(tmp_dir_name, "tokenizer_config.json"))) - # Assert that chat templates are correctly serialized as lists of dictionaries - self.assertEqual( - config_dict["chat_template"], - [{"name": "template1", "template": "{{'a'}}"}, {"name": "template2", "template": "{{'b'}}"}], - ) - new_tokenizer = tokenizer.from_pretrained(tmp_dir_name) - # Assert that the serialized list is correctly reconstructed as a 
single dict - self.assertEqual(new_tokenizer.chat_template, tokenizer.chat_template) + for save_chat_template_file in (True, False): + tokenizer.chat_template = {"template1": dummy_template_1, "template2": dummy_template_2} + with tempfile.TemporaryDirectory() as tmp_dir_name: + # Test that save_chat_template_file is ignored when there's a dict of multiple templates + tokenizer.save_pretrained(tmp_dir_name, save_chat_template_file=save_chat_template_file) + config_dict = json.load(open(os.path.join(tmp_dir_name, "tokenizer_config.json"))) + # Assert that chat templates are correctly serialized as lists of dictionaries + self.assertEqual( + config_dict["chat_template"], + [{"name": "template1", "template": "{{'a'}}"}, {"name": "template2", "template": "{{'b'}}"}], + ) + new_tokenizer = tokenizer.from_pretrained(tmp_dir_name) + # Assert that the serialized list is correctly reconstructed as a single dict + self.assertEqual(new_tokenizer.chat_template, tokenizer.chat_template) def test_number_of_added_tokens(self): tokenizers = self.get_tokenizers(do_lower_case=False) From 91483a0b51475ee3fcb04e90927bca066f22b18d Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 7 Oct 2024 17:41:48 +0100 Subject: [PATCH 06/33] Add save-load test for tokenizers --- tests/test_tokenization_common.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index 3f342c7a24b..f5122831507 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -25,8 +25,8 @@ import unittest from collections import OrderedDict from itertools import takewhile -from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union from parameterized import parameterized @@ -1132,7 +1132,6 @@ def test_chat_template(self): # Check that no error raised tokenizer.apply_chat_template(dummy_conversation, tokenize=True, return_dict=False) - @require_jinja def test_chat_template_batched(self): dummy_template = "{% for message in messages %}{{message['role'] + message['content']}}{% endfor %}" @@ -1553,7 +1552,10 @@ def test_chat_template_dict_saving(self): # Assert that chat templates are correctly serialized as lists of dictionaries self.assertEqual( config_dict["chat_template"], - [{"name": "template1", "template": "{{'a'}}"}, {"name": "template2", "template": "{{'b'}}"}], + [ + {"name": "template1", "template": "{{'a'}}"}, + {"name": "template2", "template": "{{'b'}}"}, + ], ) new_tokenizer = tokenizer.from_pretrained(tmp_dir_name) # Assert that the serialized list is correctly reconstructed as a single dict From 47205a26861d3273194b1471ad619ccea98fec79 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 8 Oct 2024 13:16:58 +0100 Subject: [PATCH 07/33] stash commit --- src/transformers/tokenization_utils_base.py | 3 +++ tests/test_tokenization_common.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 3bd824312e9..40af28702b5 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2437,6 +2437,9 @@ def save_pretrained( with open(chat_template_file, "w", encoding="utf-8") as f: f.write(self.chat_template) logger.info(f"chat template saved in {chat_template_file}") + if "chat_template" in tokenizer_config: + breakpoint() + tokenizer_config.pop("chat_template") # To ensure it doesn't somehow end 
up in the config too else: tokenizer_config["chat_template"] = self.chat_template diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index f5122831507..04c1f638b75 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -1125,6 +1125,8 @@ def test_chat_template(self): # Assert the chat template is not in the config when it's saved as a separate file self.assertNotIn("chat_template", config_dict) tokenizer = tokenizer.from_pretrained(tmp_dir_name) + # TODO Figure out how "chat_template" gets into init_kwargs + # TODO Ensure "chat_template_file" doesn't end up anywhere! Where is it getting into the config? self.assertEqual(tokenizer.chat_template, dummy_template) # Test template has persisted output = tokenizer.apply_chat_template(dummy_conversation, tokenize=False, return_dict=False) From 820990b9635d35c92ccb72756d271607eddff42a Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 8 Oct 2024 15:44:52 +0100 Subject: [PATCH 08/33] Try popping the file --- src/transformers/tokenization_utils_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 40af28702b5..6125af2f790 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2177,6 +2177,7 @@ def _from_pretrained( # Merge resolved_vocab_files arguments in init_kwargs. added_tokens_file = resolved_vocab_files.pop("added_tokens_file", None) special_tokens_map_file = resolved_vocab_files.pop("special_tokens_map_file", None) + chat_template_file = resolved_vocab_files.pop("chat_template_file", None) for args_name, file_path in resolved_vocab_files.items(): if args_name not in init_kwargs: init_kwargs[args_name] = file_path @@ -2438,7 +2439,6 @@ def save_pretrained( f.write(self.chat_template) logger.info(f"chat template saved in {chat_template_file}") if "chat_template" in tokenizer_config: - breakpoint() tokenizer_config.pop("chat_template") # To ensure it doesn't somehow end up in the config too else: tokenizer_config["chat_template"] = self.chat_template From 2aeb31990217e543951d18e3caf67607561d2789 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 8 Oct 2024 16:04:33 +0100 Subject: [PATCH 09/33] make fixup --- src/transformers/tokenization_utils_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 6125af2f790..df19ecaa7b4 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2177,7 +2177,7 @@ def _from_pretrained( # Merge resolved_vocab_files arguments in init_kwargs. 
added_tokens_file = resolved_vocab_files.pop("added_tokens_file", None) special_tokens_map_file = resolved_vocab_files.pop("special_tokens_map_file", None) - chat_template_file = resolved_vocab_files.pop("chat_template_file", None) + resolved_vocab_files.pop("chat_template_file", None) for args_name, file_path in resolved_vocab_files.items(): if args_name not in init_kwargs: init_kwargs[args_name] = file_path From a92dc10946f79a474109b43e1389dbf300664cfa Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 8 Oct 2024 18:52:31 +0100 Subject: [PATCH 10/33] Pop the arg correctly --- src/transformers/tokenization_utils_base.py | 9 ++++----- tests/test_tokenization_common.py | 18 ++++++++---------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index df19ecaa7b4..75b15921b47 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2063,7 +2063,7 @@ def _from_pretrained( from_slow = kwargs.get("from_slow", False) gguf_file = kwargs.get("gguf_file", None) has_tokenizer_file = resolved_vocab_files.get("tokenizer_file", None) is not None - has_chat_template_file = resolved_vocab_files.get("chat_template_file", None) is not None + chat_template_file = resolved_vocab_files.pop("chat_template_file", None) # If one passes a GGUF file path to `gguf_file` there is no need for this check as the tokenizer will be # loaded directly from the GGUF file. @@ -2101,10 +2101,9 @@ def _from_pretrained( init_kwargs = init_configuration # If an independent chat template file exists, it takes priority over template entries in the tokenizer config - if has_chat_template_file: - with open(resolved_vocab_files["chat_template_file"]) as chat_template_handle: - chat_template = chat_template_handle.read() - init_kwargs["chat_template"] = chat_template # Clobbers any template in the config + if chat_template_file is not None: + with open(chat_template_file) as chat_template_handle: + init_kwargs["chat_template"] = chat_template_handle.read() # Clobbers any template in the config if not _is_local: if "auto_map" in init_kwargs: diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index 04c1f638b75..266690a0464 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -1108,13 +1108,13 @@ def test_chat_template(self): with tempfile.TemporaryDirectory() as tmp_dir_name: tokenizer.save_pretrained(tmp_dir_name) - tokenizer = tokenizer.from_pretrained(tmp_dir_name) + new_tokenizer = tokenizer.from_pretrained(tmp_dir_name) - self.assertEqual(tokenizer.chat_template, dummy_template) # Test template has persisted - output = tokenizer.apply_chat_template(dummy_conversation, tokenize=False, return_dict=False) + self.assertEqual(new_tokenizer.chat_template, dummy_template) # Test template has persisted + output = new_tokenizer.apply_chat_template(dummy_conversation, tokenize=False, return_dict=False) self.assertEqual(output, expected_output) # Test output is the same after reloading # Check that no error raised - tokenizer.apply_chat_template(dummy_conversation, tokenize=True, return_dict=False) + new_tokenizer.apply_chat_template(dummy_conversation, tokenize=True, return_dict=False) with tempfile.TemporaryDirectory() as tmp_dir_name: tokenizer.save_pretrained(tmp_dir_name, save_chat_template_file=True) @@ -1124,15 +1124,13 @@ def test_chat_template(self): config_dict = json.loads((Path(tmp_dir_name) / 
"tokenizer_config.json").read_text()) # Assert the chat template is not in the config when it's saved as a separate file self.assertNotIn("chat_template", config_dict) - tokenizer = tokenizer.from_pretrained(tmp_dir_name) - # TODO Figure out how "chat_template" gets into init_kwargs - # TODO Ensure "chat_template_file" doesn't end up anywhere! Where is it getting into the config? + new_tokenizer = tokenizer.from_pretrained(tmp_dir_name) - self.assertEqual(tokenizer.chat_template, dummy_template) # Test template has persisted - output = tokenizer.apply_chat_template(dummy_conversation, tokenize=False, return_dict=False) + self.assertEqual(new_tokenizer.chat_template, dummy_template) # Test template has persisted + output = new_tokenizer.apply_chat_template(dummy_conversation, tokenize=False, return_dict=False) self.assertEqual(output, expected_output) # Test output is the same after reloading # Check that no error raised - tokenizer.apply_chat_template(dummy_conversation, tokenize=True, return_dict=False) + new_tokenizer.apply_chat_template(dummy_conversation, tokenize=True, return_dict=False) @require_jinja def test_chat_template_batched(self): From 1d54b7d16d417e67023424898394127cb83642b2 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 8 Oct 2024 18:53:27 +0100 Subject: [PATCH 11/33] Pop the arg correctly --- src/transformers/tokenization_utils_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 75b15921b47..d814d990743 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2176,7 +2176,6 @@ def _from_pretrained( # Merge resolved_vocab_files arguments in init_kwargs. added_tokens_file = resolved_vocab_files.pop("added_tokens_file", None) special_tokens_map_file = resolved_vocab_files.pop("special_tokens_map_file", None) - resolved_vocab_files.pop("chat_template_file", None) for args_name, file_path in resolved_vocab_files.items(): if args_name not in init_kwargs: init_kwargs[args_name] = file_path From 5b2674873a8b681720b6febf17bf09b166bb9ac6 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 9 Oct 2024 18:01:31 +0100 Subject: [PATCH 12/33] Add processor test --- tests/test_processing_common.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index 9f0d8808912..e93ce245f86 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -18,6 +18,7 @@ import json import random import tempfile +from pathlib import Path from typing import Optional import numpy as np @@ -519,3 +520,18 @@ def test_prepare_and_validate_optional_call_args(self): processor.prepare_and_validate_optional_call_args( *(f"optional_{i}" for i in range(num_optional_call_args + 1)) ) + + def test_chat_template_save_loading(self): + processor = self.get_processor() + processor.chat_template = "test template" + with tempfile.TemporaryDirectory() as tmpdirname: + processor.save_pretrained(tmpdirname) + self.assertFalse(Path(tmpdirname, "chat_template.jinja").is_file()) + reloaded_processor = self.processor_class.from_pretrained(tmpdirname) + self.assertEqual(processor.chat_template, reloaded_processor.chat_template) + + with tempfile.TemporaryDirectory() as tmpdirname: + processor.save_pretrained(tmpdirname, save_naked_chat_template=True) + self.assertTrue(Path(tmpdirname, "chat_template.jinja").is_file()) + reloaded_processor = 
self.processor_class.from_pretrained(tmpdirname) + self.assertEqual(processor.chat_template, reloaded_processor.chat_template) From 743a4a502c47460dc2f2dea6242b7d3533788de6 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 9 Oct 2024 18:06:57 +0100 Subject: [PATCH 13/33] Fix processor code --- src/transformers/processing_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 731e253a8c2..5b3abd64460 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -686,12 +686,11 @@ def get_processor_dict( ) # Add chat template as kwarg before returning because most models don't have processor config - chat_template = None if resolved_naked_chat_template_file is not None: - with open(resolved_chat_template_file, "r", encoding="utf-8") as reader: + with open(resolved_naked_chat_template_file, "r", encoding="utf-8") as reader: chat_template = reader.read() kwargs["chat_template"] = chat_template - if resolved_chat_template_file is not None: + elif resolved_chat_template_file is not None: with open(resolved_chat_template_file, "r", encoding="utf-8") as reader: text = reader.read() chat_template = json.loads(text)["chat_template"] From 455a297c8112c28655c0e7aab6181d17a80410b4 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 10 Oct 2024 13:09:52 +0100 Subject: [PATCH 14/33] stash commit --- src/transformers/processing_utils.py | 5 ++++- src/transformers/tokenization_utils_base.py | 2 +- tests/test_processing_common.py | 13 +++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 5b3abd64460..f62347793ec 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -514,7 +514,10 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs): # `AutoProcessor` API. if hasattr(attribute, "_set_processor_class"): attribute._set_processor_class(self.__class__.__name__) - attribute.save_pretrained(save_directory) + if getattr(self, "chat_template", None) is not None and getattr(attribute, "chat_template", None) is not None and kwargs.get("save_naked_chat_template", False): + attribute.save_pretrained(save_directory, skip_chat_template_save=True) + else: + attribute.save_pretrained(save_directory) if self._auto_class is not None: # We added an attribute to the init_kwargs of the tokenizers, which needs to be cleaned up. diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index d814d990743..ee3bf226ad5 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2425,7 +2425,7 @@ def save_pretrained( tokenizer_config["extra_special_tokens"] = self.extra_special_tokens tokenizer_config.update(self.extra_special_tokens) - if self.chat_template is not None: + if self.chat_template is not None and not kwargs.get("skip_chat_template_save", False): if isinstance(self.chat_template, dict): # Chat template dicts are saved to the config as lists of dicts with fixed key names. # They will be reconstructed as a single dict during loading. 
diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index e93ce245f86..77628378682 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -535,3 +535,16 @@ def test_chat_template_save_loading(self): self.assertTrue(Path(tmpdirname, "chat_template.jinja").is_file()) reloaded_processor = self.processor_class.from_pretrained(tmpdirname) self.assertEqual(processor.chat_template, reloaded_processor.chat_template) + + def test_chat_template_tokenizer_interactions(self): + processor = self.get_processor() + if not hasattr(processor, "tokenizer"): + self.skipTest("This processor doesn't have a tokenizer.") + processor.chat_template = "test template" + processor.tokenizer.chat_template = "test_template" + with tempfile.TemporaryDirectory() as tmpdirname: + processor.save_pretrained(tmpdirname, save_naked_chat_template=True) + + reloaded_processor = self.processor_class.from_pretrained(tmpdirname) + self.assertEqual(processor.chat_template, reloaded_processor.chat_template) + self.assertEqual(processor.tokenizer.chat_template, reloaded_processor.tokenizer.chat_template) From 52614f3e698624db15e1823a0cdf1350cab54310 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 10 Oct 2024 14:18:05 +0100 Subject: [PATCH 15/33] Processor clobbers child tokenizer's chat template --- src/transformers/processing_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index f62347793ec..3bbe9156419 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -515,6 +515,7 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs): if hasattr(attribute, "_set_processor_class"): attribute._set_processor_class(self.__class__.__name__) if getattr(self, "chat_template", None) is not None and getattr(attribute, "chat_template", None) is not None and kwargs.get("save_naked_chat_template", False): + # If we're saving a chat template file, it clobbers any chat template in the tokenizer attribute.save_pretrained(save_directory, skip_chat_template_save=True) else: attribute.save_pretrained(save_directory) From a200a427b45b6070bd16c264c7a8fe5bdca4795a Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 10 Oct 2024 14:33:47 +0100 Subject: [PATCH 16/33] Processor clobbers child tokenizer's chat template --- tests/test_processing_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index 77628378682..2101d0c5625 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -544,7 +544,7 @@ def test_chat_template_tokenizer_interactions(self): processor.tokenizer.chat_template = "test_template" with tempfile.TemporaryDirectory() as tmpdirname: processor.save_pretrained(tmpdirname, save_naked_chat_template=True) - reloaded_processor = self.processor_class.from_pretrained(tmpdirname) self.assertEqual(processor.chat_template, reloaded_processor.chat_template) - self.assertEqual(processor.tokenizer.chat_template, reloaded_processor.tokenizer.chat_template) + # Check that the processor clobbers the tokenizer's template + self.assertEqual(processor.tokenizer.chat_template, None) From 5c792bc8e88d5b2487e46c2be62f07b2d3959ae0 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 10 Oct 2024 14:33:57 +0100 Subject: [PATCH 17/33] make fixup --- src/transformers/processing_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git 
a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 3bbe9156419..51a1b346b96 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -514,7 +514,11 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs): # `AutoProcessor` API. if hasattr(attribute, "_set_processor_class"): attribute._set_processor_class(self.__class__.__name__) - if getattr(self, "chat_template", None) is not None and getattr(attribute, "chat_template", None) is not None and kwargs.get("save_naked_chat_template", False): + if ( + getattr(self, "chat_template", None) is not None + and getattr(attribute, "chat_template", None) is not None + and kwargs.get("save_naked_chat_template", False) + ): # If we're saving a chat template file, it clobbers any chat template in the tokenizer attribute.save_pretrained(save_directory, skip_chat_template_save=True) else: From 5305a87b5b1e8bc67be92202f981f2f91c5eef4c Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 10 Oct 2024 14:42:33 +0100 Subject: [PATCH 18/33] Split processor/tokenizer files to avoid interactions --- src/transformers/processing_utils.py | 14 +++----------- tests/test_processing_common.py | 13 ------------- 2 files changed, 3 insertions(+), 24 deletions(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 51a1b346b96..27032fef82d 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -514,15 +514,7 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs): # `AutoProcessor` API. if hasattr(attribute, "_set_processor_class"): attribute._set_processor_class(self.__class__.__name__) - if ( - getattr(self, "chat_template", None) is not None - and getattr(attribute, "chat_template", None) is not None - and kwargs.get("save_naked_chat_template", False) - ): - # If we're saving a chat template file, it clobbers any chat template in the tokenizer - attribute.save_pretrained(save_directory, skip_chat_template_save=True) - else: - attribute.save_pretrained(save_directory) + attribute.save_pretrained(save_directory) if self._auto_class is not None: # We added an attribute to the init_kwargs of the tokenizers, which needs to be cleaned up. 
@@ -534,7 +526,7 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs): # If we save using the predefined names, we can load using `from_pretrained` # plus we save chat_template in its own file output_processor_file = os.path.join(save_directory, PROCESSOR_NAME) - output_naked_chat_template_file = os.path.join(save_directory, "chat_template.jinja") + output_naked_chat_template_file = os.path.join(save_directory, "processor_chat_template.jinja") output_chat_template_file = os.path.join(save_directory, "chat_template.json") processor_dict = self.to_dict() @@ -630,7 +622,7 @@ def get_processor_dict( else: processor_file = PROCESSOR_NAME chat_template_file = "chat_template.json" - naked_chat_template_file = "chat_template.jinja" + naked_chat_template_file = "processor_chat_template.jinja" try: # Load from local folder or from cache or download from model Hub and cache resolved_processor_file = cached_file( diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index 2101d0c5625..e93ce245f86 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -535,16 +535,3 @@ def test_chat_template_save_loading(self): self.assertTrue(Path(tmpdirname, "chat_template.jinja").is_file()) reloaded_processor = self.processor_class.from_pretrained(tmpdirname) self.assertEqual(processor.chat_template, reloaded_processor.chat_template) - - def test_chat_template_tokenizer_interactions(self): - processor = self.get_processor() - if not hasattr(processor, "tokenizer"): - self.skipTest("This processor doesn't have a tokenizer.") - processor.chat_template = "test template" - processor.tokenizer.chat_template = "test_template" - with tempfile.TemporaryDirectory() as tmpdirname: - processor.save_pretrained(tmpdirname, save_naked_chat_template=True) - reloaded_processor = self.processor_class.from_pretrained(tmpdirname) - self.assertEqual(processor.chat_template, reloaded_processor.chat_template) - # Check that the processor clobbers the tokenizer's template - self.assertEqual(processor.tokenizer.chat_template, None) From 36564683db3801917390e3ab6b8df7e1dd791ac2 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 10 Oct 2024 14:47:16 +0100 Subject: [PATCH 19/33] fix test --- tests/test_processing_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index e93ce245f86..54dd124e9b5 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -526,12 +526,12 @@ def test_chat_template_save_loading(self): processor.chat_template = "test template" with tempfile.TemporaryDirectory() as tmpdirname: processor.save_pretrained(tmpdirname) - self.assertFalse(Path(tmpdirname, "chat_template.jinja").is_file()) + self.assertFalse(Path(tmpdirname, "processor_chat_template.jinja").is_file()) reloaded_processor = self.processor_class.from_pretrained(tmpdirname) self.assertEqual(processor.chat_template, reloaded_processor.chat_template) with tempfile.TemporaryDirectory() as tmpdirname: processor.save_pretrained(tmpdirname, save_naked_chat_template=True) - self.assertTrue(Path(tmpdirname, "chat_template.jinja").is_file()) + self.assertTrue(Path(tmpdirname, "processor_chat_template.jinja").is_file()) reloaded_processor = self.processor_class.from_pretrained(tmpdirname) self.assertEqual(processor.chat_template, reloaded_processor.chat_template) From e412619bda4b57575789a28ae97099702f159e25 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 10 Oct 2024 
15:23:16 +0100 Subject: [PATCH 20/33] Expand processor tests --- tests/test_processing_common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index 54dd124e9b5..66bc4add7e4 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -526,6 +526,7 @@ def test_chat_template_save_loading(self): processor.chat_template = "test template" with tempfile.TemporaryDirectory() as tmpdirname: processor.save_pretrained(tmpdirname) + self.assertTrue(Path(tmpdirname, "chat_template.json").is_file()) self.assertFalse(Path(tmpdirname, "processor_chat_template.jinja").is_file()) reloaded_processor = self.processor_class.from_pretrained(tmpdirname) self.assertEqual(processor.chat_template, reloaded_processor.chat_template) @@ -533,5 +534,6 @@ def test_chat_template_save_loading(self): with tempfile.TemporaryDirectory() as tmpdirname: processor.save_pretrained(tmpdirname, save_naked_chat_template=True) self.assertTrue(Path(tmpdirname, "processor_chat_template.jinja").is_file()) + self.assertFalse(Path(tmpdirname, "chat_template.json").is_file()) reloaded_processor = self.processor_class.from_pretrained(tmpdirname) self.assertEqual(processor.chat_template, reloaded_processor.chat_template) From bd8168210872d8ba6274a883c855be2a5b477042 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 10 Oct 2024 15:28:05 +0100 Subject: [PATCH 21/33] Rename arg to "save_raw_chat_template" across all classes --- src/transformers/processing_utils.py | 22 ++++++++++----------- src/transformers/tokenization_utils_base.py | 4 ++-- tests/test_processing_common.py | 2 +- tests/test_tokenization_common.py | 8 ++++---- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 27032fef82d..ed734333349 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -526,17 +526,17 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs): # If we save using the predefined names, we can load using `from_pretrained` # plus we save chat_template in its own file output_processor_file = os.path.join(save_directory, PROCESSOR_NAME) - output_naked_chat_template_file = os.path.join(save_directory, "processor_chat_template.jinja") + output_raw_chat_template_file = os.path.join(save_directory, "processor_chat_template.jinja") output_chat_template_file = os.path.join(save_directory, "chat_template.json") processor_dict = self.to_dict() # Save `chat_template` in its own file. We can't get it from `processor_dict` as we popped it in `to_dict` # to avoid serializing chat template in json config file. 
So let's get it from `self` directly if self.chat_template is not None: - if kwargs.get("save_naked_chat_template", False): - with open(output_naked_chat_template_file, "w", encoding="utf-8") as writer: + if kwargs.get("save_raw_chat_template", False): + with open(output_raw_chat_template_file, "w", encoding="utf-8") as writer: writer.write(self.chat_template) - logger.info(f"chat template saved in {output_naked_chat_template_file}") + logger.info(f"chat template saved in {output_raw_chat_template_file}") else: chat_template_json_string = ( json.dumps({"chat_template": self.chat_template}, indent=2, sort_keys=True) + "\n" @@ -611,18 +611,18 @@ def get_processor_dict( resolved_processor_file = pretrained_model_name_or_path # cant't load chat-template when given a file as pretrained_model_name_or_path resolved_chat_template_file = None - resolved_naked_chat_template_file = None + resolved_raw_chat_template_file = None is_local = True elif is_remote_url(pretrained_model_name_or_path): processor_file = pretrained_model_name_or_path resolved_processor_file = download_url(pretrained_model_name_or_path) # can't load chat-template when given a file url as pretrained_model_name_or_path resolved_chat_template_file = None - resolved_naked_chat_template_file = None + resolved_raw_chat_template_file = None else: processor_file = PROCESSOR_NAME chat_template_file = "chat_template.json" - naked_chat_template_file = "processor_chat_template.jinja" + raw_chat_template_file = "processor_chat_template.jinja" try: # Load from local folder or from cache or download from model Hub and cache resolved_processor_file = cached_file( @@ -658,9 +658,9 @@ def get_processor_dict( _raise_exceptions_for_missing_entries=False, ) - resolved_naked_chat_template_file = cached_file( + resolved_raw_chat_template_file = cached_file( pretrained_model_name_or_path, - naked_chat_template_file, + raw_chat_template_file, cache_dir=cache_dir, force_download=force_download, proxies=proxies, @@ -686,8 +686,8 @@ def get_processor_dict( ) # Add chat template as kwarg before returning because most models don't have processor config - if resolved_naked_chat_template_file is not None: - with open(resolved_naked_chat_template_file, "r", encoding="utf-8") as reader: + if resolved_raw_chat_template_file is not None: + with open(resolved_raw_chat_template_file, "r", encoding="utf-8") as reader: chat_template = reader.read() kwargs["chat_template"] = chat_template elif resolved_chat_template_file is not None: diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index ee3bf226ad5..881acbe5600 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2425,14 +2425,14 @@ def save_pretrained( tokenizer_config["extra_special_tokens"] = self.extra_special_tokens tokenizer_config.update(self.extra_special_tokens) - if self.chat_template is not None and not kwargs.get("skip_chat_template_save", False): + if self.chat_template is not None: if isinstance(self.chat_template, dict): # Chat template dicts are saved to the config as lists of dicts with fixed key names. # They will be reconstructed as a single dict during loading. # We're trying to discourage chat template dicts, and they are always # saved in the config, never as single files. 
tokenizer_config["chat_template"] = [{"name": k, "template": v} for k, v in self.chat_template.items()] - elif kwargs.get("save_chat_template_file", False): + elif kwargs.get("save_raw_chat_template", False): with open(chat_template_file, "w", encoding="utf-8") as f: f.write(self.chat_template) logger.info(f"chat template saved in {chat_template_file}") diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index 66bc4add7e4..8d8fc52a08a 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -532,7 +532,7 @@ def test_chat_template_save_loading(self): self.assertEqual(processor.chat_template, reloaded_processor.chat_template) with tempfile.TemporaryDirectory() as tmpdirname: - processor.save_pretrained(tmpdirname, save_naked_chat_template=True) + processor.save_pretrained(tmpdirname, save_raw_chat_template=True) self.assertTrue(Path(tmpdirname, "processor_chat_template.jinja").is_file()) self.assertFalse(Path(tmpdirname, "chat_template.json").is_file()) reloaded_processor = self.processor_class.from_pretrained(tmpdirname) diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index 266690a0464..34c924e4771 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -1117,7 +1117,7 @@ def test_chat_template(self): new_tokenizer.apply_chat_template(dummy_conversation, tokenize=True, return_dict=False) with tempfile.TemporaryDirectory() as tmp_dir_name: - tokenizer.save_pretrained(tmp_dir_name, save_chat_template_file=True) + tokenizer.save_pretrained(tmp_dir_name, save_raw_chat_template=True) chat_template_file = Path(tmp_dir_name) / "chat_template.jinja" self.assertTrue(chat_template_file.is_file()) self.assertEqual(chat_template_file.read_text(), dummy_template) @@ -1543,11 +1543,11 @@ def test_chat_template_dict_saving(self): tokenizers = self.get_tokenizers() for tokenizer in tokenizers: with self.subTest(f"{tokenizer.__class__.__name__}"): - for save_chat_template_file in (True, False): + for save_raw_chat_template in (True, False): tokenizer.chat_template = {"template1": dummy_template_1, "template2": dummy_template_2} with tempfile.TemporaryDirectory() as tmp_dir_name: - # Test that save_chat_template_file is ignored when there's a dict of multiple templates - tokenizer.save_pretrained(tmp_dir_name, save_chat_template_file=save_chat_template_file) + # Test that save_raw_chat_template is ignored when there's a dict of multiple templates + tokenizer.save_pretrained(tmp_dir_name, save_raw_chat_template=save_raw_chat_template) config_dict = json.load(open(os.path.join(tmp_dir_name, "tokenizer_config.json"))) # Assert that chat templates are correctly serialized as lists of dictionaries self.assertEqual( From 50d374a131ff23ac05ddbb4a06bcbb58e3269e97 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 10 Oct 2024 15:43:15 +0100 Subject: [PATCH 22/33] Update processor warning --- src/transformers/processing_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index ed734333349..5b4464d8906 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -721,7 +721,7 @@ def get_processor_dict( if "chat_template" in processor_dict and processor_dict["chat_template"] is not None: logger.warning_once( - "Chat templates should be in a 'chat_template.jinja' file but found key='chat_template' " + "Chat templates should be in a 'processor_chat_template.jinja' file 
but found key='chat_template' " "in the processor's config. Make sure to move your template to its own file." ) From a81a7e1543588b8fa22bcb2dabfbaf912bb4f804 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 31 Oct 2024 16:56:27 +0000 Subject: [PATCH 23/33] Move templates to single file --- src/transformers/processing_utils.py | 4 ++-- tests/test_processing_common.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 5b4464d8906..bc8658064b1 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -526,7 +526,7 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs): # If we save using the predefined names, we can load using `from_pretrained` # plus we save chat_template in its own file output_processor_file = os.path.join(save_directory, PROCESSOR_NAME) - output_raw_chat_template_file = os.path.join(save_directory, "processor_chat_template.jinja") + output_raw_chat_template_file = os.path.join(save_directory, "chat_template.jinja") output_chat_template_file = os.path.join(save_directory, "chat_template.json") processor_dict = self.to_dict() @@ -622,7 +622,7 @@ def get_processor_dict( else: processor_file = PROCESSOR_NAME chat_template_file = "chat_template.json" - raw_chat_template_file = "processor_chat_template.jinja" + raw_chat_template_file = "chat_template.jinja" try: # Load from local folder or from cache or download from model Hub and cache resolved_processor_file = cached_file( diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index 8d8fc52a08a..a8338343fdd 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -527,13 +527,13 @@ def test_chat_template_save_loading(self): with tempfile.TemporaryDirectory() as tmpdirname: processor.save_pretrained(tmpdirname) self.assertTrue(Path(tmpdirname, "chat_template.json").is_file()) - self.assertFalse(Path(tmpdirname, "processor_chat_template.jinja").is_file()) + self.assertFalse(Path(tmpdirname, "chat_template.jinja").is_file()) reloaded_processor = self.processor_class.from_pretrained(tmpdirname) self.assertEqual(processor.chat_template, reloaded_processor.chat_template) with tempfile.TemporaryDirectory() as tmpdirname: processor.save_pretrained(tmpdirname, save_raw_chat_template=True) - self.assertTrue(Path(tmpdirname, "processor_chat_template.jinja").is_file()) + self.assertTrue(Path(tmpdirname, "chat_template.jinja").is_file()) self.assertFalse(Path(tmpdirname, "chat_template.json").is_file()) reloaded_processor = self.processor_class.from_pretrained(tmpdirname) self.assertEqual(processor.chat_template, reloaded_processor.chat_template) From f9127c17fbb55d5d0db96d51444912ee0c94e4d0 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 1 Nov 2024 17:22:54 +0000 Subject: [PATCH 24/33] Move templates to single file --- tests/test_processing_common.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index a8338343fdd..ab19345df58 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -530,6 +530,10 @@ def test_chat_template_save_loading(self): self.assertFalse(Path(tmpdirname, "chat_template.jinja").is_file()) reloaded_processor = self.processor_class.from_pretrained(tmpdirname) self.assertEqual(processor.chat_template, reloaded_processor.chat_template) + if hasattr(processor, "tokenizer"): + # When we don't use 
single-file chat template saving, processor and tokenizer chat templates + # should remain separate + self.assertIsNone(getattr(reloaded_processor.tokenizer, "chat_template", None)) with tempfile.TemporaryDirectory() as tmpdirname: processor.save_pretrained(tmpdirname, save_raw_chat_template=True) @@ -537,3 +541,7 @@ def test_chat_template_save_loading(self): self.assertFalse(Path(tmpdirname, "chat_template.json").is_file()) reloaded_processor = self.processor_class.from_pretrained(tmpdirname) self.assertEqual(processor.chat_template, reloaded_processor.chat_template) + # When we save as single files, tokenizers and processors share a chat template, which means + # the reloaded tokenizer should get the chat template as well + if hasattr(processor, "tokenizer"): + self.assertEqual(reloaded_processor.chat_template, reloaded_processor.tokenizer.chat_template) From 3878828df23f715b16cc5b899384389332a8cd4a Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 1 Nov 2024 17:32:31 +0000 Subject: [PATCH 25/33] Improve testing for processor/tokenizer clashes --- tests/test_processing_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index ab19345df58..7b384b25f1b 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -526,6 +526,7 @@ def test_chat_template_save_loading(self): processor.chat_template = "test template" with tempfile.TemporaryDirectory() as tmpdirname: processor.save_pretrained(tmpdirname) + existing_tokenizer_template = getattr(processor.tokenizer, "chat_template", None) self.assertTrue(Path(tmpdirname, "chat_template.json").is_file()) self.assertFalse(Path(tmpdirname, "chat_template.jinja").is_file()) reloaded_processor = self.processor_class.from_pretrained(tmpdirname) @@ -533,7 +534,7 @@ def test_chat_template_save_loading(self): if hasattr(processor, "tokenizer"): # When we don't use single-file chat template saving, processor and tokenizer chat templates # should remain separate - self.assertIsNone(getattr(reloaded_processor.tokenizer, "chat_template", None)) + self.assertEqual(getattr(reloaded_processor.tokenizer, "chat_template", None), existing_tokenizer_template) with tempfile.TemporaryDirectory() as tmpdirname: processor.save_pretrained(tmpdirname, save_raw_chat_template=True) From 74cd295a011683d56dbd5ada788e87328cd8cbc8 Mon Sep 17 00:00:00 2001 From: Matt Date: Fri, 1 Nov 2024 17:37:36 +0000 Subject: [PATCH 26/33] Improve testing for processor/tokenizer clashes --- tests/test_processing_common.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index 7b384b25f1b..4c4f6fac498 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -523,18 +523,17 @@ def test_prepare_and_validate_optional_call_args(self): def test_chat_template_save_loading(self): processor = self.get_processor() + existing_tokenizer_template = getattr(processor.tokenizer, "chat_template", None) processor.chat_template = "test template" with tempfile.TemporaryDirectory() as tmpdirname: processor.save_pretrained(tmpdirname) - existing_tokenizer_template = getattr(processor.tokenizer, "chat_template", None) self.assertTrue(Path(tmpdirname, "chat_template.json").is_file()) self.assertFalse(Path(tmpdirname, "chat_template.jinja").is_file()) reloaded_processor = self.processor_class.from_pretrained(tmpdirname) self.assertEqual(processor.chat_template, 
reloaded_processor.chat_template) - if hasattr(processor, "tokenizer"): - # When we don't use single-file chat template saving, processor and tokenizer chat templates - # should remain separate - self.assertEqual(getattr(reloaded_processor.tokenizer, "chat_template", None), existing_tokenizer_template) + # When we don't use single-file chat template saving, processor and tokenizer chat templates + # should remain separate + self.assertEqual(getattr(reloaded_processor.tokenizer, "chat_template", None), existing_tokenizer_template) with tempfile.TemporaryDirectory() as tmpdirname: processor.save_pretrained(tmpdirname, save_raw_chat_template=True) @@ -544,5 +543,4 @@ def test_chat_template_save_loading(self): self.assertEqual(processor.chat_template, reloaded_processor.chat_template) # When we save as single files, tokenizers and processors share a chat template, which means # the reloaded tokenizer should get the chat template as well - if hasattr(processor, "tokenizer"): - self.assertEqual(reloaded_processor.chat_template, reloaded_processor.tokenizer.chat_template) + self.assertEqual(reloaded_processor.chat_template, reloaded_processor.tokenizer.chat_template) From cf577e1aa8be79f19f94b4f624fe86ecced6bac1 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 5 Nov 2024 16:25:55 +0000 Subject: [PATCH 27/33] Extend saving test --- tests/test_tokenization_common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index 34c924e4771..a34abe4284f 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -1557,6 +1557,7 @@ def test_chat_template_dict_saving(self): {"name": "template2", "template": "{{'b'}}"}, ], ) + self.assertFalse(os.path.exists(os.path.join(tmp_dir_name, "chat_template.jinja"))) new_tokenizer = tokenizer.from_pretrained(tmp_dir_name) # Assert that the serialized list is correctly reconstructed as a single dict self.assertEqual(new_tokenizer.chat_template, tokenizer.chat_template) From b97e76de5d57d81d94939aebc2278ac986a967c0 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 5 Nov 2024 16:36:06 +0000 Subject: [PATCH 28/33] Test file priority correctly --- tests/test_tokenization_common.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index a34abe4284f..ee22b900870 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -1132,6 +1132,7 @@ def test_chat_template(self): # Check that no error raised new_tokenizer.apply_chat_template(dummy_conversation, tokenize=True, return_dict=False) + @require_jinja def test_chat_template_batched(self): dummy_template = "{% for message in messages %}{{message['role'] + message['content']}}{% endfor %}" @@ -1562,6 +1563,22 @@ def test_chat_template_dict_saving(self): # Assert that the serialized list is correctly reconstructed as a single dict self.assertEqual(new_tokenizer.chat_template, tokenizer.chat_template) + @require_jinja + def test_chat_template_file_priority(self): + dummy_template1 = "a" + dummy_template2 = "b" + tokenizers = self.get_tokenizers() + for tokenizer in tokenizers: + with self.subTest(f"{tokenizer.__class__.__name__}"): + with tempfile.TemporaryDirectory() as tmp_dir_name: + tokenizer.chat_template = dummy_template1 + tokenizer.save_pretrained(tmp_dir_name, save_raw_chat_template=False) + with open(os.path.join(tmp_dir_name, "chat_template.jinja"), "w") as f: + f.write(dummy_template2) + new_tokenizer = 
tokenizer.from_pretrained(tmp_dir_name) + # Assert the file template clobbers any template in the config + self.assertEqual(new_tokenizer.chat_template, dummy_template2) + def test_number_of_added_tokens(self): tokenizers = self.get_tokenizers(do_lower_case=False) for tokenizer in tokenizers: From e5b8b76a35fc342de18a14138a8cdda12fefb081 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 5 Nov 2024 17:10:20 +0000 Subject: [PATCH 29/33] make fixup --- tests/test_tokenization_common.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index ee22b900870..ed09d800ad6 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -1132,7 +1132,6 @@ def test_chat_template(self): # Check that no error raised new_tokenizer.apply_chat_template(dummy_conversation, tokenize=True, return_dict=False) - @require_jinja def test_chat_template_batched(self): dummy_template = "{% for message in messages %}{{message['role'] + message['content']}}{% endfor %}" @@ -1573,7 +1572,7 @@ def test_chat_template_file_priority(self): with tempfile.TemporaryDirectory() as tmp_dir_name: tokenizer.chat_template = dummy_template1 tokenizer.save_pretrained(tmp_dir_name, save_raw_chat_template=False) - with open(os.path.join(tmp_dir_name, "chat_template.jinja"), "w") as f: + with Path(tmp_dir_name, "chat_template.jinja").open("w") as f: f.write(dummy_template2) new_tokenizer = tokenizer.from_pretrained(tmp_dir_name) # Assert the file template clobbers any template in the config From 5b40f040389d779b6493ac31a135d3c9926a6677 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 5 Nov 2024 18:13:34 +0000 Subject: [PATCH 30/33] Don't pop the chat template file before the slow tokenizer gets a look --- src/transformers/tokenization_utils_base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 881acbe5600..d22aeacb4e4 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2063,7 +2063,7 @@ def _from_pretrained( from_slow = kwargs.get("from_slow", False) gguf_file = kwargs.get("gguf_file", None) has_tokenizer_file = resolved_vocab_files.get("tokenizer_file", None) is not None - chat_template_file = resolved_vocab_files.pop("chat_template_file", None) + # If one passes a GGUF file path to `gguf_file` there is no need for this check as the tokenizer will be # loaded directly from the GGUF file. @@ -2101,6 +2101,7 @@ def _from_pretrained( init_kwargs = init_configuration # If an independent chat template file exists, it takes priority over template entries in the tokenizer config + chat_template_file = resolved_vocab_files.pop("chat_template_file", None) if chat_template_file is not None: with open(chat_template_file) as chat_template_handle: init_kwargs["chat_template"] = chat_template_handle.read() # Clobbers any template in the config @@ -2301,6 +2302,7 @@ def _from_pretrained( "Special tokens have been added in the vocabulary, make sure the associated word embeddings are" " fine-tuned or trained." 
) + breakpoint() return tokenizer @staticmethod From 2159cdc7f70024c1c773ecab49cc7b9c0a436024 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 5 Nov 2024 18:13:56 +0000 Subject: [PATCH 31/33] Remove breakpoint --- src/transformers/tokenization_utils_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index d22aeacb4e4..35d9344276e 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2302,7 +2302,6 @@ def _from_pretrained( "Special tokens have been added in the vocabulary, make sure the associated word embeddings are" " fine-tuned or trained." ) - breakpoint() return tokenizer @staticmethod From 5b88768b7400e25b24e06b047df7c0cdc24089f0 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 5 Nov 2024 18:14:11 +0000 Subject: [PATCH 32/33] make fixup --- src/transformers/tokenization_utils_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 35d9344276e..2346f3380c4 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2064,7 +2064,6 @@ def _from_pretrained( gguf_file = kwargs.get("gguf_file", None) has_tokenizer_file = resolved_vocab_files.get("tokenizer_file", None) is not None - # If one passes a GGUF file path to `gguf_file` there is no need for this check as the tokenizer will be # loaded directly from the GGUF file. if (from_slow or not has_tokenizer_file) and cls.slow_tokenizer_class is not None and not gguf_file: From 1187047d3f8be8fef9a0d37a6a4b05aded64c5d4 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 5 Nov 2024 18:31:45 +0000 Subject: [PATCH 33/33] Fix error --- src/transformers/processing_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index bc8658064b1..c2646300367 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -721,7 +721,7 @@ def get_processor_dict( if "chat_template" in processor_dict and processor_dict["chat_template"] is not None: logger.warning_once( - "Chat templates should be in a 'processor_chat_template.jinja' file but found key='chat_template' " + "Chat templates should be in a 'chat_template.jinja' file but found key='chat_template' " "in the processor's config. Make sure to move your template to its own file." )
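
Taken together, the series saves a tokenizer or processor chat template to a standalone chat_template.jinja and gives that file priority over any chat_template entry in the config at load time. Below is a minimal sketch of the resulting workflow, assuming the save_raw_chat_template kwarg exercised by the tests in these patches and this branch's loading priority; the flag name and file layout may differ in the final merged API.

    # Sketch only: assumes this PR branch. `save_raw_chat_template` is the kwarg
    # used by the tests in these patches, not a guaranteed public API.
    import os
    import tempfile

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    tokenizer.chat_template = "{% for message in messages %}{{ message['content'] }}{% endfor %}"

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Write the template to its own chat_template.jinja instead of tokenizer_config.json
        tokenizer.save_pretrained(tmp_dir, save_raw_chat_template=True)
        assert os.path.isfile(os.path.join(tmp_dir, "chat_template.jinja"))

        # On this branch, an on-disk chat_template.jinja clobbers any template left
        # in the config, so editing the file changes what from_pretrained() returns.
        with open(os.path.join(tmp_dir, "chat_template.jinja"), "w", encoding="utf-8") as f:
            f.write("{{ 'edited' }}")
        reloaded = AutoTokenizer.from_pretrained(tmp_dir)
        assert reloaded.chat_template == "{{ 'edited' }}"

This mirrors test_chat_template_file_priority above: the raw .jinja file wins over the config entry, which is the clobbering behaviour _from_pretrained applies when a chat_template_file is resolved.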