From 11fcf0e0661365f24bfff9591434a0cec640df6c Mon Sep 17 00:00:00 2001
From: Noam Gat
Date: Fri, 22 Nov 2024 09:59:47 +0200
Subject: [PATCH] Remove token-adding chat embedding params (#10551)

Signed-off-by: Noam Gat
---
 vllm/entrypoints/openai/protocol.py          | 16 ----------------
 vllm/entrypoints/openai/serving_embedding.py |  6 ++++--
 2 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index a82212677f63a..9db5951e5fe5b 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -760,22 +760,6 @@ class EmbeddingChatRequest(OpenAIBaseModel):
     # doc: end-chat-embedding-pooling-params
 
     # doc: begin-chat-embedding-extra-params
-    add_generation_prompt: bool = Field(
-        default=True,
-        description=
-        ("If true, the generation prompt will be added to the chat template. "
-         "This is a parameter used by chat template in tokenizer config of the "
-         "model."),
-    )
-    continue_final_message: bool = Field(
-        default=False,
-        description=
-        ("If this is set, the chat will be formatted so that the final "
-         "message in the chat is open-ended, without any EOS tokens. The "
-         "model will continue this message rather than starting a new one. "
-         "This allows you to \"prefill\" part of the model's response for it. "
-         "Cannot be used at the same time as `add_generation_prompt`."),
-    )
     add_special_tokens: bool = Field(
         default=False,
         description=(
diff --git a/vllm/entrypoints/openai/serving_embedding.py b/vllm/entrypoints/openai/serving_embedding.py
index 74ad7389784fc..c84a7d2d8e13e 100644
--- a/vllm/entrypoints/openai/serving_embedding.py
+++ b/vllm/entrypoints/openai/serving_embedding.py
@@ -148,8 +148,10 @@ async def create_embedding(
                     chat_template=request.chat_template or self.chat_template,
                     chat_template_content_format=self.
                     chat_template_content_format,
-                    add_generation_prompt=request.add_generation_prompt,
-                    continue_final_message=request.continue_final_message,
+                    # In embedding requests, we are not generating tokens,
+                    # so there is no need to append extra tokens to the input
+                    add_generation_prompt=False,
+                    continue_final_message=False,
                     truncate_prompt_tokens=truncate_prompt_tokens,
                     add_special_tokens=request.add_special_tokens,
                 )
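
Note (not part of the patch itself): the change hardcodes add_generation_prompt=False
and continue_final_message=False because an embedding request only needs the rendered
conversation text, not a cue for the model to start generating a reply. Below is a
minimal sketch of what add_generation_prompt changes in a rendered chat prompt, using
Hugging Face transformers' apply_chat_template directly; the model name is an
arbitrary example, not something taken from the patch.

    # Sketch only: shows the effect of add_generation_prompt on the rendered
    # prompt. Any chat model whose template uses it would work here.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
    messages = [{"role": "user", "content": "What is the capital of France?"}]

    # True appends the assistant turn header (e.g. "<|im_start|>assistant")
    # so a generative model knows where to begin its reply.
    gen_prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True)

    # False renders only the conversation itself, which is what an embedding
    # model should encode; this is the behavior the patch now always uses.
    embed_prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=False)

    print(gen_prompt)
    print(embed_prompt)

After this patch, the embeddings endpoint behaves as if both flags are False
regardless of what the client sends, since the fields no longer exist on
EmbeddingChatRequest.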