
Commit

[Bugfix][Frontend] Disable embedding API for chat models (vllm-project#7504)

Co-authored-by: jack <jack@alex>
QwertyJack and jack authored Aug 14, 2024
1 parent d3d9cb6 commit 67d115d
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion vllm/entrypoints/openai/serving_embedding.py
@@ -71,7 +71,7 @@ def __init__(
             lora_modules=None,
             prompt_adapters=None,
             request_logger=request_logger)
-        self._check_embedding_mode(model_config.embedding_mode)
+        self._enabled = self._check_embedding_mode(model_config.embedding_mode)
 
     async def create_embedding(
         self,
@@ -83,6 +83,8 @@ async def create_embedding(
         See https://platform.openai.com/docs/api-reference/embeddings/create
         for the API specification. This API mimics the OpenAI Embedding API.
         """
+        if not self._enabled:
+            return self.create_error_response("Embedding API disabled")
         error_check_ret = await self._check_model(request)
         if error_check_ret is not None:
             return error_check_ret
@@ -179,3 +181,4 @@ def _check_embedding_mode(self, embedding_mode: bool):
                 "embedding_mode is False. Embedding API will not work.")
         else:
             logger.info("Activating the server engine with embedding enabled.")
+        return embedding_mode
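
Pieced together from the three hunks above, the change makes `__init__` remember whether the loaded model supports embeddings and then rejects embedding requests up front when it does not. The following is a minimal, self-contained sketch of that guard; the class and method names mirror the diff, but the surrounding vLLM plumbing (the OpenAIServing base class, request/response types, and error-response helper) is replaced here by simple stand-ins so the example runs on its own.

    # Hedged sketch of the guard introduced by this commit; not the actual
    # vLLM class, only the control flow visible in the diff.
    import asyncio
    import logging

    logger = logging.getLogger("serving_embedding_sketch")
    logging.basicConfig(level=logging.INFO)


    class OpenAIServingEmbeddingSketch:
        def __init__(self, embedding_mode: bool):
            # As in the patch, the result of the check is kept on the instance.
            self._enabled = self._check_embedding_mode(embedding_mode)

        async def create_embedding(self, request: dict) -> dict:
            # New guard from the patch: reject embedding requests when the
            # loaded model is not an embedding model (e.g. a chat model).
            if not self._enabled:
                return self.create_error_response("Embedding API disabled")
            return {"object": "list", "data": ["<embedding would go here>"]}

        def _check_embedding_mode(self, embedding_mode: bool) -> bool:
            if not embedding_mode:
                logger.warning(
                    "embedding_mode is False. Embedding API will not work.")
            else:
                logger.info(
                    "Activating the server engine with embedding enabled.")
            # The patch adds this return so __init__ can store the flag.
            return embedding_mode

        def create_error_response(self, message: str) -> dict:
            # Stand-in for vLLM's real error-response helper.
            return {"error": {"message": message}}


    if __name__ == "__main__":
        chat_server = OpenAIServingEmbeddingSketch(embedding_mode=False)
        print(asyncio.run(chat_server.create_embedding({"input": "hello"})))
        # -> {'error': {'message': 'Embedding API disabled'}}

With this in place, a server started on a chat model logs the existing warning at startup and then returns an error response for embedding requests instead of attempting to compute embeddings.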
