From 9b9cef3145381721fa950c89718fe71849ac2a55 Mon Sep 17 00:00:00 2001
From: Joe Runde
Date: Tue, 10 Dec 2024 09:38:23 -0700
Subject: [PATCH] [Bugfix] Backport request id validation to v0 (#11036)

Signed-off-by: Joe Runde
---
 vllm/engine/multiprocessing/client.py | 4 ++++
 vllm/v1/engine/async_llm.py           | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/multiprocessing/client.py b/vllm/engine/multiprocessing/client.py
index 32bd83305bb8f..a729023bc00bb 100644
--- a/vllm/engine/multiprocessing/client.py
+++ b/vllm/engine/multiprocessing/client.py
@@ -576,6 +576,10 @@ async def _process_request(
         if self._errored_with is not None:
             raise ENGINE_DEAD_ERROR(self._errored_with)
 
+        # Ensure the request id is unique among running requests
+        if request_id in self.output_queues:
+            raise ValueError(f"Request {request_id} already exists")
+
         # Constructing guided decoding logits processors is expensive, so we do
         # it here to avoid contending with cpu resources and the GIL on the
         # backend process.
diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index 26fd650aee4b7..24cafeff63d1e 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -152,7 +152,7 @@ async def add_request(
         """Add new request to the AsyncLLM."""
 
         if self.detokenizer.is_request_active(request_id):
-            raise KeyError(f"Request {request_id} already exists.")
+            raise ValueError(f"Request {request_id} already exists.")
 
         # 1) Create a new AsyncStream for the request.
         stream = self._add_request_to_streams(request_id)
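
The check backported here rejects a request id that is already registered among in-flight requests, and both engines now raise ValueError rather than KeyError for that case. Below is a minimal standalone sketch of the same pattern; ToyEngineClient and its add_request method are hypothetical stand-ins for illustration, not vLLM's actual classes.

# Illustrative sketch only (not vLLM code): mirrors the duplicate-request-id
# check added in this patch, where a registry of in-flight requests is
# consulted before a new request is accepted.
import asyncio


class ToyEngineClient:
    """Hypothetical engine client that tracks in-flight requests by id."""

    def __init__(self) -> None:
        # Maps request_id -> queue of streamed outputs, analogous to the
        # client's output_queues mapping.
        self.output_queues: dict[str, asyncio.Queue] = {}

    def add_request(self, request_id: str) -> asyncio.Queue:
        # Ensure the request id is unique among running requests; raising
        # ValueError (instead of KeyError) matches the error type this
        # patch standardizes on.
        if request_id in self.output_queues:
            raise ValueError(f"Request {request_id} already exists")
        queue: asyncio.Queue = asyncio.Queue()
        self.output_queues[request_id] = queue
        return queue


client = ToyEngineClient()
client.add_request("req-1")
try:
    client.add_request("req-1")  # duplicate id is rejected up front
except ValueError as e:
    print(e)  # Request req-1 already exists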