[Frontend] Create ErrorResponse instead of raising exceptions in run_…
pooyadavoodi authored Sep 11, 2024
1 parent 6a512a0 commit cea95df
Showing 2 changed files with 31 additions and 4 deletions.
4 changes: 3 additions & 1 deletion tests/entrypoints/openai/test_run_batch.py
@@ -8,7 +8,9 @@
INPUT_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NonExistModel", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
{"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NonExistModel", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-4", "method": "POST", "url": "/bad_url", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-5", "method": "POST", "url": "/v1/chat/completions", "body": {"stream": "True", "model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""

INVALID_INPUT_BATCH = """{"invalid_field": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "NousResearch/Meta-Llama-3-8B-Instruct", "messages": [{"role": "system", "content": "You are an unhelpful assistant."},{"role": "user", "content": "Hello world!"}],"max_tokens": 1000}}"""
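The two new entries exercise the error path added in this commit: request-4 posts to an unsupported URL and request-5 sets "stream": "True". With this change, each of them should come back as a line in the output file whose error field is populated (status 400) instead of an exception aborting the whole batch. Below is a minimal sketch, assuming only the custom_id and error fields visible in the BatchRequestOutput construction further down, of how a caller might collect the failed requests from the output file; it is illustrative, not part of the commit.

import json

def collect_failed_ids(output_lines):
    """Return the custom_ids of batch output rows that carry an error (sketch only)."""
    failed = []
    for line in output_lines:
        row = json.loads(line)
        # With this commit, bad URLs or stream=True yield a row whose
        # "error" field is set rather than raising inside run_batch.
        if row.get("error"):
            failed.append(row.get("custom_id"))
    return failed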
31 changes: 28 additions & 3 deletions vllm/entrypoints/openai/run_batch.py
@@ -1,4 +1,5 @@
import asyncio
+from http import HTTPStatus
from io import StringIO
from typing import Awaitable, Callable, List, Optional

@@ -135,6 +136,25 @@ async def write_file(path_or_url: str, data: str) -> None:
f.write(data)


+def make_error_request_output(request: BatchRequestInput,
+                              error_msg: str) -> BatchRequestOutput:
+    batch_output = BatchRequestOutput(
+        id=f"vllm-{random_uuid()}",
+        custom_id=request.custom_id,
+        response=BatchResponseData(
+            status_code=HTTPStatus.BAD_REQUEST,
+            request_id=f"vllm-batch-{random_uuid()}",
+        ),
+        error=error_msg,
+    )
+    return batch_output
+
+
+async def make_async_error_request_output(
+        request: BatchRequestInput, error_msg: str) -> BatchRequestOutput:
+    return make_error_request_output(request, error_msg)


async def run_request(serving_engine_func: Callable,
request: BatchRequestInput,
tracker: BatchProgressTracker) -> BatchRequestOutput:
@@ -158,7 +178,8 @@ async def run_request(serving_engine_func: Callable,
error=response,
)
else:
raise ValueError("Request must not be sent in stream mode")
batch_output = make_error_request_output(
request, error_msg="Request must not be sent in stream mode")

tracker.completed()
return batch_output
@@ -225,8 +246,12 @@ async def main(args):
tracker))
tracker.submitted()
else:
raise ValueError("Only /v1/chat/completions and /v1/embeddings are"
"supported in the batch endpoint.")
response_futures.append(
make_async_error_request_output(
request,
error_msg="Only /v1/chat/completions and "
"/v1/embeddings are supported in the batch endpoint.",
))

with tracker.pbar():
responses = await asyncio.gather(*response_futures)

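A note on the shape of the fix: the synchronous make_error_request_output builds the 400-status BatchRequestOutput, while the async wrapper exists so that a rejected request can be appended to response_futures and resolved by asyncio.gather exactly like a real chat or embedding call. The following is a minimal, self-contained sketch of that pattern; fake_request and fake_error are hypothetical stand-ins, not names from the commit.

import asyncio

async def fake_request(i: int) -> str:
    # stands in for an awaited chat/completions or embeddings call
    return f"result-{i}"

async def fake_error(msg: str) -> str:
    # returns a pre-built "error row" immediately, but is still awaitable,
    # so it can sit in the same list as real request coroutines
    return f"error: {msg}"

async def main() -> None:
    futures = [fake_request(1), fake_error("unsupported URL"), fake_request(2)]
    # gather resolves successes and error rows in submission order
    print(await asyncio.gather(*futures))

if __name__ == "__main__":
    asyncio.run(main())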