Skip to content

Commit

Permalink
example
Browse files (browse the repository at this point in the history)
  • Loading branch information
robertgshaw2-neuralmagic committed Aug 10, 2024
1 parent 3015a4b commit 8957faf
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 7 deletions.
2 changes: 1 addition & 1 deletion examples/openai_completion_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8001/v1"
openai_api_base = "http://localhost:8000/v1"

client = OpenAI(
# defaults to os.environ.get("OPENAI_API_KEY")
Expand Down
37 changes: 31 additions & 6 deletions vllm/engine/async_llm_engine.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import asyncio
import time
import zmq
import zmq.asyncio
import pickle
from functools import partial
from typing import (AsyncGenerator, Callable, Dict, Iterable, List, Mapping,
Optional, Set, Tuple, Type, Union)
Expand Down Expand Up @@ -252,9 +255,24 @@ def has_new_requests(self):
class _AsyncLLMEngine(LLMEngine):
"""Extension of LLMEngine to add async methods."""

async def do_log_stats_async(self, scheduler_outputs, model_output):
self.do_log_stats(scheduler_outputs, model_output)
def __init__(self, *args, **kwargs):
    """Set up the engine and start a background stats-logging loop.

    An inproc ZeroMQ PUSH/PULL pair decouples the hot ``step_async``
    path from stat logging: the engine pushes pickled payloads and a
    long-lived asyncio task pulls them and calls ``do_log_stats``.
    """
    super().__init__(*args, **kwargs)

    # One asyncio-aware zmq context owns both ends of the pipe.
    self.logger_ctx = zmq.asyncio.Context()

    # In-process transport; both endpoints live in this object.
    endpoint = "inproc://doesitwork"

    # Producer end: step_async() pushes stats payloads here.  Bound
    # first so the inproc endpoint exists before the consumer connects.
    self.to_logger = self.logger_ctx.socket(zmq.constants.PUSH)
    self.to_logger.bind(endpoint)

    # Consumer end: drained by run_logging_loop().
    self.from_engine = self.logger_ctx.socket(zmq.constants.PULL)
    self.from_engine.connect(endpoint)

    # NOTE(review): asyncio.create_task requires a running event loop,
    # so this constructor must run inside async context — confirm
    # callers.  The task/sockets are never explicitly closed; presumably
    # they live for the process lifetime — verify.
    self.logging_task = asyncio.create_task(self.run_logging_loop())


async def run_logging_loop(self):
    """Background consumer: forever pull a stats payload dict off the
    engine's PULL socket and forward it to ``do_log_stats``.

    Each payload is a mapping of keyword arguments (as sent by the
    producer via ``send_pyobj``), expanded directly into the call.
    """
    while True:
        payload = await self.from_engine.recv_pyobj()
        self.do_log_stats(**payload)

async def step_async(
self, virtual_engine: int
Expand Down Expand Up @@ -294,15 +312,22 @@ async def step_async(
scheduler_outputs.ignored_seq_groups, seq_group_metadata_list)

# Log stats.
log_task = asyncio.create_task(self.do_log_stats_async(
scheduler_outputs, output))
_running_tasks.add(log_task)
log_task.add_done_callback(_running_tasks.discard)
# log_task = asyncio.create_task(self.do_log_stats_async(
# scheduler_outputs, output))
# _running_tasks.add(log_task)
# log_task.add_done_callback(_running_tasks.discard)
# self.do_log_stats(scheduler_outputs, output)
await self.to_logger.send_pyobj(
{
"scheduler_outputs": scheduler_outputs,
"model_output": output
}
)

# Tracing
self.do_tracing(scheduler_outputs)


return request_outputs

async def stop_remote_worker_execution_loop_async(self) -> None:
Expand Down
1 change: 1 addition & 0 deletions vllm/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ def __init__(
),
))


def _initialize_kv_caches(self) -> None:
"""Initialize the KV cache in the worker(s).
Expand Down

0 comments on commit 8957faf

Please sign in to comment.