From 746709642c81aa22926765aef67e086a15aef076 Mon Sep 17 00:00:00 2001
From: Rui Qiao <161574667+ruisearch42@users.noreply.github.com>
Date: Wed, 7 Aug 2024 17:06:01 -0700
Subject: [PATCH] [Misc] Fix typos in scheduler.py (#7285)

Signed-off-by: Rui Qiao
---
 vllm/core/scheduler.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py
index f60463107be44..950abfccba4c3 100644
--- a/vllm/core/scheduler.py
+++ b/vllm/core/scheduler.py
@@ -678,7 +678,7 @@ def _schedule_prefills(
                 all tokens.
 
         Returns:
-            SchedulerSwappedInOutputs.
+            SchedulerPrefillOutputs.
         """
         ignored_seq_groups: List[SequenceGroup] = []
         seq_groups: List[SequenceGroup] = []
@@ -851,7 +851,7 @@ def _schedule_default(self) -> SchedulerOutputs:
             preempted=preempted,
         )
 
-    def _schedule_chunked_prefill(self):
+    def _schedule_chunked_prefill(self) -> SchedulerOutputs:
         """Schedule queued requests.
 
         Chunked prefill allows to chunk prefill requests, batch them together
@@ -862,7 +862,7 @@ def _schedule_chunked_prefill(self):
 
         The policy can sustain the high GPU utilization because it can put
         prefill and decodes requests to the same batch, while it improves
-        inter token latency because decodes requests don't need to blocked
+        inter token latency because decodes requests don't need to be blocked
         by prefill requests.
         """
         budget = SchedulingBudget(