use RequestType

octoml · Feb 2, 2024 · e58b7d3
1 parent ec7b61d
commit e58b7d3
Show file tree

Hide file tree

Showing 3 changed files with 9 additions and 8 deletions.
diff --git a/serve/mlc_serve/engine/engine_common.py b/serve/mlc_serve/engine/engine_common.py
@@ -27,6 +27,7 @@
     ConversationTemplate,
     KVCacheManager,
     ModelModule,
+    RequestType,
     TextGenerator,
     Tokenizer as TokenizerP,
 )
@@ -228,10 +229,8 @@ def update_sequence(
 
 def get_requests_to_process(
     current_states: list[RequestState], cache_manager: KVCacheManager
-) -> Tuple[
-    list[Union[PrefillRequest, DecodeRequest, EvalMultiQueryRequest]], bool, int
-]:
-    requests: list[Union[PrefillRequest, DecodeRequest, EvalMultiQueryRequest]] = []
+) -> Tuple[list[RequestType], bool, int]:
+    requests: list[RequestType] = []
     # TODO: consider having hybrid batch if the underlying attention kernel supports
     # mixing prefill and decode.
     is_prompt_batch = any(not state.is_prefilled for state in current_states)

diff --git a/serve/mlc_serve/model/dummy_model.py b/serve/mlc_serve/model/dummy_model.py
@@ -11,6 +11,7 @@
     DecodeRequest,
     KVCache,
     PrefillRequest,
+    RequestType,
     SequenceId,
     TextGenerationResult,
 )
@@ -97,7 +98,7 @@ def get_max_new_tokens(self) -> int:
 class DummyTextGenerator:
     def generate(
         self,
-        requests: list[Union[PrefillRequest, DecodeRequest]],
+        requests: list[RequestType],
         kv_cache: DummyCache,
     ) -> list[TextGenerationResult]:
         result = []

diff --git a/serve/mlc_serve/model/paged_cache_model.py b/serve/mlc_serve/model/paged_cache_model.py
@@ -1,6 +1,6 @@
 from pathlib import Path
 import structlog
-from typing import List, Union
+from typing import List
 
 from .base import get_model_artifact_config
 from .paged_cache_manager import CacheManager
@@ -13,6 +13,7 @@
     ModelModule,
     PrefillRequest,
     EvalMultiQueryRequest,
+    RequestType,
     TextGenerationResult,
     TextGenerator,
 )
@@ -26,9 +27,9 @@ def __init__(self, model: TextGenerator):
 
     def generate(
         self,
-        requests: list[Union[PrefillRequest, DecodeRequest, EvalMultiQueryRequest]],
+        requests: List[RequestType],
         kv_cache,
-    ) -> list[TextGenerationResult]:
+    ) -> List[TextGenerationResult]:
         prefill_requests = []
         decode_requests = []
         multi_query_decode_requests = []