diff --git a/serve/mlc_serve/engine/base.py b/serve/mlc_serve/engine/base.py
index f2cc6e2326..976d9de070 100644
--- a/serve/mlc_serve/engine/base.py
+++ b/serve/mlc_serve/engine/base.py
@@ -4,12 +4,11 @@
 from enum import Enum
 from abc import ABC, abstractmethod
-from typing import List, Callable, Any, Optional, Dict, Tuple
+from typing import List, Callable, Any, Optional, Dict
 import inspect
 
 import numpy as np
 
 from .sampling_params import SamplingParams, SamplingType
-from ..api.protocol import LogprobsContent
 
 RequestId = str
 
@@ -174,7 +173,7 @@ class SequenceOutput:
     finish_reason: Optional[FinishReason] = None
     # Number of generated tokens so far
     num_generated_tokens: int = 0
-    logprob_info: Optional[LogprobsContent] = None
+    logprob_info: Optional[RawLogprobsInfo] = None
 
     @property
     def is_finished(self) -> bool:
diff --git a/serve/mlc_serve/model/paged_cache_model.py b/serve/mlc_serve/model/paged_cache_model.py
index 9fc03a21fd..2443cad54a 100644
--- a/serve/mlc_serve/model/paged_cache_model.py
+++ b/serve/mlc_serve/model/paged_cache_model.py
@@ -15,7 +15,6 @@
 from .paged_cache_manager import KVCache, CacheManager
 from .tokenizer import HfTokenizerModule, ConversationTemplate, Tokenizer
 from ..engine import (
-    RequestId,
     SamplingType,
     MLCServeEngineConfig,
     SamplingParams,
@@ -29,7 +28,6 @@
     DecodeRequest,
     PrefillRequest,
     TextGenerationResult,
-    LOGPROBS_TYPE
 )
 from ..engine.model_module import ModelModule
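
Note on the type swap above: SequenceOutput.logprob_info now carries an engine-internal RawLogprobsInfo rather than the API-facing LogprobsContent, which is why the "from ..api.protocol import LogprobsContent" import is dropped from engine/base.py. The definition of RawLogprobsInfo is not part of the hunks shown here; the following is only a minimal sketch of what such a container could look like, assuming it stores raw token ids and log-probabilities as numpy arrays (consistent with the "import numpy as np" that the patch keeps). All field names below are assumptions, not the actual definition.

    # Hypothetical sketch only; not the definition introduced by this patch.
    from dataclasses import dataclass
    from typing import Optional

    import numpy as np

    @dataclass
    class RawLogprobsInfo:
        # Token sampled at this step and its log-probability (assumed fields).
        current_token_id: int
        current_logprob: float
        # Raw top-k alternatives straight from the model, kept as numpy
        # arrays so detokenization and API formatting can be deferred to
        # the protocol layer.
        top_token_ids: Optional[np.ndarray] = None
        top_logprobs: Optional[np.ndarray] = None

Holding raw arrays in the engine and converting them to LogprobsContent only when building the API response keeps the engine layer from depending on ..api.protocol, which is what the removed import reflects.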