diff --git a/examples/offline_inference_vision_language.py b/examples/offline_inference_vision_language.py
index a2e35ac365a2c..17ca7ca37398d 100644
--- a/examples/offline_inference_vision_language.py
+++ b/examples/offline_inference_vision_language.py
@@ -6,6 +6,7 @@
 on HuggingFace model repository.
 """
 import time
+
 from transformers import AutoTokenizer
 
 from vllm import LLM, SamplingParams
@@ -24,8 +25,7 @@ def run_llava(question: str, modality: str):
 
     prompt = f"USER: <image>\n{question}\nASSISTANT:"
 
-    llm = LLM(model="llava-hf/llava-1.5-7b-hf",
-              max_model_len=4096)
+    llm = LLM(model="llava-hf/llava-1.5-7b-hf", max_model_len=4096)
     stop_token_ids = None
     return llm, prompt, stop_token_ids
 
diff --git a/vllm/inputs/data.py b/vllm/inputs/data.py
index ce5895ca4bb49..266bd32d86946 100644
--- a/vllm/inputs/data.py
+++ b/vllm/inputs/data.py
@@ -7,7 +7,8 @@
 from typing_extensions import NotRequired, TypedDict, TypeVar, assert_never
 
 if TYPE_CHECKING:
-    from vllm.multimodal import MultiModalDataDict, MultiModalKwargs, MultiModalPlaceholderDict
+    from vllm.multimodal import (MultiModalDataDict, MultiModalKwargs,
+                                 MultiModalPlaceholderDict)
     from vllm.multimodal.inputs import MultiModalInputsV2
@@ -265,14 +266,14 @@ def multi_modal_data(self) -> "MultiModalDataDict":
         assert_never(inputs)
 
     @cached_property
-    def multi_modal_inputs(self) -> "MultiModalKwargs":
+    def multi_modal_inputs(self) -> Union[Dict, "MultiModalKwargs"]:
         inputs = self.inputs
 
         if inputs["type"] == "token":
            return inputs.get("multi_modal_inputs", {})
 
         if inputs["type"] == "multimodal":
-            return inputs.get("mm_inputs", {})
+            return inputs.get("mm_kwargs", {})
 
         assert_never(inputs)
 
diff --git a/vllm/v1/engine/__init__.py b/vllm/v1/engine/__init__.py
index 02b35cc273257..083141d811c53 100644
--- a/vllm/v1/engine/__init__.py
+++ b/vllm/v1/engine/__init__.py
@@ -1,12 +1,11 @@
 import enum
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Union
+from typing import List, Optional, Union
 
 import msgspec
 
 from vllm.lora.request import LoRARequest
-from vllm.multimodal import (MultiModalDataDict, MultiModalKwargs,
-                             MultiModalPlaceholderDict)
+from vllm.multimodal import MultiModalKwargs, MultiModalPlaceholderDict
 from vllm.sampling_params import RequestOutputKind, SamplingParams
 
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index b3be92470ed6e..31773b3eed5f7 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -14,7 +14,6 @@
 from vllm.config import CacheConfig, VllmConfig
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
-from vllm.multimodal import MultiModalDataDict, MultiModalKwargs
 from vllm.v1.core.scheduler import Scheduler
 from vllm.v1.engine import (EngineCoreOutput, EngineCoreOutputs,
                             EngineCoreRequest, EngineCoreRequestType)
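For reviewers, here is a minimal standalone sketch of the behavior changed in `vllm/inputs/data.py`: `multi_modal_inputs` now reads the renamed `mm_kwargs` key and can return a plain empty dict when nothing is attached, hence the widened `Union[Dict, "MultiModalKwargs"]` annotation. The stand-in `MultiModalKwargs` class and free-function form below are illustrative assumptions, not vLLM's actual definitions (the real property lives on an inputs-adapter class).

```python
# Illustrative sketch of the accessor logic changed in vllm/inputs/data.py.
# MultiModalKwargs here is a stand-in; in vLLM it comes from vllm.multimodal.
from typing import Any, Dict, Union


class MultiModalKwargs(dict):
    """Stand-in for vllm.multimodal.MultiModalKwargs."""


def multi_modal_inputs(inputs: Dict[str, Any]) -> Union[Dict, MultiModalKwargs]:
    # Token prompts keep processed tensors under "multi_modal_inputs";
    # processed multimodal prompts now use "mm_kwargs" (renamed from
    # "mm_inputs"). Both branches fall back to {}, which is why the
    # return annotation widens to Union[Dict, "MultiModalKwargs"].
    if inputs["type"] == "token":
        return inputs.get("multi_modal_inputs", {})
    if inputs["type"] == "multimodal":
        return inputs.get("mm_kwargs", {})
    raise AssertionError(f"unexpected inputs type: {inputs['type']!r}")


# The fallback means callers must be prepared for a plain dict as well:
assert multi_modal_inputs({"type": "multimodal"}) == {}
assert multi_modal_inputs(
    {"type": "multimodal", "mm_kwargs": MultiModalKwargs()}) == {}
```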