From 6858f23da6d5f20ab68bcbbcbb42909c352e2ea0 Mon Sep 17 00:00:00 2001 From: sroy745 <142070531+sroy745@users.noreply.github.com> Date: Sun, 6 Oct 2024 20:52:42 -0700 Subject: [PATCH] [BugFix][Core] Fix BlockManagerV2 when Encoder Input is None (#9103) --- vllm/core/block/block_table.py | 2 -- vllm/core/block_manager_v2.py | 4 +++- vllm/engine/arg_utils.py | 5 ----- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/vllm/core/block/block_table.py b/vllm/core/block/block_table.py index a9f4bd871dfda..d10cb29ef4a7c 100644 --- a/vllm/core/block/block_table.py +++ b/vllm/core/block/block_table.py @@ -220,7 +220,6 @@ def free(self) -> None: occupied by each block. After freeing all the blocks, the `_blocks` list is set to `None`. """ - assert self._is_allocated for block in self.blocks: self._allocator.free(block) self._blocks.reset() @@ -239,7 +238,6 @@ def physical_block_ids(self) -> List[int]: List[int]: A list of physical block indices for the blocks in the BlockTable. """ - assert self._is_allocated return self._blocks.ids() def get_unseen_token_ids(self, sequence_token_ids: List[int]) -> List[int]: diff --git a/vllm/core/block_manager_v2.py b/vllm/core/block_manager_v2.py index 0fad5fa99daf8..c7ee6609306d7 100644 --- a/vllm/core/block_manager_v2.py +++ b/vllm/core/block_manager_v2.py @@ -151,7 +151,9 @@ def _allocate_sequence(self, seq: Sequence) -> BlockTable: block_allocator=self.block_allocator, max_block_sliding_window=self.max_block_sliding_window, ) - block_table.allocate(seq.get_token_ids()) + if seq.get_token_ids(): + # Add blocks to the block table only if the sequence is non empty. + block_table.allocate(seq.get_token_ids()) return block_table diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 1623ebb3aa74c..cae95d20ca23d 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -903,11 +903,6 @@ def create_engine_config(self) -> EngineConfig: "--enable-prefix-caching is currently not " "supported for multimodal models and has been disabled.") self.enable_prefix_caching = False - if model_config.is_encoder_decoder_model: - logger.warning( - "Block Manager v2 does not support encoder-decoder models" - " currently. Using Block Manager v1 as fallback.") - self.use_v2_block_manager = False cache_config = CacheConfig( block_size=self.block_size if self.device != "neuron" else