Skip to content

Commit

Permalink
minor
Browse files (browse the repository at this point in the history)
Signed-off-by: Woosuk Kwon <[email protected]>
  • Loading branch information
WoosukKwon committed Dec 10, 2024
1 parent e2baff8 commit 426d3f6
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion vllm/v1/worker/gpu_model_runner.py
Original file line number Diff line number Diff line change
@@ -453,6 +453,7 @@ def execute_model(
else:
# Eager mode.
num_input_tokens = num_scheduled_tokens
+ attn_metadata.num_input_tokens = num_input_tokens

if self.is_multimodal_model:
# NOTE(woosuk): To unify token ids and soft tokens (vision
@@ -478,7 +479,6 @@ def execute_model(

# Run the decoder.
# Use persistent buffers for CUDA graphs.
- attn_metadata.num_input_tokens = num_input_tokens
with set_forward_context(attn_metadata, self.vllm_config):
hidden_states = self.model(
input_ids=input_ids,
Expand Down

0 comments on commit 426d3f6

Please sign in to comment.