From 4be63091c26eb419cde9c37c257abcc7dd6c5722 Mon Sep 17 00:00:00 2001 From: Jianwen Zhang Date: Sun, 25 Aug 2024 09:50:16 +0000 Subject: [PATCH] clean up model_runner.py --- vllm/worker/model_runner.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 3f8076354092a..4b29aaafffe46 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -1218,8 +1218,7 @@ def capture_model(self, kv_caches: List[List[torch.Tensor]]) -> None: # Prepare dummy inputs. These will be reused for all batch sizes. max_batch_size = max(_BATCH_SIZES_TO_CAPTURE) input_tokens = torch.zeros(max_batch_size, dtype=torch.long).cuda() - input_positions = torch.zeros(max_batch_size, dtype=torch.long).cuda() - + input_positions = torch.zeros(max_batch_size, dtype=torch.long).cuda() # Prepare dummy previous_hidden_states only if needed by the model. # This is used by draft models such as EAGLE. previous_hidden_states = None