From ae7323227f274aa3d4f983b467799ef797e51f15 Mon Sep 17 00:00:00 2001 From: Dong Hande <45357817+DongHande@users.noreply.github.com> Date: Mon, 9 Oct 2023 07:12:03 -0500 Subject: [PATCH] def kv_seq_len --- optimum/bettertransformer/models/attention.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/bettertransformer/models/attention.py b/optimum/bettertransformer/models/attention.py index 69ff9ad8592..7089a55c627 100644 --- a/optimum/bettertransformer/models/attention.py +++ b/optimum/bettertransformer/models/attention.py @@ -695,6 +695,7 @@ def gpt_bigcode_wrapped_scaled_dot_product( # MHA models: (batch_size, num_heads, query_length, head_dim) query_shape = query.shape batch_size = query_shape[0] + kv_seq_len = key.shape[-2] if self.multi_query: query_length = query_shape[1]