From ef33315eeb4fcf37668206aa14c40a49d67d9fe5 Mon Sep 17 00:00:00 2001 From: "Jack (Xun) Cai" Date: Tue, 1 Oct 2024 21:01:10 -0400 Subject: [PATCH] #0: shortened flash decode tests to avoid potential timeout in fd ci (#13358) --- .../misc/test_scaled_dot_product_attention_decode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_scaled_dot_product_attention_decode.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_scaled_dot_product_attention_decode.py index ccbd517cb36..068ff7fe282 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_scaled_dot_product_attention_decode.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_scaled_dot_product_attention_decode.py @@ -455,10 +455,10 @@ def run_test_sdpa_decode_single_iter( # [16, 8, 1, 32768, 128, (8, 6), False, False], # Llama2-70B [8, 8, 1, 32768, 128, (8, 6), True, False], # Llama2-70B # [4, 8, 1, 32768, 128, (8, 6), True, False], # Llama2-70B - [32, 8, 1, 32768, 128, (8, 8), True, True], # Mixtral8x7b + [32, 8, 1, 8192, 128, (8, 8), True, True], # Mixtral8x7b # [32, 8, 1, 32768, 128, (8, 6), True, False], # Llama2-70B # [4, 32, 8, 32768, 128, (8, 8), True, False], # llama 3.1 8b - [4, 32, 8, 32768, 128, (8, 8), True, True], # llama 3.1 8b + [4, 32, 8, 8192, 128, (8, 8), True, True], # llama 3.1 8b [32, 32, 8, 8192, 128, (8, 8), True, False], # llama 3.1 8b # [4, 16, 4, 32768, 128, (8, 8), False, False], # llama 3.1 8b ), @@ -721,7 +721,7 @@ def to_contiguous_cache(paged_cache, batch, num_kv, max_num_blocks_per_seq, bloc "b, nh, nkv, s, d, grid_size, cur_pos_tensor", ( [32, 8, 1, 32768, 128, (8, 6), True], # Llama2-70B - [4, 32, 8, 32768, 128, (8, 8), True], # llama 3.1 8b + [4, 32, 8, 4096, 128, (8, 8), True], # llama 3.1 8b # [4, 16, 4, 32768, 128, (8, 8), True], # [32, 32, 8, 4096, 128, (8, 8), True], # llama 3.1 8b [8, 16, 4, 4096, 128, (8, 2), True], # llama 3.1 8b N300