diff --git a/tests/sweep_framework/sweep_utils/reduction_common.py b/tests/sweep_framework/sweep_utils/reduction_common.py new file mode 100644 index 00000000000..e110a9ec8fc --- /dev/null +++ b/tests/sweep_framework/sweep_utils/reduction_common.py @@ -0,0 +1,57 @@ +# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. + +# SPDX-License-Identifier: Apache-2.0 + +from typing import Optional, Tuple +from functools import partial + +import torch +import random +import ttnn +from tests.sweep_framework.sweep_utils.utils import gen_shapes +from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_func_with_cast_tt + +from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time +from models.utility_functions import torch_random + +# Override the default timeout in seconds for hang detection. +TIMEOUT = 30 + + +def run_sum( + input_shape, + dim, + keepdim, + input_a_dtype, + input_a_layout, + input_a_memory_config, + output_memory_config, + device, +) -> list: + data_seed = random.randint(0, 20000000) + torch.manual_seed(data_seed) + + torch_input_tensor_a = gen_func_with_cast_tt( + partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype + )(input_shape) + + dim = dim % len(input_shape) + + torch_output_tensor = torch.sum(torch_input_tensor_a, dim=dim, keepdim=keepdim) + + input_tensor_a = ttnn.from_torch( + torch_input_tensor_a, + dtype=input_a_dtype, + layout=input_a_layout, + device=device, + memory_config=input_a_memory_config, + ) + + start_time = start_measuring_time() + result = ttnn.sum(input_tensor_a, dim=dim, memory_config=output_memory_config) + output_tensor = ttnn.to_torch(result) + e2e_perf = stop_measuring_time(start_time) + + pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.999) + # print(f"input_shape {input_shape} pcc {pcc}") + return [pcc, e2e_perf] diff --git a/tests/sweep_framework/sweeps/reduction/argmax/argmax.py b/tests/sweep_framework/sweeps/reduction/argmax/argmax.py index 913d4178865..573f5d5733f 100644 --- a/tests/sweep_framework/sweeps/reduction/argmax/argmax.py +++ b/tests/sweep_framework/sweeps/reduction/argmax/argmax.py @@ -27,10 +27,38 @@ parameters = { "nightly": { "input_shape": gen_shapes([1, 1, 1, 1], [2, 6, 128, 128], [1, 1, 1, 1], 32) + + gen_shapes([1, 1, 1, 1], [2, 9, 167, 128], [1, 1, 1, 1], 32) + + gen_shapes([1, 1, 1, 1], [2, 6, 69, 129], [1, 1, 1, 1], 15) + gen_shapes([1, 1, 1], [6, 128, 128], [1, 1, 1], 32) - + gen_shapes([1, 1], [128, 128], [1, 1], 32), - "dim": [0, 1, 2, 3, None], - "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b], + + gen_shapes([1, 1, 1], [6, 128, 128], [1, 2, 3], 3) + + gen_shapes([1, 1, 1], [6, 127, 257], [1, 1, 1], 16) + + gen_shapes([1, 1], [128, 128], [1, 1], 32) + + gen_shapes([1, 1], [8, 100], [2, 3], 7) + + gen_shapes([1, 1], [255, 255], [1, 1], 4) + + gen_shapes([1], [128], [1], 32) + + gen_shapes([1], [128], [1], 7) + + gen_shapes([1], [250], [3], 4), + "dim": [ + 0, + 1, + 2, + 3, + None, + [0, 1], + [0, 2], + [0, 3], + [1, 2], + [1, 3], + [2, 3], + [0, 1, 2], + [0, 1, 3], + [0, 1, 3], + [0, 2, 3], + [1, 2, 3], + [0, 1, 2, 3], + ], + "keepdim": [True, False], + "input_a_dtype": [ttnn.float32, ttnn.bfloat16, ttnn.bfloat8_b], "input_layout": [ttnn.ROW_MAJOR_LAYOUT, ttnn.TILE_LAYOUT], "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], @@ -53,8 +81,21 @@ def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]: return True, "Absolute 
value of dim must be less or equal than the rank of input tensor" if test_vector["input_layout"] == ttnn.TILE_LAYOUT: return True, "Tiled layout not supported" - if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b: - return True, "bfloat8_b is only supported on tiled layout" + if test_vector["input_a_dtype"] != ttnn.bfloat16: + return True, "Only BFLOAT16 is supported for inputs!" + if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and not ( + test_vector["input_a_dtype"] == ttnn.float32 or test_vector["input_a_dtype"] == ttnn.bfloat16 + ): + return True, "Row major is only supported for fp32 & fp16" + if not test_vector["keepdim"]: + return True, "keepdim = false is not supported" + + device = ttnn.open_device(device_id=0) + if test_vector["input_a_dtype"] == ttnn.float32 and ttnn.device.is_grayskull(device): + return True, "Dest Fp32 mode is not supported for arch grayskull" + ttnn.close_device(device) + del device + return False, None @@ -65,6 +106,7 @@ def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]: def run( input_shape, dim, + keepdim, input_a_dtype, input_layout, input_a_memory_config, @@ -83,7 +125,7 @@ def run( )(input_shape) golden_function = ttnn.get_golden_function(ttnn.argmax) - torch_output_tensor = golden_function(torch_input_tensor_a, dim=dim) + torch_output_tensor = golden_function(torch_input_tensor_a, dim=dim, keepdim=keepdim) input_tensor_a = ttnn.from_torch( torch_input_tensor_a, diff --git a/tests/sweep_framework/sweeps/reduction/backward/prod_bw/prod_bw.py b/tests/sweep_framework/sweeps/reduction/backward/prod_bw/prod_bw.py index 3c60e10d148..6273d857e59 100644 --- a/tests/sweep_framework/sweeps/reduction/backward/prod_bw/prod_bw.py +++ b/tests/sweep_framework/sweeps/reduction/backward/prod_bw/prod_bw.py @@ -27,11 +27,41 @@ parameters = { "xfail": { "input_shape": gen_shapes([1, 1, 32, 32], [6, 12, 256, 256], [1, 1, 32, 32], 16) - + gen_shapes([1, 1, 1, 1], [6, 12, 256, 256], [1, 1, 1, 1], 2), - "dim": [0, 1, 2, 3], - "grad_dtype": [ttnn.bfloat16, ttnn.bfloat8_b], - "input_a_dtype": [ttnn.bfloat16], - "input_layout": [ttnn.TILE_LAYOUT], + + gen_shapes([1, 1, 1, 1], [6, 12, 256, 256], [1, 1, 1, 1], 2) + + gen_shapes([3, 4, 5, 6], [6, 12, 256, 256], [7, 8, 9, 10], 2) + + gen_shapes([1, 1, 1, 1], [6, 12, 187, 188], [1, 1, 1, 1], 7) + + gen_shapes([1, 32, 64], [6, 48, 128], [1, 1, 1], 2) + + gen_shapes([1, 32, 64], [6, 77, 128], [1, 1, 1], 7) + + gen_shapes([1, 32, 64], [6, 10222, 1023], [1, 1, 1], 8) + + gen_shapes([1, 1], [6, 6], [1, 1], 2) + + gen_shapes([1, 1], [7, 7], [1, 2], 3) + + gen_shapes([1, 1], [8, 8], [1, 3], 4) + + gen_shapes([1], [4], [1], 2) + + gen_shapes([1], [14], [11], 12) + + gen_shapes([1], [24], [21], 22), + "dim": [ + 0, + 1, + 2, + 3, + None, + [0, 1], + [0, 2], + [0, 3], + [1, 2], + [1, 3], + [2, 3], + [0, 1, 2], + [0, 1, 3], + [0, 1, 3], + [0, 2, 3], + [1, 2, 3], + [0, 1, 2, 3], + ], + "keepdim": [True, False], + "grad_dtype": [ttnn.float32, ttnn.bfloat16, ttnn.bfloat8_b], + "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b], + "input_layout": [ttnn.ROW_MAJOR_LAYOUT, ttnn.TILE_LAYOUT], "grad_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], @@ -45,10 +75,26 @@ def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]: if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT: return True, "Unary 
operation requires tensor to be in Tile layout when working with non-sharded input tensor" - if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and ( - test_vector["grad_dtype"] == ttnn.bfloat8_b or test_vector["input_a_dtype"] == ttnn.bfloat8_b - ): - return True, "bfloat8_b is only supported on tiled layout" + if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT: + if (test_vector["input_a_dtype"] == ttnn.float32 and test_vector["grad_dtype"] == ttnn.float32) or ( + test_vector["input_a_dtype"] == ttnn.bfloat16 or test_vector["grad_dtype"] == ttnn.bfloat16 + ): + return False, None + else: + return True, "Row major is only supported for fp32 & fp16" + if not test_vector["keepdim"]: + return True, "keepdim = false is not supported" + if not isinstance(test_vector["dim"], int): + return True, "dim can only be integer value" + + device = ttnn.open_device(device_id=0) + if ( + test_vector["input_a_dtype"] == ttnn.float32 or test_vector["grad_dtype"] == ttnn.float32 + ) and ttnn.device.is_grayskull(device): + return True, "Dest Fp32 mode is not supported for arch grayskull" + ttnn.close_device(device) + del device + return False, None @@ -59,6 +105,7 @@ def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]: def run( input_shape, dim, + keepdim, grad_dtype, input_a_dtype, input_layout, @@ -77,10 +124,7 @@ def run( torch_input_tensor_a.requires_grad = True torch_input_tensor_a.retain_grad() - max_dim = len(input_shape) - 1 - dim = random.randint(-max_dim - 1, max_dim) - - intermediate_result = torch.prod(torch_input_tensor_a, dim=dim, keepdim=True) + intermediate_result = torch.prod(torch_input_tensor_a, dim=dim, keepdim=keepdim) torch_grad_tensor = gen_func_with_cast_tt( partial(torch_random, low=-100, high=100, dtype=torch.float32), grad_dtype )(intermediate_result.shape) diff --git a/tests/sweep_framework/sweeps/reduction/mean/mean.py b/tests/sweep_framework/sweeps/reduction/mean/mean.py index b2c93e0c0d8..c5662275dbb 100644 --- a/tests/sweep_framework/sweeps/reduction/mean/mean.py +++ b/tests/sweep_framework/sweeps/reduction/mean/mean.py @@ -27,21 +27,74 @@ # Developers can create their own generator functions and pass them to the parameters as inputs. 
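For example, such a generator can be as small as the sketch below. This is illustrative only: the real gen_shapes() helper imported by these files lives in tests/sweep_framework/sweep_utils/utils.py and its exact signature is not shown in this diff; gen_random_shapes here is a hypothetical stand-in that simply returns a list of shapes suitable for concatenation into the "input_shape" lists.

```python
# Hypothetical helper, not part of this diff: returns num_samples random shapes whose
# per-dimension sizes fall between the given lower and upper bounds, so the result can
# be concatenated with gen_shapes(...) outputs in the parameter dictionaries below.
import random
from typing import List


def gen_random_shapes(lower: List[int], upper: List[int], num_samples: int) -> List[List[int]]:
    assert len(lower) == len(upper), "lower and upper bounds must have the same rank"
    return [[random.randint(lo, hi) for lo, hi in zip(lower, upper)] for _ in range(num_samples)]


# e.g. four random 3-D shapes between [1, 1, 1] and [2, 6, 128]
example_shapes = gen_random_shapes([1, 1, 1], [2, 6, 128], 4)
```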
parameters = { "nightly": { - "input_shape": gen_shapes([1, 1, 1, 1], [2, 6, 128, 128], [1, 1, 32, 32], 32), - "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b], - "input_layout": [ttnn.TILE_LAYOUT], + "input_shape": gen_shapes([1, 1, 1, 1], [2, 6, 128, 128], [1, 1, 32, 32], 32) + + gen_shapes([1, 1, 1, 1], [2, 6, 110, 130], [1, 1, 32, 32], 31) + + gen_shapes([1, 1, 1, 1], [2, 6, 120, 140], [1, 1, 32, 32], 17) + + gen_shapes([1, 1, 1], [2, 6, 128], [1, 1, 32], 8) + + gen_shapes([1, 1, 1], [2, 7, 100], [1, 1, 64], 16) + + gen_shapes([1, 1, 1], [2, 8, 255], [1, 1, 122], 17) + + gen_shapes([1, 1], [2, 26], [1, 11], 2) + + gen_shapes([1, 1], [2, 36], [1, 12], 3) + + gen_shapes([1, 1], [2, 46], [1, 13], 4) + + gen_shapes([1], [2, 6], [1, 1], 2) + + gen_shapes([1], [2, 6], [1, 1], 2) + + gen_shapes([1], [10, 10], [2, 3], 2), + "dim": [ + 0, + 1, + 2, + 3, + None, + [0, 1], + [0, 2], + [0, 3], + [1, 2], + [1, 3], + [2, 3], + [0, 1, 2], + [0, 1, 3], + [0, 1, 3], + [0, 2, 3], + [1, 2, 3], + [0, 1, 2, 3], + ], + "keepdim": [True, False], + "input_a_dtype": [ttnn.float32, ttnn.bfloat16, ttnn.bfloat8_b], + "input_layout": [ttnn.ROW_MAJOR_LAYOUT, ttnn.TILE_LAYOUT], "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], }, } +# Invalidate vector is called during the generation phase where each vector will be passed in. +# If invalidated, the vector will still be stored but will be skipped. +# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid. +def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]: + if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and not ( + test_vector["input_a_dtype"] == ttnn.float32 or test_vector["input_a_dtype"] == ttnn.bfloat16 + ): + return True, "Row major is only supported for fp32 & fp16" + if not test_vector["keepdim"]: + return True, "keepdim = false is not supported" + + device = ttnn.open_device(device_id=0) + if test_vector["input_a_dtype"] == ttnn.float32 and ttnn.device.is_grayskull(device): + return True, "Dest Fp32 mode is not supported for arch grayskull" + ttnn.close_device(device) + del device + + return False, None + + # This is the run instructions for the test, defined by the developer. # The run function must take the above-defined parameters as inputs. # The runner will call this run function with each test vector, and the returned results from this function will be stored. # If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra. 
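A note on the invalidate_vector() implementations added throughout this diff: each one opens and closes a device for every candidate vector just to ask whether the architecture is Grayskull. If vector generation ever becomes slow, the query could be memoized as in the sketch below. This is a sketch only, not part of the change; it assumes ttnn.open_device, ttnn.close_device and ttnn.device.is_grayskull behave exactly as they are already used in these files.

```python
# Sketch only: cache the architecture query so the device is opened once per process
# instead of once per candidate vector during sweep generation.
from functools import lru_cache

import ttnn


@lru_cache(maxsize=1)
def running_on_grayskull() -> bool:
    device = ttnn.open_device(device_id=0)
    try:
        return ttnn.device.is_grayskull(device)
    finally:
        ttnn.close_device(device)
```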
def run( input_shape, + dim, + keepdim, input_a_dtype, input_layout, input_a_memory_config, @@ -60,7 +113,7 @@ def run( )(input_shape) golden_function = ttnn.get_golden_function(ttnn.mean) - torch_output_tensor = golden_function(torch_input_tensor_a, dim=-1, keepdim=True) + torch_output_tensor = golden_function(torch_input_tensor_a, dim=dim, keepdim=keepdim) input_tensor_a = ttnn.from_torch( torch_input_tensor_a, @@ -71,7 +124,7 @@ def run( ) start_time = start_measuring_time() - output_tensor = ttnn.mean(input_tensor_a, dim=-1, memory_config=output_memory_config) + output_tensor = ttnn.mean(input_tensor_a, dim=dim, memory_config=output_memory_config) output_tensor = ttnn.to_torch(output_tensor) e2e_perf = stop_measuring_time(start_time) diff --git a/tests/sweep_framework/sweeps/reduction/prod.py b/tests/sweep_framework/sweeps/reduction/prod.py index e5a098dcffa..39add1f91fd 100644 --- a/tests/sweep_framework/sweeps/reduction/prod.py +++ b/tests/sweep_framework/sweeps/reduction/prod.py @@ -30,16 +30,59 @@ + gen_shapes([32, 32], [256, 256], [32, 32], 2) + gen_shapes([1, 1, 1, 1], [6, 12, 256, 256], [1, 1, 1, 1], 2) + gen_shapes([1, 1, 1], [12, 256, 256], [1, 1, 1], 2) - + gen_shapes([1, 1], [256, 256], [1, 1], 2), - "dim": [0, 1, 2, 3], - "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b], - "input_a_layout": [ttnn.TILE_LAYOUT], + + gen_shapes([1, 1], [256, 256], [1, 1], 2) + + gen_shapes([1], [256], [1], 8) + + gen_shapes([1, 1, 1, 1], [6, 12, 200, 255], [1, 1, 1, 1], 5) + + gen_shapes([1, 1, 1], [12, 555, 128], [1, 1, 1], 4) + + gen_shapes([1, 1], [32, 32], [1, 1], 32), + "dim": [ + 0, + 1, + 2, + 3, + None, + [0, 1], + [0, 2], + [0, 3], + [1, 2], + [1, 3], + [2, 3], + [0, 1, 2], + [0, 1, 3], + [0, 1, 3], + [0, 2, 3], + [1, 2, 3], + [0, 1, 2, 3], + ], + "keepdim": [True, False], + "input_a_dtype": [ttnn.float32, ttnn.bfloat16, ttnn.bfloat8_b], + "input_a_layout": [ttnn.ROW_MAJOR_LAYOUT, ttnn.TILE_LAYOUT], "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], }, } +# Invalidate vector is called during the generation phase where each vector will be passed in. +# If invalidated, the vector will still be stored but will be skipped. +# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid. +def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]: + if test_vector["input_a_layout"] == ttnn.ROW_MAJOR_LAYOUT and not ( + test_vector["input_a_dtype"] == ttnn.float32 or test_vector["input_a_dtype"] == ttnn.bfloat16 + ): + return True, "Row major is only supported for fp32 & fp16" + if not test_vector["keepdim"]: + return True, "keepdim = false is not supported" + + device = ttnn.open_device(device_id=0) + if test_vector["input_a_dtype"] == ttnn.float32 and ttnn.device.is_grayskull(device): + return True, "Dest Fp32 mode is not supported for arch grayskull" + ttnn.close_device(device) + del device + + return False, None + + # This is the run instructions for the test, defined by the developer. # The run function must take the above-defined parameters as inputs. # The runner will call this run function with each test vector, and the returned results from this function will be stored. 
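The run() functions in these sweeps normalize an integer dim with dim = dim % len(input_shape), which folds negative axes into the [0, rank) range before calling torch and ttnn. A minimal, ttnn-free illustration:

```python
# Illustration of the dim normalization used in the run() functions in this diff.
# Python's modulo wraps negative values into [0, rank), so -1 becomes the last axis,
# -rank becomes axis 0, and non-negative dims pass through unchanged.
def normalize_dim(dim: int, rank: int) -> int:
    return dim % rank


assert normalize_dim(-1, 4) == 3
assert normalize_dim(-4, 4) == 0
assert normalize_dim(2, 4) == 2
```

Note that this only works for integer dims; the list-valued and None entries now present in the "dim" parameter lists would need per-element handling (or to be skipped) before the modulo is applied.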
@@ -47,6 +90,7 @@ def run( input_shape, dim, + keepdim, input_a_dtype, input_a_layout, input_a_memory_config, @@ -63,7 +107,7 @@ def run( dim = dim % len(input_shape) - torch_output_tensor = torch.prod(torch_input_tensor_a, dim=dim, keepdim=True) + torch_output_tensor = torch.prod(torch_input_tensor_a, dim=dim, keepdim=keepdim) input_tensor_a = ttnn.from_torch( torch_input_tensor_a, diff --git a/tests/sweep_framework/sweeps/reduction/std/std.py b/tests/sweep_framework/sweeps/reduction/std/std.py index a39657d49e0..4f95723031b 100644 --- a/tests/sweep_framework/sweeps/reduction/std/std.py +++ b/tests/sweep_framework/sweeps/reduction/std/std.py @@ -27,21 +27,74 @@ # Developers can create their own generator functions and pass them to the parameters as inputs. parameters = { "xfail": { - "input_shape": gen_shapes([1, 1, 1, 1], [2, 6, 128, 128], [1, 1, 32, 32], 32), - "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b], - "input_layout": [ttnn.TILE_LAYOUT], + "input_shape": gen_shapes([1, 1, 1, 1], [2, 6, 128, 128], [1, 1, 32, 32], 32) + + gen_shapes([1, 1, 1, 1], [2, 6, 254, 129], [1, 1, 20, 33], 33) + + gen_shapes([1, 1, 1, 1], [2, 7, 255, 130], [1, 1, 21, 34], 34) + + gen_shapes([1, 1, 1], [2, 6, 254], [1, 1, 32], 8) + + gen_shapes([1, 1, 1], [4, 12, 255], [1, 1, 32], 16) + + gen_shapes([1, 1, 1], [8, 18, 256], [1, 1, 32], 32) + + gen_shapes([1, 1], [2, 6], [1, 1], 2) + + gen_shapes([1, 1], [3, 7], [1, 1], 2) + + gen_shapes([1, 1], [4, 8], [1, 1], 2) + + gen_shapes([1], [32], [1], 4) + + gen_shapes([1], [33], [1], 5) + + gen_shapes([1], [34], [1], 6), + "dim": [ + 0, + 1, + 2, + 3, + None, + [0, 1], + [0, 2], + [0, 3], + [1, 2], + [1, 3], + [2, 3], + [0, 1, 2], + [0, 1, 3], + [0, 1, 3], + [0, 2, 3], + [1, 2, 3], + [0, 1, 2, 3], + ], + "keepdim": [True, False], + "input_a_dtype": [ttnn.float32, ttnn.bfloat16, ttnn.bfloat8_b], + "input_layout": [ttnn.ROW_MAJOR_LAYOUT, ttnn.TILE_LAYOUT], "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], }, } +# Invalidate vector is called during the generation phase where each vector will be passed in. +# If invalidated, the vector will still be stored but will be skipped. +# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid. +def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]: + if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and not ( + test_vector["input_a_dtype"] == ttnn.float32 or test_vector["input_a_dtype"] == ttnn.bfloat16 + ): + return True, "Row major is only supported for fp32 & fp16" + if not test_vector["keepdim"]: + return True, "keepdim = false is not supported" + + device = ttnn.open_device(device_id=0) + if test_vector["input_a_dtype"] == ttnn.float32 and ttnn.device.is_grayskull(device): + return True, "Dest Fp32 mode is not supported for arch grayskull" + ttnn.close_device(device) + del device + + return False, None + + # This is the run instructions for the test, defined by the developer. # The run function must take the above-defined parameters as inputs. # The runner will call this run function with each test vector, and the returned results from this function will be stored. # If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra. 
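Each run() in these sweeps finishes by comparing the golden torch output against the device output with check_with_pcc(..., 0.999), imported from tests/ttnn/utils_for_testing. The sketch below only shows the core idea — a Pearson correlation over the flattened tensors checked against the 0.999 threshold — and is not the actual implementation, which is presumably more defensive about NaNs and constant tensors.

```python
# Illustrative stand-in for the PCC comparison used throughout these sweeps.
import torch


def simple_pcc_check(expected: torch.Tensor, actual: torch.Tensor, threshold: float = 0.999):
    expected = expected.flatten().to(torch.float32)
    actual = actual.flatten().to(torch.float32)
    # Pearson correlation coefficient between the two flattened tensors.
    pcc = torch.corrcoef(torch.stack([expected, actual]))[0, 1].item()
    return pcc >= threshold, f"PCC: {pcc}"
```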
def run( input_shape, + dim, + keepdim, input_a_dtype, input_layout, input_a_memory_config, @@ -60,7 +113,7 @@ def run( )(input_shape) golden_function = ttnn.get_golden_function(ttnn.std) - torch_output_tensor = golden_function(torch_input_tensor_a, dim=-1, keepdim=True) + torch_output_tensor = golden_function(torch_input_tensor_a, dim=dim, keepdim=keepdim) input_tensor_a = ttnn.from_torch( torch_input_tensor_a, @@ -71,7 +124,7 @@ def run( ) start_time = start_measuring_time() - output_tensor = ttnn.std(input_tensor_a, dim=-1, memory_config=output_memory_config) + output_tensor = ttnn.std(input_tensor_a, dim=dim, memory_config=output_memory_config) output_tensor = ttnn.to_torch(output_tensor) e2e_perf = stop_measuring_time(start_time) diff --git a/tests/sweep_framework/sweeps/reduction/sum.py b/tests/sweep_framework/sweeps/reduction/sum.py index cddaa82be78..7e89c43be05 100644 --- a/tests/sweep_framework/sweeps/reduction/sum.py +++ b/tests/sweep_framework/sweeps/reduction/sum.py @@ -13,6 +13,7 @@ from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time from models.utility_functions import torch_random +from tests.sweep_framework.sweep_utils.reduction_common import run_sum # Override the default timeout in seconds for hang detection. TIMEOUT = 30 @@ -30,16 +31,59 @@ + gen_shapes([32, 32], [256, 256], [32, 32], 2) + gen_shapes([1, 1, 1, 1], [6, 12, 256, 256], [1, 1, 1, 1], 2) + gen_shapes([1, 1, 1], [12, 256, 256], [1, 1, 1], 2) - + gen_shapes([1, 1], [256, 256], [1, 1], 2), - "dim": [0, 1, 2, 3], - "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b], - "input_a_layout": [ttnn.TILE_LAYOUT], + + gen_shapes([1, 1], [256, 256], [1, 1], 2) + + gen_shapes([1, 1, 1, 1], [6, 12, 100, 100], [1, 1, 1, 1], 2) + + gen_shapes([1, 1, 1], [12, 128, 128], [1, 1, 1], 7) + + gen_shapes([1, 1], [11, 11], [1, 1], 2) + + gen_shapes([1], [256], [16], 2), + "dim": [ + 0, + 1, + 2, + 3, + None, + [0, 1], + [0, 2], + [0, 3], + [1, 2], + [1, 3], + [2, 3], + [0, 1, 2], + [0, 1, 3], + [0, 1, 3], + [0, 2, 3], + [1, 2, 3], + [0, 1, 2, 3], + ], + "keepdim": [True, False], + "input_a_dtype": [ttnn.float32, ttnn.bfloat16, ttnn.bfloat8_b], + "input_a_layout": [ttnn.ROW_MAJOR_LAYOUT, ttnn.TILE_LAYOUT], "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], }, } +# Invalidate vector is called during the generation phase where each vector will be passed in. +# If invalidated, the vector will still be stored but will be skipped. +# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid. +def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]: + if test_vector["input_a_layout"] == ttnn.ROW_MAJOR_LAYOUT and not ( + test_vector["input_a_dtype"] == ttnn.float32 or test_vector["input_a_dtype"] == ttnn.bfloat16 + ): + return True, "Row major is only supported for fp32 & fp16" + if not test_vector["keepdim"]: + return True, "keepdim = false is not supported" + + device = ttnn.open_device(device_id=0) + if test_vector["input_a_dtype"] == ttnn.float32 and ttnn.device.is_grayskull(device): + return True, "Dest Fp32 mode is not supported for arch grayskull" + ttnn.close_device(device) + del device + + return False, None + + # This is the run instructions for the test, defined by the developer. # The run function must take the above-defined parameters as inputs. 
# The runner will call this run function with each test vector, and the returned results from this function will be stored. @@ -47,6 +91,7 @@ def run( input_shape, dim, + keepdim, input_a_dtype, input_a_layout, input_a_memory_config, @@ -54,31 +99,23 @@ def run( *, device, ) -> list: - data_seed = random.randint(0, 20000000) - torch.manual_seed(data_seed) + return run_sum( + input_shape, dim, keepdim, input_a_dtype, input_a_layout, input_a_memory_config, output_memory_config, device + ) - torch_input_tensor_a = gen_func_with_cast_tt( - partial(torch_random, low=-100, high=100, dtype=torch.float32), input_a_dtype - )(input_shape) - dim = dim % len(input_shape) - # print(f"dim {dim} input_shape {input_shape} input_a_dtype {input_a_dtype}") +import pytest - torch_output_tensor = torch.sum(torch_input_tensor_a, dim=dim, keepdim=True) - input_tensor_a = ttnn.from_torch( - torch_input_tensor_a, - dtype=input_a_dtype, - layout=input_a_layout, - device=device, - memory_config=input_a_memory_config, +@pytest.mark.parametrize( + "input_shape, dim, input_a_dtype, input_a_layout, input_a_memory_config, output_memory_config", + [ + ([7, 32, 4, 96], 3, True, ttnn.float32, ttnn.TILE_LAYOUT, ttnn.L1_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG), + ], +) +def test_reduction_sum_localrun_fail_only( + device, input_shape, dim, keepdim, input_a_dtype, input_a_layout, input_a_memory_config, output_memory_config +): + run_sum( + input_shape, dim, keepdim, input_a_dtype, input_a_layout, input_a_memory_config, output_memory_config, device ) - - start_time = start_measuring_time() - result = ttnn.sum(input_tensor_a, dim=dim, memory_config=output_memory_config) - output_tensor = ttnn.to_torch(result) - e2e_perf = stop_measuring_time(start_time) - - pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.999) - # print(f"input_shape {input_shape} pcc {pcc}") - return [pcc, e2e_perf] diff --git a/tests/sweep_framework/sweeps/reduction/topk/topk.py b/tests/sweep_framework/sweeps/reduction/topk/topk.py index e38a9134e5c..d65b9e498d9 100644 --- a/tests/sweep_framework/sweeps/reduction/topk/topk.py +++ b/tests/sweep_framework/sweeps/reduction/topk/topk.py @@ -28,12 +28,36 @@ parameters = { "nightly": { "input_shape": gen_shapes([1, 1, 32, 64], [2, 6, 128, 128], [1, 1, 32, 64], 64) - + gen_shapes([1, 32, 64], [12, 256, 1024], [1, 32, 64], 8) - + gen_shapes([32, 64], [256, 1024], [32, 64], 8), - "dim": [-1, -2, -3, -4], - "largest": [True], + + gen_shapes([1, 1, 33, 65], [2, 6, 127, 129], [1, 1, 33, 63], 128) + + gen_shapes([1, 1, 31, 63], [2, 6, 128, 128], [1, 1, 32, 64], 7) + + gen_shapes([1, 32, 64], [12, 200, 1025], [1, 32, 64], 8) + + gen_shapes([1, 32, 64], [12, 256, 1023], [1, 32, 164], 9) + + gen_shapes([1, 7, 20], [12, 300, 1024], [1, 32, 64], 10) + + gen_shapes([32, 64], [256, 1024], [32, 64], 8) + + gen_shapes([32, 6], [256, 404], [32, 264], 18) + + gen_shapes([32, 17], [256, 124], [32, 624], 28), + "dim": [ + 0, + 1, + 2, + 3, + None, + [0, 1], + [0, 2], + [0, 3], + [1, 2], + [1, 3], + [2, 3], + [0, 1, 2], + [0, 1, 3], + [0, 1, 3], + [0, 2, 3], + [1, 2, 3], + [0, 1, 2, 3], + ], + "largest": [True, False], "k": [32], # only k = 32 is supported for now - "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b], + "input_a_dtype": [ttnn.float32, ttnn.bfloat16, ttnn.bfloat8_b], "input_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT], "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], @@ -42,10 +66,28 @@ "input_shape": 
gen_shapes([1, 1, 32, 64], [6, 12, 256, 1024], [1, 1, 32, 64], 64) + gen_shapes([1, 32, 64], [12, 256, 1024], [1, 32, 64], 8) + gen_shapes([32, 64], [256, 1024], [32, 64], 8), - "dim": [-1, -2, -3, -4], + "dim": [ + 0, + 1, + 2, + 3, + None, + [0, 1], + [0, 2], + [0, 3], + [1, 2], + [1, 3], + [2, 3], + [0, 1, 2], + [0, 1, 3], + [0, 1, 3], + [0, 2, 3], + [1, 2, 3], + [0, 1, 2, 3], + ], "largest": [True, False], "k": [32], - "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b], + "input_a_dtype": [ttnn.float32, ttnn.bfloat16, ttnn.bfloat8_b], "input_layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT], "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], @@ -65,8 +107,17 @@ def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]: return True, "Absolute value of dim must be less or equal than the rank of input tensor" if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT: return True, "Unary operation requires tensor to be in Tile layout when working with non-sharded input tensor" - if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and test_vector["input_a_dtype"] == ttnn.bfloat8_b: - return True, "bfloat8_b is only supported on tiled layout" + if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and not ( + test_vector["input_a_dtype"] == ttnn.float32 or test_vector["input_a_dtype"] == ttnn.bfloat16 + ): + return True, "Row major is only supported for fp32 & fp16" + + device = ttnn.open_device(device_id=0) + if test_vector["input_a_dtype"] == ttnn.float32 and ttnn.device.is_grayskull(device): + return True, "Dest Fp32 mode is not supported for arch grayskull" + ttnn.close_device(device) + del device + return False, None diff --git a/tests/sweep_framework/sweeps/reduction/var/var.py b/tests/sweep_framework/sweeps/reduction/var/var.py index 27f5dc70157..ee158d18869 100644 --- a/tests/sweep_framework/sweeps/reduction/var/var.py +++ b/tests/sweep_framework/sweeps/reduction/var/var.py @@ -27,21 +27,77 @@ # Developers can create their own generator functions and pass them to the parameters as inputs. parameters = { "xfail": { - "input_shape": gen_shapes([1, 1, 1, 1], [2, 6, 128, 128], [1, 1, 32, 32], 32), - "input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b], - "input_layout": [ttnn.TILE_LAYOUT], + "input_shape": gen_shapes([1, 1, 1, 1], [2, 6, 128, 128], [1, 1, 32, 32], 32) + + gen_shapes([1, 1, 1, 1], [2, 6, 128, 255], [1, 1, 132, 132], 42) + + gen_shapes([1, 1, 1, 1], [2, 6, 128, 255], [1, 1, 132, 132], 52) + + gen_shapes([1, 1, 1], [2, 6, 128], [1, 1, 32], 32) + + gen_shapes([1, 1, 1], [2, 16, 129], [1, 1, 64], 16) + + gen_shapes([1, 1, 1], [2, 26, 130], [1, 1, 128], 8) + + gen_shapes([1, 1], [2, 6], [1, 1], 4) + + gen_shapes([1, 1], [2, 16], [1, 1], 5) + + gen_shapes([1, 1], [2, 26], [1, 1], 6) + + gen_shapes([1], [8], [1], 2) + + gen_shapes([1], [18], [1], 2) + + gen_shapes([1], [28], [1], 2), + "dim": [ + 0, + 1, + 2, + 3, + None, + [0, 1], + [0, 2], + [0, 3], + [1, 2], + [1, 3], + [2, 3], + [0, 1, 2], + [0, 1, 3], + [0, 1, 3], + [0, 2, 3], + [1, 2, 3], + [0, 1, 2, 3], + ], + "keepdim": [True, False], + "input_a_dtype": [ttnn.float32, ttnn.bfloat16, ttnn.bfloat8_b], + "input_layout": [ttnn.ROW_MAJOR_LAYOUT, ttnn.TILE_LAYOUT], "input_a_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], "output_memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG], }, } +# Invalidate vector is called during the generation phase where each vector will be passed in. 
+# If invalidated, the vector will still be stored but will be skipped. +# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid. +def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]: + if test_vector["input_layout"] == ttnn.ROW_MAJOR_LAYOUT and not ( + test_vector["input_a_dtype"] == ttnn.float32 or test_vector["input_a_dtype"] == ttnn.bfloat16 + ): + return True, "Row major is only supported for fp32 & fp16" + if not test_vector["keepdim"]: + return True, "keepdim = false is not supported" + + if len(test_vector["input_shape"]) < 2: + return True, "For var with scalar(1-D) input, degrees of freedom will be <= 0." + + device = ttnn.open_device(device_id=0) + if test_vector["input_a_dtype"] == ttnn.float32 and ttnn.device.is_grayskull(device): + return True, "Dest Fp32 mode is not supported for arch grayskull" + ttnn.close_device(device) + del device + + return False, None + + # This is the run instructions for the test, defined by the developer. # The run function must take the above-defined parameters as inputs. # The runner will call this run function with each test vector, and the returned results from this function will be stored. # If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra. def run( input_shape, + dim, + keepdim, input_a_dtype, input_layout, input_a_memory_config, @@ -60,7 +116,7 @@ def run( )(input_shape) golden_function = ttnn.get_golden_function(ttnn.var) - torch_output_tensor = golden_function(torch_input_tensor_a, dim=-1, keepdim=True) + torch_output_tensor = golden_function(torch_input_tensor_a, dim=dim, keepdim=keepdim) input_tensor_a = ttnn.from_torch( torch_input_tensor_a, @@ -71,7 +127,7 @@ def run( ) start_time = start_measuring_time() - output_tensor = ttnn.var(input_tensor_a, dim=-1, memory_config=output_memory_config) + output_tensor = ttnn.var(input_tensor_a, dim=dim, memory_config=output_memory_config) output_tensor = ttnn.to_torch(output_tensor) e2e_perf = stop_measuring_time(start_time)
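On the invalidate_vector() rule above that skips rank-1 inputs for var ("degrees of freedom will be <= 0"): the rule covers all rank-1 shapes, and the clearest failure mode is when a generated 1-D shape collapses to a single element. torch.var applies Bessel's correction by default, so a single-sample reduction has zero degrees of freedom and produces NaN, which would make the PCC comparison against the device output meaningless. A quick demonstration:

```python
# Why single-element reductions are a problem for var: with the default unbiased
# estimator (Bessel's correction), one sample leaves zero degrees of freedom.
import torch

single = torch.tensor([3.0])
print(torch.var(single, dim=0))                  # nan (torch may also warn about dof <= 0)
print(torch.var(single, dim=0, unbiased=False))  # 0.0 - population variance is well defined
```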