Sharded sweep tests (#15246)
### Ticket
[Link to GitHub Issue](#11512)

### Problem description
We need sweep tests that verify how ops behave when the input tensor is sharded.

### What's changed
Added sweep tests for the following ops with sharded input (the common test pattern is sketched below the list):

- isfinite
- isinf
- isnan
- isposinf
- isneginf
- lgamma
- mish
- logit
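
Each of the new `*_sharded` tests follows the same basic pattern: generate a sharded input spec, build a sharded memory config from it, move the input tensor to device with that config, run the op, and check the result against the torch reference with a PCC threshold. Below is a minimal standalone sketch of that pattern, using `ttnn.isfinite` as the example op; it assumes a locally opened device (`ttnn.open_device(device_id=0)`) and uses `torch.randn` instead of the random-value helpers the actual tests use (e.g. `gen_rand_inf` for the is* ops).

```python
import torch
import ttnn

from tests.sweep_framework.sweep_utils.sharding_utils import gen_sharded_spec_unary, parse_sharding_spec
from tests.ttnn.utils_for_testing import check_with_pcc

device = ttnn.open_device(device_id=0)

# Take one generated spec and unpack it into ttnn objects.
input_spec = gen_sharded_spec_unary(1, layouts=["TILE_LAYOUT"])[0]
(
    input_shape,
    core_grid,
    sharding_strategy,
    shard_orientation,
    tensor_hw_as_shard_shape,
    input_layout,
) = parse_sharding_spec(input_spec)

# The same sharded memory config is used for the input and the output tensor.
sharded_config = ttnn.create_sharded_memory_config_(
    shape=input_shape,
    core_grid=core_grid,
    strategy=sharding_strategy,
    orientation=shard_orientation,
    use_height_and_width_as_shard_shape=tensor_hw_as_shard_shape,
)

torch_input = torch.randn(input_shape, dtype=torch.bfloat16)
input_tensor = ttnn.from_torch(
    torch_input,
    dtype=ttnn.bfloat16,
    layout=input_layout,
    device=device,
    memory_config=sharded_config,
)

# Run the op on the sharded input and compare with the torch reference.
output_tensor = ttnn.to_torch(ttnn.isfinite(input_tensor, memory_config=sharded_config))
print(check_with_pcc(torch.isfinite(torch_input), output_tensor, 0.999))

ttnn.close_device(device)
```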

### Checklist
- [X] Post commit CI passes (https://github.com/tenstorrent/tt-metal/actions/runs/12084077014)
- [X] Sweep tests pass
nemanjagrujic authored Dec 2, 2024
1 parent ab3dc0c commit db8c8d4
Showing 22 changed files with 1,186 additions and 10 deletions.
26 changes: 17 additions & 9 deletions .github/workflows/ttnn-run-sweeps.yaml
@@ -172,15 +172,23 @@ on:
  - eltwise.binary_complex.add_bw.add_bw
  - eltwise.binary_complex.sub_bw.sub_bw
  - eltwise.binary_complex.mul_bw.mul_bw
- - eltwise.unary.lgamma
- - eltwise.unary.logit
- - eltwise.unary.mish
- - eltwise.unary.multigammaln
- - eltwise.unary.isfinite
- - eltwise.unary.isinf
- - eltwise.unary.isnan
- - eltwise.unary.isneginf
- - eltwise.unary.isposinf
+ - eltwise.unary.lgamma.lgamma
+ - eltwise.unary.lgamma.lgamma_sharded
+ - eltwise.unary.logit.logit
+ - eltwise.unary.logit.logit_sharded
+ - eltwise.unary.mish.mish
+ - eltwise.unary.mish.mish_sharded
+ - eltwise.unary.multigammaln.multigammaln
+ - eltwise.unary.isfinite.isfinite
+ - eltwise.unary.isfinite.isfinite_sharded
+ - eltwise.unary.isinf.isinf
+ - eltwise.unary.isinf.isinf_sharded
+ - eltwise.unary.isnan.isnan
+ - eltwise.unary.isnan.isnan_sharded
+ - eltwise.unary.isneginf.isneginf
+ - eltwise.unary.isneginf.isneginf_sharded
+ - eltwise.unary.isposinf.isposinf
+ - eltwise.unary.isposinf.isposinf_sharded
  - eltwise.binary.add.add_all_pytorch2
  - eltwise.binary.add.add_set2_pytorch2
  - eltwise.binary.add.add_different_memory_configs
159 changes: 159 additions & 0 deletions tests/sweep_framework/sweep_utils/sharding_utils.py
@@ -0,0 +1,159 @@
# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

import os
import ttnn
import itertools
import random
from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import _gen_reshape_args_from_volume


def gen_sharded_spec_unary(num_shapes, max_tensor_size=4 * 1024 * 1024, layouts=["TILE_LAYOUT", "ROW_MAJOR_LAYOUT"]):
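    """
    Generate input specs for unary sharded sweep tests.

    Returns a list of dicts with the keys "input_shape", "X", "Y",
    "sharding_strategy", "shard_orientation", "tensor_hw_as_shard_shape" and
    "input_layout"; each dict can be converted to ttnn objects with
    parse_sharding_spec().
    """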
    # Core grid size, assumed to match device.compute_with_storage_grid_size() (8x8)
    y = 8
    x = 8

# ["BLOCK", "WIDTH", "HEIGHT", "tensor_wh"]
sharding_strategy_list = ["BLOCK", "WIDTH", "HEIGHT", "tensor_wh"]
shard_orientation_list = ["COL_MAJOR", "ROW_MAJOR"]
spec_list = []

for sharding_strategy, shard_orientation, rank, layout in itertools.product(
sharding_strategy_list, shard_orientation_list, [4, 3, 2], layouts
):
if sharding_strategy == "tensor_wh":
tensor_hw_as_shard_shape = True
sharding_strategy = "BLOCK"
else:
tensor_hw_as_shard_shape = False

for _ in range(num_shapes):
if tensor_hw_as_shard_shape:
                # Known hang case, kept here for reference:
                # X 8 Y 8 input_shape [1, 17792, 8] DataType.BFLOAT8_B Layout.TILE ShardStrategy.BLOCK ShardOrientation.COL_MAJOR tensor_hw_as_shard_shape True

if layout == "TILE_LAYOUT":
                    # Tile layout requires 32x32 shard alignment; otherwise ttnn errors with e.g.:
                    # "In shard mode ShardMode::PHYSICAL, physical shard shape {12, 13312} is not compatible with alignment Alignment([32, 32])!"
min_shard_size_x = 32
min_shard_size_y = 32
                else:  # ROW_MAJOR_LAYOUT
                    # Shard size must be a multiple of the input tile size (width * height must be a multiple of 1024)
min_shard_size_x = random.choice([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024])
min_shard_size_y = 1024 // min_shard_size_x

rest_volume = random.randint(1, max_tensor_size // (min_shard_size_x * min_shard_size_y * x * y))
input_shape = random.choice(_gen_reshape_args_from_volume(rest_volume, step=1, out_dims=rank))
input_shape = list(input_shape["reshape_dims"])
input_shape[-2] = input_shape[-2] * min_shard_size_x
input_shape[-1] = input_shape[-1] * min_shard_size_y

                # Shard width must be a multiple of 16 to satisfy L1 alignment (a multiple of 8 would suffice for bfloat16)
while input_shape[-1] % 16 != 0:
input_shape[-1] *= 2
input_shape[-2] //= 2

                if shard_orientation == "COL_MAJOR":
                    input_shape[-2], input_shape[-1] = input_shape[-1], input_shape[-2]

elif sharding_strategy == "BLOCK":
min_shard_size_y = 32 * y
min_shard_size_x = 32 * x

rest_volume = random.randint(1, max_tensor_size // (min_shard_size_x * min_shard_size_y))
physical_shape = random.choice(_gen_reshape_args_from_volume(rest_volume, step=1, out_dims=2))
physical_shape = list(physical_shape["reshape_dims"])
physical_shape[1] *= min_shard_size_y
physical_shape[0] *= min_shard_size_x

input_shape = random.choice(_gen_reshape_args_from_volume(physical_shape[0], step=1, out_dims=rank - 1))
input_shape = list(input_shape["reshape_dims"])
input_shape.append(physical_shape[1])

            elif sharding_strategy in ("WIDTH", "HEIGHT"):
                # The shard width must divide evenly across the core grid, otherwise ttnn
                # raises RuntimeError("Invalid sharding core_grid"); the shard size must
                # also be a multiple of the input tile size.

if layout == "TILE_LAYOUT":
                    # Tile layout requires 32x32 shard alignment; otherwise ttnn errors with e.g.:
                    # "In shard mode ShardMode::PHYSICAL, physical shard shape {12, 13312} is not compatible with alignment Alignment([32, 32])!"
min_shard_size_x = 32
min_shard_size_y = 32 * x * y
                else:  # ROW_MAJOR_LAYOUT
                    # Shard size must be a multiple of the input tile size,
                    # and the shard width a multiple of 16 to satisfy L1 alignment.
mul_32_y = random.choice([16, 32, 64, 128, 256, 512, 1024])
mul_32_x = 1024 // mul_32_y

if sharding_strategy == "HEIGHT":
# Shard width should be multiple of 16 to satisfy L1 alignment
while mul_32_x % 16 != 0:
mul_32_x *= 2
mul_32_y //= 2

min_shard_size_x = mul_32_x
min_shard_size_y = mul_32_y * x * y

rest_volume = random.randint(1, max_tensor_size // (min_shard_size_x * min_shard_size_y))
input_shape = random.choice(_gen_reshape_args_from_volume(rest_volume, step=1, out_dims=rank))
input_shape = list(input_shape["reshape_dims"])
input_shape[-2] = input_shape[-2] * min_shard_size_x
input_shape[-1] = input_shape[-1] * min_shard_size_y

                if sharding_strategy == "HEIGHT":
                    input_shape[-2], input_shape[-1] = input_shape[-1], input_shape[-2]

spec_list.append(
{
"input_shape": input_shape,
"X": x,
"Y": y,
"sharding_strategy": sharding_strategy,
"shard_orientation": shard_orientation,
"tensor_hw_as_shard_shape": tensor_hw_as_shard_shape,
"input_layout": layout,
}
)

return spec_list


def parse_sharding_spec(input_spec):
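    """Convert a spec dict from gen_sharded_spec_unary() into ttnn objects:
    (input_shape, CoreGrid, ShardStrategy, ShardOrientation,
    tensor_hw_as_shard_shape, input_layout)."""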
input_shape = input_spec["input_shape"]
X = input_spec["X"]
Y = input_spec["Y"]
sharding_strategy = input_spec["sharding_strategy"]
shard_orientation = input_spec["shard_orientation"]
tensor_hw_as_shard_shape = input_spec["tensor_hw_as_shard_shape"]
input_layout = input_spec["input_layout"]

if sharding_strategy == "HEIGHT":
sharding_strategy = ttnn.ShardStrategy.HEIGHT
elif sharding_strategy == "WIDTH":
sharding_strategy = ttnn.ShardStrategy.WIDTH
else: # sharding_strategy == "BLOCK":
sharding_strategy = ttnn.ShardStrategy.BLOCK

if shard_orientation == "COL_MAJOR":
shard_orientation = ttnn.ShardOrientation.COL_MAJOR
else:
shard_orientation = ttnn.ShardOrientation.ROW_MAJOR

if input_layout == "TILE_LAYOUT":
input_layout = ttnn.TILE_LAYOUT
else:
input_layout = ttnn.ROW_MAJOR_LAYOUT

return (
input_shape,
ttnn.CoreGrid(y=Y, x=X),
sharding_strategy,
shard_orientation,
tensor_hw_as_shard_shape,
input_layout,
)
102 changes: 102 additions & 0 deletions tests/sweep_framework/sweeps/eltwise/unary/isfinite/isfinite_sharded.py
@@ -0,0 +1,102 @@
# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

from typing import Optional, Tuple
from functools import partial

import json
import torch
import random
import ttnn
import math
from tests.sweep_framework.sweep_utils.utils import gen_shapes, sanitize_shape_rm
from tests.sweep_framework.sweep_utils.sharding_utils import gen_sharded_spec_unary, parse_sharding_spec
from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_rand_inf

from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
from models.utility_functions import torch_random

# Override the default timeout in seconds for hang detection.
TIMEOUT = 120

random.seed(0)


# Parameters provided to the test vector generator are defined here.
# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
# Each suite has a key name (in this case "nightly") which will associate the test vectors to this specific suite of inputs.
# Developers can create their own generator functions and pass them to the parameters as inputs.
parameters = {
"nightly": {
"input_spec": gen_sharded_spec_unary(16, layouts=["TILE_LAYOUT"]),
"input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
},
}


# Invalidate vector is called during the generation phase where each vector will be passed in.
# If invalidated, the vector will still be stored but will be skipped.
# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
input_shape, X, Y, sharding_strategy, _, _, input_layout = test_vector["input_spec"].values()
pre_sharded_height = math.prod(input_shape[:-1])
pre_sharded_width = input_shape[-1]

if input_layout == "ROW_MAJOR_LAYOUT" and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
return True, "bfloat8_b is only supported on tiled layout"

return False, None


# These are the run instructions for the test, defined by the developer.
# The run function must take the above-defined parameters as inputs.
# The runner will call this run function with each test vector, and the returned results from this function will be stored.
# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
def run(
input_spec,
input_a_dtype,
*,
device,
) -> list:
data_seed = random.randint(0, 20000000)
torch.manual_seed(data_seed)

(
input_shape,
core_grid,
sharding_strategy,
shard_orientation,
tensor_hw_as_shard_shape,
input_layout,
) = parse_sharding_spec(input_spec)

if input_layout == ttnn.ROW_MAJOR_LAYOUT:
input_shape = sanitize_shape_rm(input_shape)

torch_input_tensor_a = gen_rand_inf(input_shape, low=-100, high=100)
torch_output_tensor = torch.isfinite(torch_input_tensor_a)

sharded_config = ttnn.create_sharded_memory_config_(
shape=input_shape,
core_grid=core_grid,
strategy=sharding_strategy,
orientation=shard_orientation,
use_height_and_width_as_shard_shape=tensor_hw_as_shard_shape,
)

input_tensor_a = ttnn.from_torch(
torch_input_tensor_a,
dtype=input_a_dtype,
layout=input_layout,
device=device,
memory_config=sharded_config,
)

start_time = start_measuring_time()
output_tensor = ttnn.isfinite(input_tensor_a, memory_config=sharded_config)
e2e_perf = stop_measuring_time(start_time)
output_tensor = ttnn.to_torch(output_tensor)

pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.999)
return [pcc, e2e_perf]
102 changes: 102 additions & 0 deletions tests/sweep_framework/sweeps/eltwise/unary/isinf/isinf_sharded.py
@@ -0,0 +1,102 @@
# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

from typing import Optional, Tuple
from functools import partial

import json
import torch
import random
import ttnn
import math
from tests.sweep_framework.sweep_utils.utils import gen_shapes, sanitize_shape_rm
from tests.sweep_framework.sweep_utils.sharding_utils import gen_sharded_spec_unary, parse_sharding_spec
from tests.tt_eager.python_api_testing.sweep_tests.generation_funcs import gen_rand_inf

from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
from models.utility_functions import torch_random

# Override the default timeout in seconds for hang detection.
TIMEOUT = 120

random.seed(0)


# Parameters provided to the test vector generator are defined here.
# They are defined as dict-type suites that contain the arguments to the run function as keys, and lists of possible inputs as values.
# Each suite has a key name (in this case "nightly") which will associate the test vectors to this specific suite of inputs.
# Developers can create their own generator functions and pass them to the parameters as inputs.
parameters = {
"nightly": {
"input_spec": gen_sharded_spec_unary(16, layouts=["TILE_LAYOUT"]),
"input_a_dtype": [ttnn.bfloat16, ttnn.bfloat8_b],
},
}


# Invalidate vector is called during the generation phase where each vector will be passed in.
# If invalidated, the vector will still be stored but will be skipped.
# Returns False, None if the vector is valid, and True, str with a reason for invalidation if it is invalid.
def invalidate_vector(test_vector) -> Tuple[bool, Optional[str]]:
input_shape, X, Y, sharding_strategy, _, _, input_layout = test_vector["input_spec"].values()
pre_sharded_height = math.prod(input_shape[:-1])
pre_sharded_width = input_shape[-1]

if input_layout == "ROW_MAJOR_LAYOUT" and test_vector["input_a_dtype"] == ttnn.bfloat8_b:
return True, "bfloat8_b is only supported on tiled layout"

return False, None


# These are the run instructions for the test, defined by the developer.
# The run function must take the above-defined parameters as inputs.
# The runner will call this run function with each test vector, and the returned results from this function will be stored.
# If you defined a mesh_device_fixture above, the object you yielded will be passed into this function as 'device'. Otherwise, it will be the default ttnn device opened by the infra.
def run(
input_spec,
input_a_dtype,
*,
device,
) -> list:
data_seed = random.randint(0, 20000000)
torch.manual_seed(data_seed)

(
input_shape,
core_grid,
sharding_strategy,
shard_orientation,
tensor_hw_as_shard_shape,
input_layout,
) = parse_sharding_spec(input_spec)

if input_layout == ttnn.ROW_MAJOR_LAYOUT:
input_shape = sanitize_shape_rm(input_shape)

torch_input_tensor_a = gen_rand_inf(input_shape, low=-100, high=100)
torch_output_tensor = torch.isinf(torch_input_tensor_a)

sharded_config = ttnn.create_sharded_memory_config_(
shape=input_shape,
core_grid=core_grid,
strategy=sharding_strategy,
orientation=shard_orientation,
use_height_and_width_as_shard_shape=tensor_hw_as_shard_shape,
)

input_tensor_a = ttnn.from_torch(
torch_input_tensor_a,
dtype=input_a_dtype,
layout=input_layout,
device=device,
memory_config=sharded_config,
)

start_time = start_measuring_time()
output_tensor = ttnn.isinf(input_tensor_a, memory_config=sharded_config)
e2e_perf = stop_measuring_time(start_time)
output_tensor = ttnn.to_torch(output_tensor)

pcc = check_with_pcc(torch_output_tensor, output_tensor, 0.999)
return [pcc, e2e_perf]