Skip to content

Commit

Permalink
#15266: add sweeps based on traces (#15274)
Browse files Browse the repository at this point in the history
### Ticket
Link to Github Issue #15266

### Problem description
We need a way to track progress towards generality for mlir / pytorch2.0

### What's changed
Add sweeps based on traces for reduce, fused, and matmul ops. Running
these and seeing the results will help with tracking.

Tested locally that all the tests run and failures are expected.

### Checklist
- [x] Post commit CI passes. Doesn't touch anything in post commit.
Between https://github.com/tenstorrent/tt-metal/actions/runs/11942061016
and https://github.com/tenstorrent/tt-metal/actions/runs/11942440734
everything passed.
- [ ] Blackhole Post commit (if applicable) N/A
- [ ] Model regression CI testing passes (if applicable) N/A
- [ ] Device performance regression CI testing passes (if applicable)
N/A
- [x] New/Existing tests provide coverage for changes
  • Loading branch information
bbradelTT authored Nov 21, 2024
1 parent aa01296 commit 8ffb0b3
Show file tree
Hide file tree
Showing 9 changed files with 752 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/ttnn-run-sweeps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ on:
- eltwise.ternary_backward.addcdiv_bw
- embedding.embedding
- embedding_bw.embedding_bw
- fused.softmax_traces
- fused.layer_norm_traces
- reduction.backward.prod_bw.prod_bw
- reduction.topk.topk
- reduction.argmax.argmax
Expand All @@ -275,6 +277,11 @@ on:
- reduction.var.var
- reduction.std.std
- reduction.mean.mean
- reduction.traces.argmax_traces
- reduction.traces.max_traces
- reduction.traces.mean_traces
- reduction.traces.sum_traces
- reduction.traces.topk_traces
- matmul.full.matmul_default_block_sharded
- matmul.full.matmul_default_height_sharded
- matmul.full.matmul_default_interleaved
Expand All @@ -285,6 +292,7 @@ on:
- matmul.short.matmul_user_program_config_mcast_1d
- matmul.short.matmul_user_program_config_mcast_2d
- matmul.short.matmul_user_program_config
- matmul.short.matmul_traces
- matmul.short.matmul
- losses.l1_loss
- losses.mse_loss
Expand Down
133 changes: 133 additions & 0 deletions tests/sweep_framework/sweeps/fused/layer_norm_traces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

from typing import Optional, Tuple

import torch

import ttnn

from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
from models.utility_functions import torch_random

# Per-test-vector timeout in seconds enforced by the sweep framework.
TIMEOUT = 15

# Sweep vectors captured from model traces (see #15266).
# Each entry is (input_shape, normalized_shape, eps):
#   input_shape      - shape of the random input tensor
#   normalized_shape - trailing dimensions layer_norm normalizes over
#                      (always the last dim in these traces)
#   eps              - epsilon passed to layer_norm
parameters = {
    "default": {
        "params": [
            ((1, 1, 1024), [1024], 1e-05),
            ((1, 1, 768), [768], 1e-05),
            ((1, 10, 768), [768], 1e-05),
            ((1, 1024, 160), [160], 1e-05),
            ((1, 1024), [1024], 1e-12),
            ((1, 12, 128), [128], 1e-12),
            ((1, 12, 768), [768], 1e-12),
            ((1, 1200, 320), [320], 1e-05),
            ((1, 1370, 1280), [1280], 1e-06),
            ((1, 14, 128), [128], 1e-12),
            ((1, 14, 14, 1024), [1024], 1e-05),
            ((1, 14, 14, 384), [384], 1e-05),
            ((1, 14, 14, 512), [512], 1e-05),
            ((1, 14, 14, 768), [768], 1e-05),
            ((1, 14, 768), [768], 1e-12),
            ((1, 1445, 192), [192], 1e-12),
            ((1, 1500, 768), [768], 1e-05),
            ((1, 16, 16, 384), [384], 1e-05),
            ((1, 16, 16, 512), [512], 1e-05),
            ((1, 16, 768), [768], 1e-12),
            ((1, 16384, 32), [32], 1e-05),
            ((1, 19, 1024), [1024], 1e-05),
            ((1, 19200, 64), [64], 1e-05),
            ((1, 196, 768), [768], 1e-06),
            ((1, 197, 1024), [1024], 1e-06),
            ((1, 197, 1024), [1024], 1e-12),
            ((1, 197, 768), [768], 1e-06),
            ((1, 197, 768), [768], 1e-12),
            ((1, 2048, 768), [768], 1e-05),
            ((1, 24, 768), [768], 1e-05),
            ((1, 25, 768), [768], 1e-12),
            ((1, 256, 1024), [1024], 1e-12),
            ((1, 256, 1280), [1280], 1e-05),
            ((1, 256, 160), [160], 1e-05),
            ((1, 256, 256), [256], 1e-05),
            ((1, 256, 32), [32], 1e-05),
            ((1, 256, 512), [512], 1e-05),
            ((1, 256, 64), [64], 1e-05),
            ((1, 28, 28, 192), [192], 1e-05),
            ((1, 28, 28, 256), [256], 1e-05),
            ((1, 28, 28, 384), [384], 1e-05),
            ((1, 28, 28, 512), [512], 1e-05),
            ((1, 300, 128), [128], 1e-05),
            ((1, 300, 320), [320], 1e-05),
            ((1, 300, 512), [512], 1e-05),
            ((1, 300, 64), [64], 1e-05),
            ((1, 32, 1536), [1536], 1e-05),
            ((1, 32, 32, 192), [192], 1e-05),
            ((1, 32, 32, 256), [256], 1e-05),
            ((1, 4, 768), [768], 1e-05),
            ((1, 4096, 320), [320], 1e-05),
            ((1, 4096, 64), [64], 1e-05),
            ((1, 45, 768), [768], 1e-05),
            ((1, 4800, 128), [128], 1e-05),
            ((1, 5, 1024), [1024], 1e-05),
            ((1, 50, 1024), [1024], 1e-06),
            ((1, 50, 768), [768], 1e-05),
            ((1, 50, 768), [768], 1e-06),
            ((1, 56, 56, 128), [128], 1e-05),
            ((1, 56, 56, 96), [96], 1e-05),
            ((1, 59, 1024), [1024], 1e-05),
            ((1, 64, 64, 128), [128], 1e-05),
            ((1, 64, 64, 96), [96], 1e-05),
            ((1, 7, 4544), [4544], 1e-05),
            ((1, 7, 7, 1024), [1024], 1e-05),
            ((1, 7, 7, 1536), [1536], 1e-05),
            ((1, 7, 7, 2048), [2048], 1e-05),
            ((1, 7, 7, 768), [768], 1e-05),
            ((1, 7, 768), [768], 1e-05),
            ((1, 768), [768], 1e-05),
            ((1, 768), [768], 1e-12),
            ((1, 8, 768), [768], 1e-12),
            ((1, 8, 8, 1024), [1024], 1e-05),
            ((1, 8, 8, 768), [768], 1e-05),
            ((1, 9, 1024), [1024], 1e-12),
            ((1, 9, 128), [128], 1e-12),
            ((1, 9, 2048), [2048], 1e-12),
            ((1, 9, 4096), [4096], 1e-12),
            ((1, 9, 768), [768], 1e-12),
            ((1, 100, 1280), [1280], 1e-05),
            ((1, 100, 640), [640], 1e-05),
            ((1, 500, 1280), [1280], 1e-05),
            ((1, 500, 320), [320], 1e-05),
            ((1, 500, 640), [640], 1e-05),
            ((100, 1, 256), [256], 1e-05),
            ((2, 7, 512), [512], 1e-05),
            ((920, 1, 256), [256], 1e-05),
        ],
    }
}


def run(
    params,
    *,
    device,
) -> list:
    """Sweep entry point: compare ttnn.layer_norm against torch.layer_norm.

    ``params`` is one ``(input_shape, normalized_shape, eps)`` trace vector.
    Returns ``[pcc_check_result, e2e_perf]`` where the PCC threshold is 0.999
    and ``e2e_perf`` is the measured wall time of the ttnn op plus readback.
    """
    input_shape, normalized_shape, eps = params

    # Build the torch reference with random input, weight (gamma) and bias (beta).
    torch_input = torch.rand(input_shape, dtype=torch.float32)
    torch_weight = torch.rand(normalized_shape, dtype=torch.float32)
    torch_bias = torch.rand(normalized_shape, dtype=torch.float32)
    golden = torch.layer_norm(
        torch_input, normalized_shape, weight=torch_weight, bias=torch_bias, eps=eps
    )

    # Move the same tensors onto the device in tile layout.
    to_device = lambda t: ttnn.from_torch(t, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)
    dev_input = to_device(torch_input)
    dev_weight = to_device(torch_weight)
    dev_bias = to_device(torch_bias)

    # Time the device op together with the host readback.
    start_time = start_measuring_time()
    actual = ttnn.to_torch(ttnn.layer_norm(dev_input, weight=dev_weight, bias=dev_bias, epsilon=eps))
    e2e_perf = stop_measuring_time(start_time)

    return [check_with_pcc(golden, actual, 0.999), e2e_perf]
118 changes: 118 additions & 0 deletions tests/sweep_framework/sweeps/fused/softmax_traces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

from typing import Optional, Tuple

import torch

import ttnn

from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
from models.utility_functions import torch_random

# Per-test-vector timeout in seconds enforced by the sweep framework.
TIMEOUT = 15

# Sweep vectors captured from model traces (see #15266).
# Each entry is (input_shape, dim, half_to_float):
#   input_shape   - shape of the random input tensor
#   dim           - softmax dimension (always -1, the last axis, in these traces)
#   half_to_float - flag recorded from the traces; always False here and its
#                   meaning is still unresolved (see TODO in run())
parameters = {
    "default": {
        "params": [
            ((1, 1, 16384, 256), -1, False),
            ((1, 1, 19200, 300), -1, False),
            ((1, 12, 1, 10), -1, False),
            ((1, 12, 1, 1), -1, False),
            ((1, 12, 1, 2), -1, False),
            ((1, 12, 1, 46), -1, False),
            ((1, 12, 1, 100 + 1), -1, False),
            ((1, 12, 1, 1000 + 1), -1, False),
            ((1, 12, 10, 10), -1, False),
            ((1, 12, 12, 12), -1, False),
            ((1, 12, 14, 14), -1, False),
            ((1, 12, 16, 16), -1, False),
            ((1, 12, 197, 197), -1, False),
            ((1, 12, 25, 25), -1, False),
            ((1, 12, 45, 45), -1, False),
            ((1, 12, 7, 7), -1, False),
            ((1, 12, 9, 9), -1, False),
            ((1, 16, 1, 10), -1, False),
            ((1, 16, 1, 1), -1, False),
            ((1, 16, 1, 2), -1, False),
            ((1, 16, 1, 6), -1, False),
            ((1, 16, 1, 100 + 1), -1, False),
            ((1, 16, 1, 1000 + 1), -1, False),
            ((1, 16, 10, 10), -1, False),
            ((1, 16, 197, 197), -1, False),
            ((1, 16, 256, 256), -1, False),
            ((1, 16, 32, 32), -1, False),
            ((1, 16, 5, 5), -1, False),
            ((1, 16, 9, 9), -1, False),
            ((1, 2, 4096, 256), -1, False),
            ((1, 2, 4800, 300), -1, False),
            ((1, 24, 49, 49), -1, False),
            ((1, 24, 64, 64), -1, False),
            ((1, 3, 1445, 1445), -1, False),
            ((1, 32, 49, 49), -1, False),
            ((1, 32, 64, 64), -1, False),
            ((1, 5, 1024, 256), -1, False),
            ((1, 5, 1200, 300), -1, False),
            ((1, 6, 1, 15), -1, False),
            ((1, 6, 1, 17), -1, False),
            ((1, 6, 1, 1), -1, False),
            ((1, 6, 1, 2), -1, False),
            ((1, 6, 1, 100 + 1), -1, False),
            ((1, 6, 15, 15), -1, False),
            ((1, 64, 9, 9), -1, False),
            ((1, 71, 7, 7), -1, False),
            ((1, 8, 1, 10), -1, False),
            ((1, 8, 1, 1), -1, False),
            ((1, 8, 1, 2), -1, False),
            ((1, 8, 1, 100 + 1), -1, False),
            ((1, 8, 10, 10), -1, False),
            ((1, 8, 2048, 256), -1, False),
            ((1, 8, 256, 2048), -1, False),
            ((1, 8, 256, 256), -1, False),
            ((1, 8, 300, 300), -1, False),
            ((12, 24, 24), -1, False),
            ((12, 50, 50), -1, False),
            ((16, 1, 60), -1, False),
            ((16, 1, 1000 + 1), -1, False),
            ((16, 19, 19), -1, False),
            ((16, 59, 59), -1, False),
            ((16, 6, 49, 49), -1, False),
            ((16, 6, 64, 64), -1, False),
            ((16, 7, 7), -1, False),
            ((16, 8, 49, 49), -1, False),
            ((16, 8, 64, 64), -1, False),
            ((4, 12, 49, 49), -1, False),
            ((4, 12, 64, 64), -1, False),
            ((4, 16, 49, 49), -1, False),
            ((4, 16, 64, 64), -1, False),
            ((64, 3, 49, 49), -1, False),
            ((64, 3, 64, 64), -1, False),
            ((64, 4, 49, 49), -1, False),
            ((64, 4, 64, 64), -1, False),
            ((8, 100, 100), -1, False),
            ((8, 100, 920), -1, False),
            ((8, 920, 920), -1, False),
        ],
    }
}


def run(
    params,
    *,
    device,
) -> list:
    """Sweep entry point: compare ttnn.softmax against torch.softmax.

    ``params`` is one ``(input_shape, dim, half_to_float)`` trace vector.
    Returns ``[pcc_check_result, e2e_perf]`` where the PCC threshold is 0.989
    and ``e2e_perf`` is the measured wall time of the ttnn op plus readback.
    """
    input_shape, dim, half_to_float = params
    # TODO find out what half_to_float is supposed to mean in the provided traces

    # Torch reference on a random input.
    torch_input = torch.rand(input_shape, dtype=torch.float32)
    golden = torch.softmax(torch_input, dim)

    # Same input on the device in tile layout.
    dev_input = ttnn.from_torch(torch_input, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)

    # Time the device op together with the host readback.
    start_time = start_measuring_time()
    actual = ttnn.to_torch(ttnn.softmax(dev_input, dim))
    e2e_perf = stop_measuring_time(start_time)

    return [check_with_pcc(golden, actual, 0.989), e2e_perf]
Loading

0 comments on commit 8ffb0b3

Please sign in to comment.