From 8ffb0b3160f78c396b93bbf019ecc03e360d7183 Mon Sep 17 00:00:00 2001
From: Borys Bradel <164946524+bbradelTT@users.noreply.github.com>
Date: Thu, 21 Nov 2024 10:39:28 -0500
Subject: [PATCH] #15266: add sweeps based on traces (#15274)

### Ticket
Link to GitHub Issue #15266

### Problem description
We need a way to track progress towards generality for MLIR / PyTorch 2.0.

### What's changed
Add trace-based sweeps for reduction, fused, and matmul ops. Running
these and reviewing the results will help with tracking.

Tested locally that all the tests run and that the failures are expected.

### Checklist
- [x] Post commit CI passes. Doesn't touch anything in post commit.
Between https://github.com/tenstorrent/tt-metal/actions/runs/11942061016
and https://github.com/tenstorrent/tt-metal/actions/runs/11942440734
everything passed.
- [ ] Blackhole Post commit (if applicable) N/A
- [ ] Model regression CI testing passes (if applicable) N/A
- [ ] Device performance regression CI testing passes (if applicable)
N/A
- [x] New/Existing tests provide coverage for changes
---
 .github/workflows/ttnn-run-sweeps.yaml        |   8 ++
 .../sweeps/fused/layer_norm_traces.py         | 133 +++++++++++++++++
 .../sweeps/fused/softmax_traces.py            | 118 +++++++++++++++
 .../sweeps/matmul/short/matmul_traces.py      | 136 ++++++++++++++++++
 .../sweeps/reduction/traces/argmax_traces.py  |  46 ++++++
 .../sweeps/reduction/traces/max_traces.py     |  44 ++++++
 .../sweeps/reduction/traces/mean_traces.py    | 127 ++++++++++++++++
 .../sweeps/reduction/traces/sum_traces.py     |  96 +++++++++++++
 .../sweeps/reduction/traces/topk_traces.py    |  44 ++++++
 9 files changed, 752 insertions(+)
 create mode 100644 tests/sweep_framework/sweeps/fused/layer_norm_traces.py
 create mode 100644 tests/sweep_framework/sweeps/fused/softmax_traces.py
 create mode 100644 tests/sweep_framework/sweeps/matmul/short/matmul_traces.py
 create mode 100644 tests/sweep_framework/sweeps/reduction/traces/argmax_traces.py
 create mode 100644 tests/sweep_framework/sweeps/reduction/traces/max_traces.py
 create mode 100644 tests/sweep_framework/sweeps/reduction/traces/mean_traces.py
 create mode 100644 tests/sweep_framework/sweeps/reduction/traces/sum_traces.py
 create mode 100644 tests/sweep_framework/sweeps/reduction/traces/topk_traces.py

diff --git a/.github/workflows/ttnn-run-sweeps.yaml b/.github/workflows/ttnn-run-sweeps.yaml
index f9e835ed3fe..e5817a80e19 100644
--- a/.github/workflows/ttnn-run-sweeps.yaml
+++ b/.github/workflows/ttnn-run-sweeps.yaml
@@ -267,6 +267,8 @@ on:
           - eltwise.ternary_backward.addcdiv_bw
           - embedding.embedding
           - embedding_bw.embedding_bw
+          - fused.softmax_traces
+          - fused.layer_norm_traces
           - reduction.backward.prod_bw.prod_bw
           - reduction.topk.topk
           - reduction.argmax.argmax
@@ -275,6 +277,11 @@ on:
           - reduction.var.var
           - reduction.std.std
           - reduction.mean.mean
+          - reduction.traces.argmax_traces
+          - reduction.traces.max_traces
+          - reduction.traces.mean_traces
+          - reduction.traces.sum_traces
+          - reduction.traces.topk_traces
           - matmul.full.matmul_default_block_sharded
           - matmul.full.matmul_default_height_sharded
           - matmul.full.matmul_default_interleaved
@@ -285,6 +292,7 @@ on:
           - matmul.short.matmul_user_program_config_mcast_1d
           - matmul.short.matmul_user_program_config_mcast_2d
           - matmul.short.matmul_user_program_config
+          - matmul.short.matmul_traces
           - matmul.short.matmul
           - losses.l1_loss
           - losses.mse_loss
diff --git a/tests/sweep_framework/sweeps/fused/layer_norm_traces.py b/tests/sweep_framework/sweeps/fused/layer_norm_traces.py
new file mode 100644
index 00000000000..c9641395119
--- /dev/null
+++ b/tests/sweep_framework/sweeps/fused/layer_norm_traces.py
@@ -0,0 +1,133 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+
+import torch
+
+import ttnn
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+TIMEOUT = 15  # NOTE(review): presumably a per-test timeout in seconds read by the sweep framework - confirm
+
+parameters = {
+    "default": {
+        "params": [  # each entry: (input_shape, normalized_shape, eps), unpacked in that order by run()
+            ((1, 1, 1024), [1024], 1e-05),
+            ((1, 1, 768), [768], 1e-05),
+            ((1, 10, 768), [768], 1e-05),
+            ((1, 1024, 160), [160], 1e-05),
+            ((1, 1024), [1024], 1e-12),
+            ((1, 12, 128), [128], 1e-12),
+            ((1, 12, 768), [768], 1e-12),
+            ((1, 1200, 320), [320], 1e-05),
+            ((1, 1370, 1280), [1280], 1e-06),
+            ((1, 14, 128), [128], 1e-12),
+            ((1, 14, 14, 1024), [1024], 1e-05),
+            ((1, 14, 14, 384), [384], 1e-05),
+            ((1, 14, 14, 512), [512], 1e-05),
+            ((1, 14, 14, 768), [768], 1e-05),
+            ((1, 14, 768), [768], 1e-12),
+            ((1, 1445, 192), [192], 1e-12),
+            ((1, 1500, 768), [768], 1e-05),
+            ((1, 16, 16, 384), [384], 1e-05),
+            ((1, 16, 16, 512), [512], 1e-05),
+            ((1, 16, 768), [768], 1e-12),
+            ((1, 16384, 32), [32], 1e-05),
+            ((1, 19, 1024), [1024], 1e-05),
+            ((1, 19200, 64), [64], 1e-05),
+            ((1, 196, 768), [768], 1e-06),
+            ((1, 197, 1024), [1024], 1e-06),
+            ((1, 197, 1024), [1024], 1e-12),
+            ((1, 197, 768), [768], 1e-06),
+            ((1, 197, 768), [768], 1e-12),
+            ((1, 2048, 768), [768], 1e-05),
+            ((1, 24, 768), [768], 1e-05),
+            ((1, 25, 768), [768], 1e-12),
+            ((1, 256, 1024), [1024], 1e-12),
+            ((1, 256, 1280), [1280], 1e-05),
+            ((1, 256, 160), [160], 1e-05),
+            ((1, 256, 256), [256], 1e-05),
+            ((1, 256, 32), [32], 1e-05),
+            ((1, 256, 512), [512], 1e-05),
+            ((1, 256, 64), [64], 1e-05),
+            ((1, 28, 28, 192), [192], 1e-05),
+            ((1, 28, 28, 256), [256], 1e-05),
+            ((1, 28, 28, 384), [384], 1e-05),
+            ((1, 28, 28, 512), [512], 1e-05),
+            ((1, 300, 128), [128], 1e-05),
+            ((1, 300, 320), [320], 1e-05),
+            ((1, 300, 512), [512], 1e-05),
+            ((1, 300, 64), [64], 1e-05),
+            ((1, 32, 1536), [1536], 1e-05),
+            ((1, 32, 32, 192), [192], 1e-05),
+            ((1, 32, 32, 256), [256], 1e-05),
+            ((1, 4, 768), [768], 1e-05),
+            ((1, 4096, 320), [320], 1e-05),
+            ((1, 4096, 64), [64], 1e-05),
+            ((1, 45, 768), [768], 1e-05),
+            ((1, 4800, 128), [128], 1e-05),
+            ((1, 5, 1024), [1024], 1e-05),
+            ((1, 50, 1024), [1024], 1e-06),
+            ((1, 50, 768), [768], 1e-05),
+            ((1, 50, 768), [768], 1e-06),
+            ((1, 56, 56, 128), [128], 1e-05),
+            ((1, 56, 56, 96), [96], 1e-05),
+            ((1, 59, 1024), [1024], 1e-05),
+            ((1, 64, 64, 128), [128], 1e-05),
+            ((1, 64, 64, 96), [96], 1e-05),
+            ((1, 7, 4544), [4544], 1e-05),
+            ((1, 7, 7, 1024), [1024], 1e-05),
+            ((1, 7, 7, 1536), [1536], 1e-05),
+            ((1, 7, 7, 2048), [2048], 1e-05),
+            ((1, 7, 7, 768), [768], 1e-05),
+            ((1, 7, 768), [768], 1e-05),
+            ((1, 768), [768], 1e-05),
+            ((1, 768), [768], 1e-12),
+            ((1, 8, 768), [768], 1e-12),
+            ((1, 8, 8, 1024), [1024], 1e-05),
+            ((1, 8, 8, 768), [768], 1e-05),
+            ((1, 9, 1024), [1024], 1e-12),
+            ((1, 9, 128), [128], 1e-12),
+            ((1, 9, 2048), [2048], 1e-12),
+            ((1, 9, 4096), [4096], 1e-12),
+            ((1, 9, 768), [768], 1e-12),
+            ((1, 100, 1280), [1280], 1e-05),
+            ((1, 100, 640), [640], 1e-05),
+            ((1, 500, 1280), [1280], 1e-05),
+            ((1, 500, 320), [320], 1e-05),
+            ((1, 500, 640), [640], 1e-05),
+            ((100, 1, 256), [256], 1e-05),
+            ((2, 7, 512), [512], 1e-05),
+            ((920, 1, 256), [256], 1e-05),
+        ],
+    }
+}
+
+
+def run(params, *, device) -> list:
+    """Compare ttnn.layer_norm against torch.layer_norm for one traced configuration.
+
+    params is (input_shape, normalized_shape, eps); returns [check_with_pcc result, e2e_perf].
+    """
+    shape, norm_shape, epsilon = params
+    # Random float32 input, weight and bias; torch computes the reference output.
+    golden_in = torch.rand(shape, dtype=torch.float32)
+    golden_weight = torch.rand(norm_shape, dtype=torch.float32)
+    golden_bias = torch.rand(norm_shape, dtype=torch.float32)
+    golden_out = torch.layer_norm(golden_in, norm_shape, weight=golden_weight, bias=golden_bias, eps=epsilon)
+
+    # Every operand goes to the device as a float32 tensor in TILE_LAYOUT.
+    def to_device(tensor):
+        return ttnn.from_torch(tensor, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)
+
+    tt_in, tt_weight, tt_bias = to_device(golden_in), to_device(golden_weight), to_device(golden_bias)
+
+    # The timed region covers the op and the conversion back to torch.
+    timer = start_measuring_time()
+    tt_out = ttnn.to_torch(ttnn.layer_norm(tt_in, weight=tt_weight, bias=tt_bias, epsilon=epsilon))
+    e2e_perf = stop_measuring_time(timer)
+    return [check_with_pcc(golden_out, tt_out, 0.999), e2e_perf]
diff --git a/tests/sweep_framework/sweeps/fused/softmax_traces.py b/tests/sweep_framework/sweeps/fused/softmax_traces.py
new file mode 100644
index 00000000000..e9cc6dbe7bb
--- /dev/null
+++ b/tests/sweep_framework/sweeps/fused/softmax_traces.py
@@ -0,0 +1,118 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+
+import torch
+
+import ttnn
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+TIMEOUT = 15  # NOTE(review): presumably a per-test timeout in seconds read by the sweep framework - confirm
+
+parameters = {
+    "default": {
+        "params": [  # each entry: (input_shape, dim, half_to_float); run() unpacks but never uses half_to_float
+            ((1, 1, 16384, 256), -1, False),
+            ((1, 1, 19200, 300), -1, False),
+            ((1, 12, 1, 10), -1, False),
+            ((1, 12, 1, 1), -1, False),
+            ((1, 12, 1, 2), -1, False),
+            ((1, 12, 1, 46), -1, False),
+            ((1, 12, 1, 100 + 1), -1, False),
+            ((1, 12, 1, 1000 + 1), -1, False),
+            ((1, 12, 10, 10), -1, False),
+            ((1, 12, 12, 12), -1, False),
+            ((1, 12, 14, 14), -1, False),
+            ((1, 12, 16, 16), -1, False),
+            ((1, 12, 197, 197), -1, False),
+            ((1, 12, 25, 25), -1, False),
+            ((1, 12, 45, 45), -1, False),
+            ((1, 12, 7, 7), -1, False),
+            ((1, 12, 9, 9), -1, False),
+            ((1, 16, 1, 10), -1, False),
+            ((1, 16, 1, 1), -1, False),
+            ((1, 16, 1, 2), -1, False),
+            ((1, 16, 1, 6), -1, False),
+            ((1, 16, 1, 100 + 1), -1, False),
+            ((1, 16, 1, 1000 + 1), -1, False),
+            ((1, 16, 10, 10), -1, False),
+            ((1, 16, 197, 197), -1, False),
+            ((1, 16, 256, 256), -1, False),
+            ((1, 16, 32, 32), -1, False),
+            ((1, 16, 5, 5), -1, False),
+            ((1, 16, 9, 9), -1, False),
+            ((1, 2, 4096, 256), -1, False),
+            ((1, 2, 4800, 300), -1, False),
+            ((1, 24, 49, 49), -1, False),
+            ((1, 24, 64, 64), -1, False),
+            ((1, 3, 1445, 1445), -1, False),
+            ((1, 32, 49, 49), -1, False),
+            ((1, 32, 64, 64), -1, False),
+            ((1, 5, 1024, 256), -1, False),
+            ((1, 5, 1200, 300), -1, False),
+            ((1, 6, 1, 15), -1, False),
+            ((1, 6, 1, 17), -1, False),
+            ((1, 6, 1, 1), -1, False),
+            ((1, 6, 1, 2), -1, False),
+            ((1, 6, 1, 100 + 1), -1, False),
+            ((1, 6, 15, 15), -1, False),
+            ((1, 64, 9, 9), -1, False),
+            ((1, 71, 7, 7), -1, False),
+            ((1, 8, 1, 10), -1, False),
+            ((1, 8, 1, 1), -1, False),
+            ((1, 8, 1, 2), -1, False),
+            ((1, 8, 1, 100 + 1), -1, False),
+            ((1, 8, 10, 10), -1, False),
+            ((1, 8, 2048, 256), -1, False),
+            ((1, 8, 256, 2048), -1, False),
+            ((1, 8, 256, 256), -1, False),
+            ((1, 8, 300, 300), -1, False),
+            ((12, 24, 24), -1, False),
+            ((12, 50, 50), -1, False),
+            ((16, 1, 60), -1, False),
+            ((16, 1, 1000 + 1), -1, False),
+            ((16, 19, 19), -1, False),
+            ((16, 59, 59), -1, False),
+            ((16, 6, 49, 49), -1, False),
+            ((16, 6, 64, 64), -1, False),
+            ((16, 7, 7), -1, False),
+            ((16, 8, 49, 49), -1, False),
+            ((16, 8, 64, 64), -1, False),
+            ((4, 12, 49, 49), -1, False),
+            ((4, 12, 64, 64), -1, False),
+            ((4, 16, 49, 49), -1, False),
+            ((4, 16, 64, 64), -1, False),
+            ((64, 3, 49, 49), -1, False),
+            ((64, 3, 64, 64), -1, False),
+            ((64, 4, 49, 49), -1, False),
+            ((64, 4, 64, 64), -1, False),
+            ((8, 100, 100), -1, False),
+            ((8, 100, 920), -1, False),
+            ((8, 920, 920), -1, False),
+        ],
+    }
+}
+
+
+def run(params, *, device) -> list:
+    """Compare ttnn.softmax against torch.softmax for one traced configuration.
+
+    params is (input_shape, dim, half_to_float); returns [check_with_pcc result, e2e_perf].
+    """
+    # TODO find out what half_to_float is supposed to mean in the provided traces
+    shape, dim, _half_to_float = params
+
+    # Reference result from torch on a random float32 input.
+    golden_in = torch.rand(shape, dtype=torch.float32)
+    golden_out = torch.softmax(golden_in, dim)
+
+    tt_in = ttnn.from_torch(golden_in, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)
+
+    timer = start_measuring_time()  # timed region includes the conversion back to torch
+    tt_out = ttnn.to_torch(ttnn.softmax(tt_in, dim))
+    e2e_perf = stop_measuring_time(timer)
+    return [check_with_pcc(golden_out, tt_out, 0.989), e2e_perf]
diff --git a/tests/sweep_framework/sweeps/matmul/short/matmul_traces.py b/tests/sweep_framework/sweeps/matmul/short/matmul_traces.py
new file mode 100644
index 00000000000..c66e5548834
--- /dev/null
+++ b/tests/sweep_framework/sweeps/matmul/short/matmul_traces.py
@@ -0,0 +1,136 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+
+import torch
+
+import ttnn
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+TIMEOUT = 70  # NOTE(review): presumably a per-test timeout in seconds read by the sweep framework - confirm
+
+parameters = {
+    "default": {
+        "params": [  # each entry: (in0_h, in0_w, in1_h, in1_w), unpacked in that order by run()
+            (1, 1024, 1024, 1024),
+            (1, 1024, 1024, 3072),
+            (1, 1024, 1024, 32128),
+            (1, 1024, 1024, 4096),
+            (1, 1024, 1024, 512),
+            (1, 10, 10, 128),
+            (1, 128, 128, 9216),
+            (1, 2048, 2048, 512),
+            (1, 3072, 3072, 768),
+            (1, 384, 384, 512),
+            (1, 4096, 4096, 1024),
+            (1, 512, 512, 1024),
+            (1, 512, 512, 2048),
+            (1, 512, 512, 32128),
+            (1, 512, 512, 384),
+            (1, 512, 512, 50272),
+            (1, 512, 512, 512),
+            (1, 768, 768, 3072),
+            (1, 768, 768, 32128),
+            (1, 768, 768, 50257),
+            (1, 768, 768, 512),
+            (1, 768, 768, 51865),
+            (1, 768, 768, 768),
+            (10, 1024, 1024, 1024),
+            (10, 1024, 1024, 4096),
+            (10, 1, 1, 128),
+            (10, 2048, 2048, 512),
+            (10, 3072, 3072, 768),
+            (10, 4096, 4096, 1024),
+            (10, 512, 512, 2048),
+            (10, 512, 512, 512),
+            (10, 768, 768, 3072),
+            (10, 768, 768, 768),
+            (1024, 160, 160, 256),
+            (1024, 384, 384, 192),
+            (1024, 512, 512, 256),
+            (1024, 640, 640, 640),
+            (128, 1, 1, 9216),
+            (15, 1024, 1024, 512),
+            (15, 384, 384, 512),
+            (15, 512, 512, 1024),
+            (15, 512, 512, 384),
+            (1500, 768, 768, 768),
+            (16384, 32, 32, 256),
+            (19, 1024, 1024, 256008),
+            (196, 1024, 1024, 512),
+            (196, 768, 768, 384),
+            (197, 1024, 1024, 1024),
+            (197, 768, 768, 768),
+            (2, 512, 512, 1),
+            (2, 512, 512, 512),
+            (2048, 768, 768, 262),
+            (225, 512, 512, 12),
+            (225, 512, 512, 16),
+            (225, 512, 512, 24),
+            (225, 512, 512, 32),
+            (225, 512, 512, 3),
+            (225, 512, 512, 4),
+            (225, 512, 512, 6),
+            (225, 512, 512, 8),
+            (256, 1024, 1024, 512),
+            (256, 1280, 1280, 1280),
+            (256, 256, 256, 256),
+            (256, 768, 768, 384),
+            (32, 1536, 1536, 250880),
+            (4, 768, 768, 51865),
+            (4, 768, 768, 768),
+            (4096, 320, 320, 320),
+            (4096, 64, 64, 256),
+            (45, 768, 768, 50257),
+            (45, 768, 768, 768),
+            (49, 1536, 1536, 768),
+            (49, 2048, 2048, 1024),
+            (5, 1024, 1024, 1024),
+            (5, 1024, 1024, 3072),
+            (59, 1024, 1024, 512),
+            (59, 512, 512, 1024),
+            (59, 512, 512, 50272),
+            (64, 1280, 1280, 1280),
+            (64, 1536, 1536, 768),
+            (64, 2048, 2048, 1024),
+            (7, 18176, 18176, 4544),
+            (7, 4544, 4544, 18176),
+            (7, 4544, 4544, 4544),
+            (7, 4544, 4544, 4672),
+            (7, 4544, 4544, 65024),
+            (7, 768, 768, 2),
+            (768, 196, 196, 384),
+            (784, 384, 384, 192),
+            (784, 512, 512, 256),
+            (9, 768, 768, 1280),
+            (9, 768, 768, 320),
+            (9, 768, 768, 640),
+            (920, 256, 256, 256),
+        ],
+    }
+}
+
+
+def run(params, *, device) -> list:
+    """Compare ttnn.matmul against torch.matmul for one traced pair of 2D operand shapes.
+
+    params is (in0_h, in0_w, in1_h, in1_w); returns [check_with_pcc result, e2e_perf].
+    """
+    rows0, cols0, rows1, cols1 = params
+    # Random float32 operands; torch provides the reference product.
+    golden_a = torch.rand([rows0, cols0], dtype=torch.float32)
+    golden_b = torch.rand([rows1, cols1], dtype=torch.float32)
+    golden_out = torch.matmul(golden_a, golden_b)
+
+    def to_device(tensor):
+        return ttnn.from_torch(tensor, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)
+
+    tt_a, tt_b = to_device(golden_a), to_device(golden_b)
+    timer = start_measuring_time()  # timed region includes the conversion back to torch
+    tt_out = ttnn.to_torch(ttnn.matmul(tt_a, tt_b))
+    e2e_perf = stop_measuring_time(timer)
+    return [check_with_pcc(golden_out, tt_out, 0.99), e2e_perf]
diff --git a/tests/sweep_framework/sweeps/reduction/traces/argmax_traces.py b/tests/sweep_framework/sweeps/reduction/traces/argmax_traces.py
new file mode 100644
index 00000000000..07e7d542ad8
--- /dev/null
+++ b/tests/sweep_framework/sweeps/reduction/traces/argmax_traces.py
@@ -0,0 +1,46 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+
+import torch
+
+import ttnn
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+TIMEOUT = 15  # NOTE(review): presumably a per-test timeout in seconds read by the sweep framework - confirm
+
+parameters = {
+    "default": {  # keys mirror the parameter names of run()
+        "height": [1, 2],
+        "width": [7, 51865],
+        "dim": [-1],
+        "dtype": [ttnn.float32, ttnn.bfloat16],
+        "layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
+    }
+}
+
+
+def run(height, width, dim, dtype, layout, *, device) -> list:
+    """Compare ttnn.argmax against torch.argmax on a random [height, width] tensor.
+
+    dtype and layout are forwarded to ttnn.from_torch for the device-side input.
+    Returns [check_with_pcc result, e2e_perf].
+    """
+    # Reference result from torch on float32 data.
+    golden_in = torch.rand([height, width], dtype=torch.float32)
+    golden_out = torch.argmax(golden_in, dim)
+
+    # The same data is converted with the requested ttnn dtype and layout.
+    tt_in = ttnn.from_torch(golden_in, dtype=dtype, layout=layout, device=device)
+
+    # The timed region covers the op and the conversion back to torch.
+    timer = start_measuring_time()
+    tt_result = ttnn.argmax(tt_in, dim=dim)
+    tt_out = ttnn.to_torch(tt_result)
+    e2e_perf = stop_measuring_time(timer)
+    pcc_target = 0.9999
+    return [check_with_pcc(golden_out, tt_out, pcc_target), e2e_perf]
diff --git a/tests/sweep_framework/sweeps/reduction/traces/max_traces.py b/tests/sweep_framework/sweeps/reduction/traces/max_traces.py
new file mode 100644
index 00000000000..93bd1187937
--- /dev/null
+++ b/tests/sweep_framework/sweeps/reduction/traces/max_traces.py
@@ -0,0 +1,44 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+
+import torch
+
+import ttnn
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+TIMEOUT = 15  # NOTE(review): presumably a per-test timeout in seconds read by the sweep framework - confirm
+
+parameters = {
+    "default": {  # keys mirror the parameter names of run()
+        "height": [25],
+        "width": [4],
+        "dtype": [ttnn.float32, ttnn.bfloat16],
+        "layout": [ttnn.TILE_LAYOUT, ttnn.ROW_MAJOR_LAYOUT],
+    }
+}
+
+
+def run(height, width, dtype, layout, *, device) -> list:
+    """Compare ttnn.max against torch.max, both called without a dim argument.
+
+    The input is a random [height, width] tensor; dtype and layout are forwarded
+    to ttnn.from_torch. Returns [check_with_pcc result, e2e_perf].
+    """
+    # Reference result from torch on float32 data.
+    golden_in = torch.rand([height, width], dtype=torch.float32)
+    golden_out = torch.max(golden_in)
+
+    tt_in = ttnn.from_torch(golden_in, dtype=dtype, layout=layout, device=device)
+
+    # The timed region covers the op and the conversion back to torch.
+    timer = start_measuring_time()
+    tt_result = ttnn.max(tt_in)
+    tt_out = ttnn.to_torch(tt_result)
+    e2e_perf = stop_measuring_time(timer)
+    pcc_target = 0.9999
+    return [check_with_pcc(golden_out, tt_out, pcc_target), e2e_perf]
diff --git a/tests/sweep_framework/sweeps/reduction/traces/mean_traces.py b/tests/sweep_framework/sweeps/reduction/traces/mean_traces.py
new file mode 100644
index 00000000000..921bb7f4c97
--- /dev/null
+++ b/tests/sweep_framework/sweeps/reduction/traces/mean_traces.py
@@ -0,0 +1,127 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+
+import torch
+
+import ttnn
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+TIMEOUT = 15  # NOTE(review): presumably a per-test timeout in seconds read by the sweep framework - confirm
+
+parameters = {
+    "default": {
+        "params": [  # each entry: (input_shape, dim, keepdim); a parenthesized single value such as (-1) is a plain int
+            ((1, 1, 1024), (-1), True),
+            ((1, 1, 512), (-1), True),
+            ((1, 1, 768), (-1), True),
+            ((1, 10, 1024), (-1), True),
+            ((1, 10, 512), (-1), True),
+            ((1, 10, 768), (-1), True),
+            ((1, 1008, 7, 7), (-1, -2), True),
+            ((1, 1024, 7, 7), (-1, -2), True),
+            ((1, 1024, 7, 7), (2, 3), True),
+            ((1, 1024, 8, 8), (-1, -2), True),
+            ((1, 104, 28, 28), (-1, -2), True),
+            ((1, 1056, 48, 48), (-1, -2), True),
+            ((1, 120, 14, 14), (-1, -2), True),
+            ((1, 120, 28, 28), (-1, -2), True),
+            ((1, 120, 28, 28), (2, 3), True),
+            ((1, 120, 40, 40), (-1, -2), True),
+            ((1, 1232, 14, 14), (-1, -2), True),
+            ((1, 1280, 10, 10), (-1, -2), True),
+            ((1, 1280, 12, 12), (-1, -2), True),
+            ((1, 1280, 7, 7), (-1, -2), True),
+            ((1, 1280, 8, 8), (-1, -2), True),
+            ((1, 1280, 9, 9), (-1, -2), True),
+            ((1, 1392, 14, 14), (-1, -2), True),
+            ((1, 144, 14, 14), (-1, -2), True),
+            ((1, 144, 28, 28), (-1, -2), True),
+            ((1, 15, 512), (-1), True),
+            ((1, 1512, 7, 7), (-1, -2), True),
+            ((1, 1536, 8, 8), (-1, -2), True),
+            ((1, 16, 56, 56), (-1, -2), True),
+            ((1, 1664, 7, 7), (-1, -2), True),
+            ((1, 1920, 7, 7), (-1, -2), True),
+            ((1, 196, 1024), (1), False),
+            ((1, 196, 768), (1), False),
+            ((1, 2016, 7, 7), (-1, -2), True),
+            ((1, 2048, 10, 10), (-1, -2), True),
+            ((1, 2048, 7, 7), (-1, -2), True),
+            ((1, 208, 14, 14), (-1, -2), True),
+            ((1, 216, 28, 28), (-1, -2), True),
+            ((1, 2208, 7, 7), (-1, -2), True),
+            ((1, 224, 56, 56), (-1, -2), True),
+            ((1, 232, 56, 56), (-1, -2), True),
+            ((1, 240, 14, 14), (-1, -2), True),
+            ((1, 2520, 7, 7), (-1, -2), True),
+            ((1, 256, 56, 56), (2, 3), True),
+            ((1, 288, 7, 7), (-1, -2), True),
+            ((1, 2904, 24, 24), (-1, -2), True),
+            ((1, 3024, 7, 7), (-1, -2), True),
+            ((1, 320, 14, 14), (-1, -2), True),
+            ((1, 336, 14, 14), (-1, -2), True),
+            ((1, 3712, 7, 7), (-1, -2), True),
+            ((1, 400, 7, 7), (-1, -2), True),
+            ((1, 440, 7, 7), (-1, -2), True),
+            ((1, 448, 28, 28), (-1, -2), True),
+            ((1, 48, 56, 56), (-1, -2), True),
+            ((1, 480, 10, 10), (-1, -2), True),
+            ((1, 480, 14, 14), (-1, -2), True),
+            ((1, 480, 14, 14), (2, 3), True),
+            ((1, 480, 20, 20), (-1, -2), True),
+            ((1, 512, 256), (2), False),
+            ((1, 512, 28, 28), (2, 3), True),
+            ((1, 512, 7, 7), (-1, -2), True),
+            ((1, 528, 96, 96), (-1, -2), True),
+            ((1, 576, 14, 14), (-1, -2), True),
+            ((1, 576, 7, 7), (-1, -2), True),
+            ((1, 64, 56, 56), (-1, -2), True),
+            ((1, 672, 10, 10), (-1, -2), True),
+            ((1, 672, 14, 14), (-1, -2), True),
+            ((1, 672, 14, 14), (2, 3), True),
+            ((1, 672, 20, 20), (-1, -2), True),
+            ((1, 672, 7, 7), (-1, -2), True),
+            ((1, 672, 7, 7), (2, 3), True),
+            ((1, 696, 28, 28), (-1, -2), True),
+            ((1, 72, 28, 28), (-1, -2), True),
+            ((1, 72, 28, 28), (2, 3), True),
+            ((1, 72, 40, 40), (-1, -2), True),
+            ((1, 72, 56, 56), (-1, -2), True),
+            ((1, 7392, 12, 12), (-1, -2), True),
+            ((1, 768, 14, 14), (2, 3), True),
+            ((1, 768, 7, 7), (-1, -2), True),
+            ((1, 768, 8, 8), (-1, -2), True),
+            ((1, 784, 7, 7), (-1, -2), True),
+            ((1, 888, 7, 7), (-1, -2), True),
+            ((1, 896, 14, 14), (-1, -2), True),
+            ((1, 912, 7, 7), (-1, -2), True),
+            ((1, 96, 14, 14), (-1, -2), True),
+            ((1, 960, 7, 7), (-1, -2), True),
+            ((1, 960, 7, 7), (2, 3), True),
+        ],
+    }
+}
+
+
+def run(params, *, device) -> list:
+    """Compare ttnn.mean against torch.mean for one traced configuration.
+
+    params is (input_shape, dim, keepdim); returns [check_with_pcc result, e2e_perf].
+    """
+    shape, dim, keepdim = params
+    # Reference result from torch on a random float32 input.
+    golden_in = torch.rand(shape, dtype=torch.float32)
+    golden_out = torch.mean(golden_in, dim, keepdim)
+
+    tt_in = ttnn.from_torch(golden_in, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)
+
+    timer = start_measuring_time()  # timed region includes the conversion back to torch
+    tt_out = ttnn.to_torch(ttnn.mean(tt_in, dim=dim, keepdim=keepdim))
+    e2e_perf = stop_measuring_time(timer)
+    pcc_target = 0.9999
+    return [check_with_pcc(golden_out, tt_out, pcc_target), e2e_perf]
diff --git a/tests/sweep_framework/sweeps/reduction/traces/sum_traces.py b/tests/sweep_framework/sweeps/reduction/traces/sum_traces.py
new file mode 100644
index 00000000000..94b0c4c4c15
--- /dev/null
+++ b/tests/sweep_framework/sweeps/reduction/traces/sum_traces.py
@@ -0,0 +1,96 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+
+import torch
+
+import ttnn
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+TIMEOUT = 15  # NOTE(review): presumably a per-test timeout in seconds read by the sweep framework - confirm
+
+parameters = {
+    "default": {
+        "params": [  # each entry: (input_shape, dim, keepdim); a parenthesized single value such as (0) is a plain int
+            ((1, 1, 768), (0, 1), True),
+            ((1, 1000), (0), True),
+            ((1, 1024, 256), (0, 1), True),
+            ((1, 1024, 7, 7), (2, 3), True),
+            ((1, 10), (0), True),
+            ((1, 12, 16), (1), False),
+            ((1, 12, 16), (2), False),
+            ((1, 120, 28, 28), (2, 3), True),
+            ((1, 128), (0), True),
+            ((1, 12), (0), True),
+            ((1, 16384, 256), (0, 1), True),
+            ((1, 197, 1024), (0, 1), True),
+            ((1, 197, 768), (0, 1), True),
+            ((1, 21843), (0), True),
+            ((1, 256, 256), (0, 1), True),
+            ((1, 256, 56, 56), (2, 3), True),
+            ((1, 3), (0), True),
+            ((1, 4096, 256), (0, 1), True),
+            ((1, 480, 14, 14), (2, 3), True),
+            ((1, 512, 28, 28), (2, 3), True),
+            ((1, 512), (1), True),
+            ((1, 64), (0), True),
+            ((1, 672, 14, 14), (2, 3), True),
+            ((1, 672, 7, 7), (2, 3), True),
+            ((1, 72, 28, 28), (2, 3), True),
+            ((1, 768, 14, 14), (2, 3), True),
+            ((1, 768, 384), (0, 1), True),
+            ((1, 784), (0), True),
+            ((1, 960, 7, 7), (2, 3), True),
+            ((1024, 160), (0), True),
+            ((1024, 640), (0), True),
+            ((14, 2048), (0), True),
+            ((14, 512), (0), True),
+            ((16384, 128), (0), True),
+            ((16384, 32), (0), True),
+            ((196, 3072), (0), True),
+            ((196, 768), (0), True),
+            ((197, 1024), (0), True),
+            ((197, 3072), (0), True),
+            ((197, 4096), (0), True),
+            ((197, 768), (0), True),
+            ((2, 512), (1), True),
+            ((2, 7, 512), (0), True),
+            ((256, 1024), (0), True),
+            ((256, 160), (0), True),
+            ((256, 256), (0), True),
+            ((256, 32), (0), True),
+            ((256, 512), (0), True),
+            ((256, 64), (0), True),
+            ((4096, 256), (0), True),
+            ((4096, 64), (0), True),
+            ((50, 3072), (0), True),
+            ((50, 768), (0), True),
+            ((768, 196), (0), True),
+            ((2, 1), None, False),
+            ((1), None, False),
+        ],
+    }
+}
+
+
+def run(params, *, device) -> list:
+    """Compare ttnn.sum against torch.sum for one traced configuration.
+
+    params is (input_shape, dim, keepdim); returns [check_with_pcc result, e2e_perf].
+    """
+    shape, dim, keepdim = params
+    # Reference result from torch on a random float32 input.
+    golden_in = torch.rand(shape, dtype=torch.float32)
+    golden_out = torch.sum(golden_in, dim, keepdim)
+
+    tt_in = ttnn.from_torch(golden_in, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)
+
+    timer = start_measuring_time()  # timed region includes the conversion back to torch
+    tt_out = ttnn.to_torch(ttnn.sum(tt_in, dim=dim, keepdim=keepdim))
+    e2e_perf = stop_measuring_time(timer)
+    pcc_target = 0.9999
+    return [check_with_pcc(golden_out, tt_out, pcc_target), e2e_perf]
diff --git a/tests/sweep_framework/sweeps/reduction/traces/topk_traces.py b/tests/sweep_framework/sweeps/reduction/traces/topk_traces.py
new file mode 100644
index 00000000000..51de7b89117
--- /dev/null
+++ b/tests/sweep_framework/sweeps/reduction/traces/topk_traces.py
@@ -0,0 +1,44 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional, Tuple
+
+import torch
+
+import ttnn
+
+from tests.ttnn.utils_for_testing import check_with_pcc, start_measuring_time, stop_measuring_time
+from models.utility_functions import torch_random
+
+TIMEOUT = 15  # NOTE(review): presumably a per-test timeout in seconds read by the sweep framework - confirm
+
+parameters = {
+    "default": {
+        "params": [  # each entry: (input_shape, k), unpacked in that order by run()
+            ((1, 5), 3),
+            ((1, 32), 3),
+            ((1, 50), 50),
+            ((1, 50), 50257),  # NOTE(review): k is larger than the last dimension (50) - confirm against the trace
+        ],
+    }
+}
+
+
+def run(params, *, device) -> list:
+    """Compare the values returned by ttnn.topk against torch.topk for one traced case.
+
+    params is (input_shape, k); returns [check_with_pcc result, e2e_perf].
+    """
+    [input_shape, k] = params
+    torch_input_tensor = torch.rand(input_shape, dtype=torch.float32)
+    # torch.topk returns a (values, indices) pair, not a tensor; only values are PCC-checked.
+    torch_output_values, _torch_output_indices = torch.topk(torch_input_tensor, k)
+
+    input_tensor = ttnn.from_torch(torch_input_tensor, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)
+
+    start_time = start_measuring_time()
+    output_values, _output_indices = ttnn.topk(input_tensor, k)  # assumes the same pair layout as torch
+    output_values = ttnn.to_torch(output_values)
+    e2e_perf = stop_measuring_time(start_time)
+    return [check_with_pcc(torch_output_values, output_values, 0.9999), e2e_perf]