From 2d9add2b56d87f18829e0b12bf3c87c343dec4f7 Mon Sep 17 00:00:00 2001 From: Yu Gao <145494740+yugaoTT@users.noreply.github.com> Date: Fri, 15 Nov 2024 16:54:33 -0500 Subject: [PATCH] Add entry to MM benchmark (#15117) --- .../unit_tests/benchmarks/test_benchmark.py | 60 +------------------ 1 file changed, 2 insertions(+), 58 deletions(-) diff --git a/tests/ttnn/unit_tests/benchmarks/test_benchmark.py b/tests/ttnn/unit_tests/benchmarks/test_benchmark.py index 5d12ede9cc4..102c9f1bf5b 100644 --- a/tests/ttnn/unit_tests/benchmarks/test_benchmark.py +++ b/tests/ttnn/unit_tests/benchmarks/test_benchmark.py @@ -15,64 +15,6 @@ import os -@run_for_wormhole_b0() -# fmt: off -@pytest.mark.parametrize("height,width,average_time", [ - (1024, 1024, 1), -]) -@pytest.mark.parametrize("dtype", [ttnn.bfloat8_b, ttnn.bfloat16]) -# fmt: on -def test_benchmark_ttnn_add(device, use_program_cache, height, width, dtype, average_time): - torch.manual_seed(0) - - torch_input_tensor_a = torch.rand((height, width)) - torch_input_tensor_b = torch.rand((height, width)) - - input_tensor_a = ttnn.from_torch(torch_input_tensor_a, layout=ttnn.TILE_LAYOUT, device=device, dtype=dtype) - input_tensor_b = ttnn.from_torch(torch_input_tensor_b, layout=ttnn.TILE_LAYOUT, device=device, dtype=dtype) - ttnn.matmul(input_tensor_a, input_tensor_b) - total_time = 0 - for i in range(3): - start = time.time() - output = ttnn.add(input_tensor_a, input_tensor_b) - end = time.time() - duration = end - start - total_time = total_time + duration - print(f"ttnn.add: {duration} seconds") - ttnn.to_torch(output) - total_time = total_time / 3 - assert total_time <= average_time - - -@run_for_wormhole_b0() -# fmt: off -@pytest.mark.parametrize("m_size,k_size,n_size,average_time", [ - (384, 1024, 1024, 1), -]) -@pytest.mark.parametrize("dtype", [ttnn.bfloat8_b, ttnn.bfloat16]) -# fmt: on -def test_benchmark_ttnn_matmul(device, use_program_cache, m_size, k_size, n_size, dtype, average_time): - torch.manual_seed(0) - - torch_input_tensor_a = torch.rand((m_size, k_size)) - torch_input_tensor_b = torch.rand((k_size, n_size)) - - input_tensor_a = ttnn.from_torch(torch_input_tensor_a, layout=ttnn.TILE_LAYOUT, device=device, dtype=dtype) - input_tensor_b = ttnn.from_torch(torch_input_tensor_b, layout=ttnn.TILE_LAYOUT, device=device, dtype=dtype) - ttnn.matmul(input_tensor_a, input_tensor_b) - total_time = 0 - for i in range(3): - start = time.time() - output = ttnn.matmul(input_tensor_a, input_tensor_b) - end = time.time() - duration = end - start - total_time = total_time + duration - print(f"ttnn.matmul: {duration} seconds") - ttnn.to_torch(output) - total_time = total_time / 3 - assert total_time <= average_time - - SUBBLOCK_HW_CHOICES = [ (4, 2), (2, 4), @@ -197,10 +139,12 @@ def get_device_freq(): matmul_configs = [ (ttnn.bfloat16, ttnn.MathFidelity.HiFi2, False), (ttnn.bfloat16, ttnn.MathFidelity.HiFi4, False), + (ttnn.bfloat8_b, ttnn.MathFidelity.HiFi2, False), (ttnn.bfloat8_b, ttnn.MathFidelity.LoFi, False), (ttnn.bfloat4_b, ttnn.MathFidelity.LoFi, False), (ttnn.bfloat16, ttnn.MathFidelity.HiFi2, True), (ttnn.bfloat16, ttnn.MathFidelity.HiFi4, True), + (ttnn.bfloat8_b, ttnn.MathFidelity.HiFi2, True), (ttnn.bfloat8_b, ttnn.MathFidelity.LoFi, True), (ttnn.bfloat4_b, ttnn.MathFidelity.LoFi, True), ]