diff --git a/tests/ttnn/unit_tests/operations/test_matmul.py b/tests/ttnn/unit_tests/operations/test_matmul.py
index b1db5d07116..b24f9401278 100644
--- a/tests/ttnn/unit_tests/operations/test_matmul.py
+++ b/tests/ttnn/unit_tests/operations/test_matmul.py
@@ -992,8 +992,8 @@ def run_matmul_1d_multiple_output_blocks_per_core(
         per_core_N = n // num_cores // 32 + uneven_width
     else:
         in0_block_w = k // 32
-        per_core_M = (m // 32 // num_cores + uneven_width,)
-        per_core_N = (n // 32,)
+        per_core_M = m // 32 // num_cores + uneven_width
+        per_core_N = n // 32
     out_block_h = per_core_M // num_out_block_h
     out_block_w = per_core_N // num_out_block_w
     out_subblock_h, out_subblock_w, _ = find_max_subblock(out_block_h, out_block_w)
diff --git a/ttnn/cpp/ttnn/operations/matmul/device/kernels/compute/bmm_large_block_zm_fused_bias_activation.cpp b/ttnn/cpp/ttnn/operations/matmul/device/kernels/compute/bmm_large_block_zm_fused_bias_activation.cpp
index 5b065dd8b1d..24ea3b81bbc 100644
--- a/ttnn/cpp/ttnn/operations/matmul/device/kernels/compute/bmm_large_block_zm_fused_bias_activation.cpp
+++ b/ttnn/cpp/ttnn/operations/matmul/device/kernels/compute/bmm_large_block_zm_fused_bias_activation.cpp
@@ -15,8 +15,6 @@
 
 #include "compute_kernel_api/eltwise_unary/sfpu_split_includes.h"
 
-#include "debug/dprint.h"
-
 // Please update
 // tests/tt_metal/tt_metal/perf_microbenchmark/1_compute_mm/kernels/bmm_large_block_zm_fused_bias_activation_copy.cpp
 // when making any changes to this file.
diff --git a/ttnn/cpp/ttnn/operations/matmul/device/kernels/dataflow/reader_bmm_tile_layout_in0_receiver.cpp b/ttnn/cpp/ttnn/operations/matmul/device/kernels/dataflow/reader_bmm_tile_layout_in0_receiver.cpp
index 702d1aaf985..1835acc8ecc 100644
--- a/ttnn/cpp/ttnn/operations/matmul/device/kernels/dataflow/reader_bmm_tile_layout_in0_receiver.cpp
+++ b/ttnn/cpp/ttnn/operations/matmul/device/kernels/dataflow/reader_bmm_tile_layout_in0_receiver.cpp
@@ -7,8 +7,6 @@
 #include "dataflow_api.h"
 #include "hostdevcommon/common_values.hpp"
 
-#include "debug/dprint.h"
-
 void kernel_main() {
     // in0 mcast args
     const uint32_t in0_mcast_sender_noc_x = get_arg_val<uint32_t>(0);
diff --git a/ttnn/cpp/ttnn/operations/matmul/device/kernels/dataflow/reader_bmm_tile_layout_in0_sender_padding.cpp b/ttnn/cpp/ttnn/operations/matmul/device/kernels/dataflow/reader_bmm_tile_layout_in0_sender_padding.cpp
index b61f4adfa08..c84a4d4825a 100644
--- a/ttnn/cpp/ttnn/operations/matmul/device/kernels/dataflow/reader_bmm_tile_layout_in0_sender_padding.cpp
+++ b/ttnn/cpp/ttnn/operations/matmul/device/kernels/dataflow/reader_bmm_tile_layout_in0_sender_padding.cpp
@@ -8,8 +8,6 @@
 #include "hostdevcommon/common_values.hpp"
 #include "ttnn/cpp/ttnn/operations/ccl/kernel_common/worker_sync_utils.hpp"
 
-#include "debug/dprint.h"
-
 void kernel_main() {
     uint32_t rt_args_idx = 0;
     // in0 tensor args
diff --git a/ttnn/cpp/ttnn/operations/matmul/device/kernels/dataflow/reader_bmm_tile_layout_in1_sender_writer_padding.cpp b/ttnn/cpp/ttnn/operations/matmul/device/kernels/dataflow/reader_bmm_tile_layout_in1_sender_writer_padding.cpp
index 503033f6566..70464ae2c57 100644
--- a/ttnn/cpp/ttnn/operations/matmul/device/kernels/dataflow/reader_bmm_tile_layout_in1_sender_writer_padding.cpp
+++ b/ttnn/cpp/ttnn/operations/matmul/device/kernels/dataflow/reader_bmm_tile_layout_in1_sender_writer_padding.cpp
@@ -8,8 +8,6 @@
 #include "hostdevcommon/common_values.hpp"
 #include "ttnn/cpp/ttnn/operations/ccl/kernel_common/worker_sync_utils.hpp"
 
-#include "debug/dprint.h"
-
 void kernel_main() {
     // READER
     uint32_t rt_args_idx = 0;
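
For context on the test_matmul.py hunk: the removed trailing commas had turned per_core_M and per_core_N into one-element tuples rather than integers, which breaks the integer divisions that follow them (out_block_h = per_core_M // num_out_block_h). A minimal sketch of the pitfall, using made-up values that are not part of the diff:

    # Hypothetical values, for illustration only.
    m, num_cores, uneven_width, num_out_block_h = 512, 8, 0, 2

    per_core_M = (m // 32 // num_cores + uneven_width,)  # old form: a tuple, e.g. (2,)
    # per_core_M // num_out_block_h                      # would raise TypeError

    per_core_M = m // 32 // num_cores + uneven_width     # fixed form: an int, e.g. 2
    out_block_h = per_core_M // num_out_block_h          # 1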