diff --git a/tests/ttnn/unit_tests/operations/test_new_conv2d.py b/tests/ttnn/unit_tests/operations/test_new_conv2d.py index b090526b414..4d82c2141ec 100644 --- a/tests/ttnn/unit_tests/operations/test_new_conv2d.py +++ b/tests/ttnn/unit_tests/operations/test_new_conv2d.py @@ -63,9 +63,8 @@ def run_conv( deallocate_activation=False, debug=False, groups=1, + has_bias=True, ): - # has_bias = False - has_bias = True torch.manual_seed(0) conv_input_shape = [batch_size, input_channels, input_height, input_width] conv_weight_shape = [output_channels, input_channels // groups, filter_height, filter_width] @@ -467,6 +466,7 @@ def test_resnet50_conv_gs( ) @pytest.mark.parametrize("math_fidelity", [ttnn.MathFidelity.LoFi]) @pytest.mark.parametrize("packer_l1_acc", [True, False], ids=["pack_l1", "no_pack_l1"]) +@pytest.mark.parametrize("has_bias", [True, False], ids=["with_bias", "no_bias"]) def test_resnet50_conv_wh( device, use_program_cache, @@ -487,6 +487,7 @@ def test_resnet50_conv_wh( use_1d_systolic_array, config_override, packer_l1_acc, + has_bias, ): if device.core_grid.y == 7: pytest.skip("Issue #6992: Statically allocated circular buffers in program clash with L1 buffers on core range") @@ -533,6 +534,7 @@ def test_resnet50_conv_wh( transpose_mcast=use_1d_systolic_array, ## use RM (transpose_mcast=False) with 2D on WH packer_l1_acc=packer_l1_acc, fp32_accum=False, + has_bias=has_bias, ) diff --git a/ttnn/cpp/ttnn/operations/conv2d/device/kernels/conv_bmm_tilize_col_major_out_blocks.cpp b/ttnn/cpp/ttnn/operations/conv2d/device/kernels/conv_bmm_tilize_col_major_out_blocks.cpp index c22c8245009..5f1e11bc1a4 100644 --- a/ttnn/cpp/ttnn/operations/conv2d/device/kernels/conv_bmm_tilize_col_major_out_blocks.cpp +++ b/ttnn/cpp/ttnn/operations/conv2d/device/kernels/conv_bmm_tilize_col_major_out_blocks.cpp @@ -285,6 +285,10 @@ void MAIN { //then pop to update fifo rd pointer cb_wait_front(matmul_partials_cb, out_block_num_tiles); cb_pop_front(matmul_partials_cb, out_block_num_tiles); + if constexpr (spill) { + UNPACK( cb_interface[matmul_partials_cb].fifo_rd_ptr = partials_cb_read_ptr ); + PACK( cb_interface[matmul_partials_cb].fifo_wr_ptr = partials_cb_write_ptr ); + } } // never reload when with bias, bias uses interm buffer enable_reload = false; @@ -293,19 +297,32 @@ void MAIN { if (in0_block_w_i < in0_num_blocks_w - 2) { cb_wait_front(matmul_partials_cb, out_block_num_tiles); cb_pop_front(matmul_partials_cb, out_block_num_tiles); + if constexpr (spill) { + UNPACK( cb_interface[matmul_partials_cb].fifo_rd_ptr = partials_cb_read_ptr ); + PACK( cb_interface[matmul_partials_cb].fifo_wr_ptr = partials_cb_write_ptr ); + } } if (in0_block_w_i == in0_num_blocks_w - 2) { enable_reload = true; } #endif #else - if constexpr (spill) { enable_reload = true; } - #endif + if constexpr (spill) { + enable_reload = true; - if constexpr (spill) { - if (!last_out) { - UNPACK( cb_interface[matmul_partials_cb].fifo_rd_ptr = partials_cb_read_ptr ); - PACK( cb_interface[matmul_partials_cb].fifo_wr_ptr = partials_cb_write_ptr ); + #ifdef FUSE_BIAS + if (!last_out) { + UNPACK( cb_interface[matmul_partials_cb].fifo_rd_ptr = partials_cb_read_ptr ); + PACK( cb_interface[matmul_partials_cb].fifo_wr_ptr = partials_cb_write_ptr ); + } + #else + if (!last_out) { + UNPACK( cb_interface[matmul_partials_cb].fifo_rd_ptr = partials_cb_read_ptr ); + } + if (in0_block_w_i < in0_num_blocks_w - 2) { + PACK( cb_interface[matmul_partials_cb].fifo_wr_ptr = partials_cb_write_ptr ); + } + #endif } - } + #endif cb_pop_front(mm_in0_cb_id, in0_block_num_tiles); cb_pop_front(in1_cb_id, in1_block_num_tiles);