Skip to content

Commit

Permalink
#10108: Fix pcc in conv2d w/o bias (#10887)
Browse files Browse the repository at this point in the history
In the case where bias is disabled and the circular buffers (CBs) for partials and outputs are shared,
partial-output writes were stomping the output of the previous in0_num_blocks_w iteration.

Co-authored-by: Pavle Josipovic <[email protected]>
  • Loading branch information
pavlejosipovic and Pavle Josipovic authored Jul 31, 2024
1 parent fbf58c9 commit bf90b82
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 9 deletions.
6 changes: 4 additions & 2 deletions tests/ttnn/unit_tests/operations/test_new_conv2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,8 @@ def run_conv(
deallocate_activation=False,
debug=False,
groups=1,
has_bias=True,
):
# has_bias = False
has_bias = True
torch.manual_seed(0)
conv_input_shape = [batch_size, input_channels, input_height, input_width]
conv_weight_shape = [output_channels, input_channels // groups, filter_height, filter_width]
Expand Down Expand Up @@ -467,6 +466,7 @@ def test_resnet50_conv_gs(
)
@pytest.mark.parametrize("math_fidelity", [ttnn.MathFidelity.LoFi])
@pytest.mark.parametrize("packer_l1_acc", [True, False], ids=["pack_l1", "no_pack_l1"])
@pytest.mark.parametrize("has_bias", [True, False], ids=["with_bias", "no_bias"])
def test_resnet50_conv_wh(
device,
use_program_cache,
Expand All @@ -487,6 +487,7 @@ def test_resnet50_conv_wh(
use_1d_systolic_array,
config_override,
packer_l1_acc,
has_bias,
):
if device.core_grid.y == 7:
pytest.skip("Issue #6992: Statically allocated circular buffers in program clash with L1 buffers on core range")
Expand Down Expand Up @@ -533,6 +534,7 @@ def test_resnet50_conv_wh(
transpose_mcast=use_1d_systolic_array, ## use RM (transpose_mcast=False) with 2D on WH
packer_l1_acc=packer_l1_acc,
fp32_accum=False,
has_bias=has_bias,
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,10 @@ void MAIN {
//then pop to update fifo rd pointer
cb_wait_front(matmul_partials_cb, out_block_num_tiles);
cb_pop_front(matmul_partials_cb, out_block_num_tiles);
if constexpr (spill) {
UNPACK( cb_interface[matmul_partials_cb].fifo_rd_ptr = partials_cb_read_ptr );
PACK( cb_interface[matmul_partials_cb].fifo_wr_ptr = partials_cb_write_ptr );
}
}
// never reload when with bias, bias uses interm buffer
enable_reload = false;
Expand All @@ -293,19 +297,32 @@ void MAIN {
if (in0_block_w_i < in0_num_blocks_w - 2) {
cb_wait_front(matmul_partials_cb, out_block_num_tiles);
cb_pop_front(matmul_partials_cb, out_block_num_tiles);
if constexpr (spill) {
UNPACK( cb_interface[matmul_partials_cb].fifo_rd_ptr = partials_cb_read_ptr );
PACK( cb_interface[matmul_partials_cb].fifo_wr_ptr = partials_cb_write_ptr );
}
}
if (in0_block_w_i == in0_num_blocks_w - 2) { enable_reload = true; }
#endif
#else
if constexpr (spill) { enable_reload = true; }
#endif
if constexpr (spill) {
enable_reload = true;

if constexpr (spill) {
if (!last_out) {
UNPACK( cb_interface[matmul_partials_cb].fifo_rd_ptr = partials_cb_read_ptr );
PACK( cb_interface[matmul_partials_cb].fifo_wr_ptr = partials_cb_write_ptr );
#ifdef FUSE_BIAS
if (!last_out) {
UNPACK( cb_interface[matmul_partials_cb].fifo_rd_ptr = partials_cb_read_ptr );
PACK( cb_interface[matmul_partials_cb].fifo_wr_ptr = partials_cb_write_ptr );
}
#else
if (!last_out) {
UNPACK( cb_interface[matmul_partials_cb].fifo_rd_ptr = partials_cb_read_ptr );
}
if (in0_block_w_i < in0_num_blocks_w - 2) {
PACK( cb_interface[matmul_partials_cb].fifo_wr_ptr = partials_cb_write_ptr );
}
#endif
}
}
#endif

cb_pop_front(mm_in0_cb_id, in0_block_num_tiles);
cb_pop_front(in1_cb_id, in1_block_num_tiles);
Expand Down

0 comments on commit bf90b82

Please sign in to comment.