diff --git a/tt_metal/impl/dispatch/kernels/cq_dispatch.cpp b/tt_metal/impl/dispatch/kernels/cq_dispatch.cpp index e1740876f52c..861cb9be8c3b 100644 --- a/tt_metal/impl/dispatch/kernels/cq_dispatch.cpp +++ b/tt_metal/impl/dispatch/kernels/cq_dispatch.cpp @@ -574,6 +574,9 @@ void process_write_packed( uint32_t writes = 0; uint32_t mcasts = 0; auto wait_for_barrier = [&]() { + if (!mcast) { + return; + } noc_nonposted_writes_num_issued[noc_index] += writes; noc_nonposted_writes_acked[noc_index] += mcasts; writes = 0; @@ -757,7 +760,6 @@ void process_write_packed_large( wait_for_barrier(); cq_noc_async_write_with_state_any_len(data_ptr, dst_addr, xfer_size, num_dests); must_barrier = false; - writes += div_up(xfer_size, NOC_MAX_BURST_SIZE); } else { xfer_size = length; if (unlink) { @@ -769,16 +771,15 @@ void process_write_packed_large( uint32_t data_offset = xfer_size - rem_xfer_size; cq_noc_async_write_with_state( data_ptr + data_offset, dst_addr + data_offset, rem_xfer_size, num_dests); - writes += div_up(xfer_size, NOC_MAX_BURST_SIZE); must_barrier = true; // Later writes must barrier. } else { wait_for_barrier(); cq_noc_async_write_with_state_any_len(data_ptr, dst_addr, xfer_size, num_dests); must_barrier = false; - writes++; } } + writes += div_up(xfer_size, NOC_MAX_BURST_SIZE); length -= xfer_size; data_ptr += xfer_size; dst_addr += xfer_size;