Skip to content

Commit

Permalink
better
Browse files: browse the repository at this point in the history
  • Loading branch information
jbaumanTT committed Dec 17, 2024
1 parent 99a2991 commit dc96a4f
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 29 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -38,6 +38,8 @@ for arg in "$@"; do
esac
done

set -x

# brisc only
echo "###" brisc only
build/test/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch -w 5000 -s 256 -n -t $trace_option $eth_dispatch_option
Expand Down
40 changes: 11 additions & 29 deletions tt_metal/impl/dispatch/kernels/cq_dispatch.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -702,13 +702,14 @@ void process_write_packed_large(
uint32_t pad_size = align(length, alignment) - length;
uint32_t unlink = sub_cmd_ptr->flags & CQ_DISPATCH_CMD_PACKED_WRITE_LARGE_FLAG_UNLINK;
auto wait_for_barrier = [&]() {
noc_nonposted_writes_num_issued[noc_index] += writes;
if (must_barrier) {
noc_nonposted_writes_num_issued[noc_index] += writes;

mcasts += num_dests * writes;
noc_nonposted_writes_acked[noc_index] = mcasts;
writes = 0;
if (must_barrier)
mcasts += num_dests * writes;
noc_nonposted_writes_acked[noc_index] = mcasts;
writes = 0;
noc_async_write_barrier();
}
};

// Only re-init state after we have unlinked the last transaction
Expand Down Expand Up @@ -756,38 +757,19 @@ void process_write_packed_large(
wait_for_barrier();
cq_noc_async_write_with_state_any_len(data_ptr, dst_addr, xfer_size, num_dests);
must_barrier = false;
writes++;
writes += div_up(xfer_size, NOC_MAX_BURST_SIZE);
} else {
xfer_size = length;
if (unlink) {
uint32_t rem_xfer_size = xfer_size;
if (rem_xfer_size > NOC_MAX_BURST_SIZE) {
uint32_t src_addr = data_ptr;
uint32_t dst_addr2 = dst_addr;
wait_for_barrier();
cq_noc_async_write_with_state<CQ_NOC_SnDL>(src_addr, dst_addr2, NOC_MAX_BURST_SIZE, num_dests);
must_barrier = false;
writes++;
src_addr += NOC_MAX_BURST_SIZE;
dst_addr2 += NOC_MAX_BURST_SIZE;
rem_xfer_size -= NOC_MAX_BURST_SIZE;
while (rem_xfer_size > NOC_MAX_BURST_SIZE) {
wait_for_barrier();
cq_noc_async_write_with_state<CQ_NOC_SnDl>(
src_addr, dst_addr2, NOC_MAX_BURST_SIZE, num_dests);
writes++;
src_addr += NOC_MAX_BURST_SIZE;
dst_addr2 += NOC_MAX_BURST_SIZE;
rem_xfer_size -= NOC_MAX_BURST_SIZE;
}
}
wait_for_barrier();
uint32_t rem_xfer_size =
cq_noc_async_write_with_state_any_len<false>(data_ptr, dst_addr, xfer_size, num_dests);
// Unset Link flag
cq_noc_async_write_init_state<CQ_NOC_sndl, true, false>(0, 0, 0);
uint32_t data_offset = xfer_size - rem_xfer_size;
wait_for_barrier();
cq_noc_async_write_with_state<CQ_NOC_SnDL, CQ_NOC_wait>(
data_ptr + data_offset, dst_addr + data_offset, rem_xfer_size, num_dests);
writes++;
writes += div_up(xfer_size, NOC_MAX_BURST_SIZE);
must_barrier = true;
// Later writes must barrier.
} else {
Expand Down

0 comments on commit dc96a4f

Please sign in to comment.