Skip to content

Commit

Permalink
optimize write barrier
Browse files Browse the repository at this point in the history
  • Loading branch information
jbaumanTT committed Dec 16, 2024
1 parent 61ce70d commit 99a2991
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 4 deletions.
15 changes: 12 additions & 3 deletions tt_metal/impl/dispatch/command_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1029,6 +1029,12 @@ void EnqueueProgramCommand::assemble_device_commands(
read_length = max_paged_length_per_sub_cmd;
write_length = read_length;
}
if (!kernel_bins_dispatch_subcmds.empty() && !kernel_bins_dispatch_subcmds.back().empty()) {
auto& back = kernel_bins_dispatch_subcmds.back().back();
if (back.noc_xy_addr != noc_encoding) {
back.flags = CQ_DISPATCH_CMD_PACKED_WRITE_LARGE_FLAG_UNLINK;
}
}
kernel_bins_dispatch_subcmds.back().emplace_back(CQDispatchWritePackedLargeSubCmd{
.noc_xy_addr = noc_encoding,
.addr = kernel_config_buffer_offset,
Expand All @@ -1050,9 +1056,12 @@ void EnqueueProgramCommand::assemble_device_commands(
}
}
}
// Unlink the last subcmd of the current core range
if (!write_linear) {
kernel_bins_dispatch_subcmds.back().back().flags |= CQ_DISPATCH_CMD_PACKED_WRITE_LARGE_FLAG_UNLINK;
}
// Unlink the last subcmd of every non-empty kernel binary subcmd list
for (auto& subcmd_list : kernel_bins_dispatch_subcmds) {
if (!subcmd_list.empty()) {
subcmd_list.back().flags |= CQ_DISPATCH_CMD_PACKED_WRITE_LARGE_FLAG_UNLINK;

}
}
uint32_t pcie_alignment = hal.get_alignment(HalMemType::HOST);
Expand Down
11 changes: 10 additions & 1 deletion tt_metal/impl/dispatch/kernels/cq_dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,7 @@ void process_write_packed_large(
CQDispatchWritePackedLargeSubCmd* sub_cmd_ptr = (CQDispatchWritePackedLargeSubCmd*)l1_cache;

bool init_state = true;
bool must_barrier = true;
while (count != 0) {
uint32_t dst_addr = sub_cmd_ptr->addr + local_write_offset;
uint32_t length = sub_cmd_ptr->length;
Expand All @@ -706,7 +707,8 @@ void process_write_packed_large(
mcasts += num_dests * writes;
noc_nonposted_writes_acked[noc_index] = mcasts;
writes = 0;
noc_async_write_barrier();
if (must_barrier)
noc_async_write_barrier();
};

// Only re-init state after we have unlinked the last transaction
Expand All @@ -717,8 +719,10 @@ void process_write_packed_large(
uint32_t dst_noc = sub_cmd_ptr->noc_xy_addr;
// TODO: Linking should be set to true once atomic txn is handled properly
cq_noc_async_write_init_state<CQ_NOC_sNdl, true, true>(0, get_noc_addr_helper(dst_noc, dst_addr));
must_barrier = true;
}


sub_cmd_ptr++;

while (length != 0) {
Expand Down Expand Up @@ -751,6 +755,7 @@ void process_write_packed_large(
xfer_size = available_data;
wait_for_barrier();
cq_noc_async_write_with_state_any_len(data_ptr, dst_addr, xfer_size, num_dests);
must_barrier = false;
writes++;
} else {
xfer_size = length;
Expand All @@ -761,6 +766,7 @@ void process_write_packed_large(
uint32_t dst_addr2 = dst_addr;
wait_for_barrier();
cq_noc_async_write_with_state<CQ_NOC_SnDL>(src_addr, dst_addr2, NOC_MAX_BURST_SIZE, num_dests);
must_barrier = false;
writes++;
src_addr += NOC_MAX_BURST_SIZE;
dst_addr2 += NOC_MAX_BURST_SIZE;
Expand All @@ -782,9 +788,12 @@ void process_write_packed_large(
cq_noc_async_write_with_state<CQ_NOC_SnDL, CQ_NOC_wait>(
data_ptr + data_offset, dst_addr + data_offset, rem_xfer_size, num_dests);
writes++;
must_barrier = true;
// Re-arm the barrier: the next wait_for_barrier() call must issue noc_async_write_barrier().
} else {
wait_for_barrier();
cq_noc_async_write_with_state_any_len(data_ptr, dst_addr, xfer_size, num_dests);
must_barrier = false;
writes++;
}
}
Expand Down

0 comments on commit 99a2991

Please sign in to comment.