From 2307952333db0a8762dc1b00c77c23b3065e9ba6 Mon Sep 17 00:00:00 2001 From: John Bauman Date: Tue, 3 Dec 2024 15:37:04 +0000 Subject: [PATCH] #15221: Post completion messages to dispatch_s We never wait on the acks from these completion messages, so make them posted to avoid contention from a lot of replies being sent at once. In the case where every worker is sending them at the same time, this can halve the latency from 500ns to 250ns (on wormhole). --- tt_metal/hw/firmware/src/brisc.cc | 6 ++++-- tt_metal/hw/firmware/src/idle_erisc.cc | 10 +++++++++- tt_metal/hw/inc/ethernet/tunneling.h | 9 ++++++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/tt_metal/hw/firmware/src/brisc.cc b/tt_metal/hw/firmware/src/brisc.cc index f376b9746e7..ec28c62af26 100644 --- a/tt_metal/hw/firmware/src/brisc.cc +++ b/tt_metal/hw/firmware/src/brisc.cc @@ -406,7 +406,8 @@ int main() { NOC_UNICAST_WRITE_VC, 1, 31 /*wrap*/, - false /*linked*/); + false /*linked*/, + true /*posted*/); } } @@ -529,7 +530,8 @@ int main() { NOC_UNICAST_WRITE_VC, 1, 31 /*wrap*/, - false /*linked*/); + false /*linked*/, + true /*posted*/); mailboxes->launch_msg_rd_ptr = (launch_msg_rd_ptr + 1) & (launch_msg_buffer_num_entries - 1); } } diff --git a/tt_metal/hw/firmware/src/idle_erisc.cc b/tt_metal/hw/firmware/src/idle_erisc.cc index a425dd5c49d..9de56599b4d 100644 --- a/tt_metal/hw/firmware/src/idle_erisc.cc +++ b/tt_metal/hw/firmware/src/idle_erisc.cc @@ -176,7 +176,15 @@ int main() { DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset); DEBUG_SANITIZE_NOC_ADDR(noc_index, dispatch_addr, 4); CLEAR_PREVIOUS_LAUNCH_MESSAGE_ENTRY_FOR_WATCHER(); - noc_fast_atomic_increment(noc_index, NCRISC_AT_CMD_BUF, dispatch_addr, NOC_UNICAST_WRITE_VC, 1, 31 /*wrap*/, false /*linked*/); + noc_fast_atomic_increment( + noc_index, + NCRISC_AT_CMD_BUF, + dispatch_addr, + NOC_UNICAST_WRITE_VC, + 1, + 31 /*wrap*/, + false /*linked*/, + true /*posted*/); mailboxes->launch_msg_rd_ptr = (launch_msg_rd_ptr + 1) & (launch_msg_buffer_num_entries - 1); } diff --git a/tt_metal/hw/inc/ethernet/tunneling.h b/tt_metal/hw/inc/ethernet/tunneling.h index 375df30fa67..fbbf252619b 100644 --- a/tt_metal/hw/inc/ethernet/tunneling.h +++ b/tt_metal/hw/inc/ethernet/tunneling.h @@ -109,7 +109,14 @@ void notify_dispatch_core_done(uint64_t dispatch_addr) { } DEBUG_SANITIZE_NOC_ADDR(noc_index, dispatch_addr, 4); noc_fast_atomic_increment( - noc_index, NCRISC_AT_CMD_BUF, dispatch_addr, NOC_UNICAST_WRITE_VC, 1, 31 /*wrap*/, false /*linked*/); + noc_index, + NCRISC_AT_CMD_BUF, + dispatch_addr, + NOC_UNICAST_WRITE_VC, + 1, + 31 /*wrap*/, + false /*linked*/, + true /*posted*/); } } // namespace internal_