Skip to content

Commit

Permalink
#8704: Program _HI registers in BH noc nonblocking apis and zero out _MID registers
Browse files Browse the repository at this point in the history
  • Loading branch information
abhullar-tt committed Jun 14, 2024
1 parent a0a8e5a commit e4248b3
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 9 deletions.
22 changes: 13 additions & 9 deletions tt_metal/hw/inc/blackhole/noc_nonblocking_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ inline __attribute__((always_inline)) void ncrisc_noc_fast_read(
uint32_t noc, uint32_t cmd_buf, uint64_t src_addr, uint32_t dest_addr, uint32_t len_bytes) {
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_RET_ADDR_LO, dest_addr);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_TARG_ADDR_LO, (uint32_t)src_addr);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_TARG_ADDR_MID, src_addr >> 32);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_TARG_ADDR_HI, (uint32_t)(src_addr >> 36));
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_AT_LEN_BE, len_bytes);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_CMD_CTRL, NOC_CTRL_SEND_REQ);
noc_reads_num_issued[noc] += 1;
Expand Down Expand Up @@ -89,7 +89,7 @@ inline __attribute__((always_inline)) void ncrisc_noc_fast_write(
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_CTRL, noc_cmd_field);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_TARG_ADDR_LO, src_addr);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_RET_ADDR_LO, (uint32_t)dest_addr);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_RET_ADDR_MID, dest_addr >> 32);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_RET_ADDR_HI, (uint32_t)(dest_addr >> 36));
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_AT_LEN_BE, len_bytes);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_CMD_CTRL, NOC_CTRL_SEND_REQ);
if (posted) {
Expand Down Expand Up @@ -119,7 +119,7 @@ inline __attribute__((always_inline)) void ncrisc_noc_fast_write_loopback_src(
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_CTRL, noc_cmd_field);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_TARG_ADDR_LO, src_addr);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_RET_ADDR_LO, (uint32_t)dest_addr);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_RET_ADDR_MID, dest_addr >> 32);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_RET_ADDR_HI, (uint32_t)(dest_addr >> 36));
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_AT_LEN_BE, len_bytes);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_CMD_CTRL, NOC_CTRL_SEND_REQ);
noc_nonposted_writes_num_issued[noc] += 1;
Expand All @@ -133,7 +133,7 @@ inline __attribute__((always_inline)) void ncrisc_noc_blitz_write_setup(
while (!noc_cmd_buf_ready(noc, cmd_buf));
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_CTRL, noc_cmd_field);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_AT_LEN_BE, len_bytes);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_RET_ADDR_MID, dest_addr >> 32);
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_RET_ADDR_HI, (uint32_t)(dest_addr >> 36));
noc_nonposted_writes_num_issued[noc] += num_times_to_write;
noc_nonposted_writes_acked[noc] += num_times_to_write;
}
Expand Down Expand Up @@ -162,17 +162,21 @@ inline __attribute__((always_inline)) void noc_init() {
uint32_t my_y = (noc_id_reg >> NOC_ADDR_NODE_ID_BITS) & NOC_NODE_ID_MASK;
uint64_t xy_local_addr = NOC_XY_ADDR(my_x, my_y, 0);

NOC_CMD_BUF_WRITE_REG(noc, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_MID, (uint32_t)(xy_local_addr >> 32));
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_WR_REG_CMD_BUF, NOC_TARG_ADDR_MID, (uint32_t)(xy_local_addr >> 32));
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_MID, 0x0);
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_HI, (uint32_t)(xy_local_addr >> 36));
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_WR_REG_CMD_BUF, NOC_TARG_ADDR_MID, 0x0);
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_WR_REG_CMD_BUF, NOC_TARG_ADDR_HI, (uint32_t)(xy_local_addr >> 36));

uint64_t atomic_ret_addr = NOC_XY_ADDR(my_x, my_y, (uint32_t)(&atomic_ret_val));
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_AT_CMD_BUF, NOC_RET_ADDR_LO, (uint32_t)(atomic_ret_addr & 0xFFFFFFFF));
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_AT_CMD_BUF, NOC_RET_ADDR_MID, (uint32_t)(atomic_ret_addr >> 32));
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_AT_CMD_BUF, NOC_RET_ADDR_MID, 0x0);
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_AT_CMD_BUF, NOC_RET_ADDR_HI, (uint32_t)(atomic_ret_addr >> 36));

uint32_t noc_rd_cmd_field =
NOC_CMD_CPY | NOC_CMD_RD | NOC_CMD_RESP_MARKED | NOC_CMD_VC_STATIC | NOC_CMD_STATIC_VC(1);
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_RD_CMD_BUF, NOC_CTRL, noc_rd_cmd_field);
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_RD_CMD_BUF, NOC_RET_ADDR_MID, (uint32_t)(xy_local_addr >> 32));
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_RD_CMD_BUF, NOC_RET_ADDR_MID, 0x0);
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_RD_CMD_BUF, NOC_RET_ADDR_HI, (uint32_t)(xy_local_addr >> 36));
}
}

Expand Down Expand Up @@ -331,7 +335,7 @@ inline __attribute__((always_inline)) void noc_fast_atomic_increment(
bool posted = false) {
while (!noc_cmd_buf_ready(noc, cmd_buf));
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_TARG_ADDR_LO, (uint32_t)(addr & 0xFFFFFFFF));
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_TARG_ADDR_MID, (uint32_t)(addr >> 32));
NOC_CMD_BUF_WRITE_REG(noc, cmd_buf, NOC_TARG_ADDR_HI, (uint32_t)(addr >> 36));
NOC_CMD_BUF_WRITE_REG(
noc,
cmd_buf,
Expand Down
48 changes: 48 additions & 0 deletions tt_metal/hw/inc/dataflow_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,11 @@ void noc_async_read_one_packet(std::uint64_t src_noc_addr, std::uint32_t dst_loc

NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_RET_ADDR_LO, dst_local_l1_addr);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_LO, (uint32_t)src_noc_addr);
#ifdef ARCH_BLACKHOLE
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_HI, (uint32_t)(src_noc_addr >> 36));
#else
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID, src_noc_addr >> 32);
#endif
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_AT_LEN_BE, size);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_CMD_CTRL, NOC_CTRL_SEND_REQ);
noc_reads_num_issued[noc_index] += 1;
Expand All @@ -560,7 +564,11 @@ void noc_async_read_one_packet_set_state(std::uint64_t src_noc_addr, std::uint32

DEBUG_STATUS("NARW");

#ifdef ARCH_BLACKHOLE
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_HI, (uint32_t)(src_noc_addr >> 36));
#else
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID, src_noc_addr >> 32);
#endif
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_AT_LEN_BE, size);

DEBUG_STATUS("NARD");
Expand Down Expand Up @@ -609,7 +617,11 @@ void noc_async_read_set_state(std::uint64_t src_noc_addr) {
while (!noc_cmd_buf_ready(noc_index, NCRISC_RD_CMD_BUF));
DEBUG_STATUS("RPD");

#ifdef ARCH_BLACKHOLE
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_HI, (uint32_t)(src_noc_addr >> 36));
#else
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID, src_noc_addr >> 32);
#endif

DEBUG_STATUS("NARD");
}
Expand Down Expand Up @@ -683,7 +695,11 @@ void noc_async_write_one_packet(std::uint32_t src_local_l1_addr, std::uint64_t d
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_CTRL, noc_cmd_field);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_LO, src_local_l1_addr);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_LO, (uint32_t)dst_noc_addr);
#ifdef ARCH_BLACKHOLE
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_HI, (uint32_t(dst_noc_addr >> 36)));
#else
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_MID, dst_noc_addr >> 32);
#endif
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_AT_LEN_BE, size);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_CMD_CTRL, NOC_CTRL_SEND_REQ);
noc_nonposted_writes_num_issued[noc_index] += 1;
Expand Down Expand Up @@ -716,7 +732,11 @@ void noc_async_write_multicast_one_packet(
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_CTRL, noc_cmd_field);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_LO, src_local_l1_addr);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_LO, (uint32_t)dst_noc_addr_multicast);
#ifdef ARCH_BLACKHOLE
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_HI, (uint32_t)(dst_noc_addr_multicast >> 36));
#else
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_MID, dst_noc_addr_multicast >> 32);
#endif
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_AT_LEN_BE, size);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_CMD_CTRL, NOC_CTRL_SEND_REQ);
noc_nonposted_writes_num_issued[noc_index] += 1;
Expand All @@ -739,7 +759,11 @@ void noc_async_write_one_packet_set_state(std::uint64_t dst_noc_addr, std::uint3
(non_posted ? NOC_CMD_RESP_MARKED : 0x0);

NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_CTRL, noc_cmd_field);
#ifdef ARCH_BLACKHOLE
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_HI, (uint32_t)(dst_noc_addr >> 36));
#else
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_MID, dst_noc_addr >> 32);
#endif
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_AT_LEN_BE, size);
}

Expand Down Expand Up @@ -936,7 +960,11 @@ struct InterleavedAddrGenFast {

NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_RET_ADDR_LO, dest_addr);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_LO, src_addr); // (uint32_t)src_addr
#ifdef ARCH_BLACKHOLE
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_HI, src_noc_xy); // src_addr >> 32
#else
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID, src_noc_xy); // src_addr >> 32
#endif
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_AT_LEN_BE, this->page_size); // len_bytes
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_CMD_CTRL, NOC_CTRL_SEND_REQ);
noc_reads_num_issued[noc_index] += 1;
Expand Down Expand Up @@ -989,7 +1017,11 @@ struct InterleavedAddrGenFast {
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_CTRL, noc_cmd_field);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_LO, src_addr);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_LO, dest_addr); // (uint32_t)dest_addr
#ifdef ARCH_BLACKHOLE
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_HI, dest_noc_xy); // dest_addr >> 32
#else
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_MID, dest_noc_xy); // dest_addr >> 32
#endif
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_AT_LEN_BE, this->page_size); // len_bytes
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_CMD_CTRL, NOC_CTRL_SEND_REQ);
noc_nonposted_writes_num_issued[noc_index] += 1;
Expand Down Expand Up @@ -1039,7 +1071,11 @@ struct InterleavedPow2AddrGenFast {

NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_RET_ADDR_LO, dest_addr);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_LO, src_addr); // (uint32_t)src_addr
#ifdef ARCH_BLACKHOLE
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_HI, src_noc_xy); // src_addr >> 32
#else
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID, src_noc_xy); // src_addr >> 32
#endif
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_AT_LEN_BE, 1 << log_base_2_of_page_size); // len_bytes
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_CMD_CTRL, NOC_CTRL_SEND_REQ);
noc_reads_num_issued[noc_index] += 1;
Expand Down Expand Up @@ -1080,7 +1116,11 @@ struct InterleavedPow2AddrGenFast {

NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_RET_ADDR_LO, dest_addr);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_LO, src_addr); // (uint32_t)src_addr
#ifdef ARCH_BLACKHOLE
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_HI, src_noc_xy); // src_addr >> 32
#else
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID, src_noc_xy); // src_addr >> 32
#endif
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_AT_LEN_BE, size); // len_bytes
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_CMD_CTRL, NOC_CTRL_SEND_REQ);
noc_reads_num_issued[noc_index] += 1;
Expand Down Expand Up @@ -1127,7 +1167,11 @@ struct InterleavedPow2AddrGenFast {
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_CTRL, noc_cmd_field);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_LO, src_addr);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_LO, dest_addr); // (uint32_t)dest_addr
#ifdef ARCH_BLACKHOLE
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_HI, dest_noc_xy); // dest_addr >> 32
#else
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_MID, dest_noc_xy); // dest_addr >> 32
#endif
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_AT_LEN_BE, write_size_bytes); // len_bytes
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_CMD_CTRL, NOC_CTRL_SEND_REQ);
noc_nonposted_writes_num_issued[noc_index] += 1;
Expand Down Expand Up @@ -1864,7 +1908,11 @@ uint32_t noc_async_read_tile_dram_sharded_set_state(uint32_t bank_base_address,
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_CTRL, noc_rd_cmd_field);
}

#ifdef ARCH_BLACKHOLE
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_HI, src_noc_xy); // src_addr >> 32
#else
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID, src_noc_xy); // src_addr >> 32
#endif
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_AT_LEN_BE, page_size); // len_bytes

return src_addr_;
Expand Down

0 comments on commit e4248b3

Please sign in to comment.