Skip to content

Commit

Permalink
#11307: Remove l1_data section
Browse files Browse the repository at this point in the history
test_pgm_dispatch perf still uses l1_data for creating fixed-size kernels;
this will work with the ring buffer since the data is never referenced. Renamed
the l1_data section to l1_data_test_only to reflect this.
  • Loading branch information
pgkeller committed Oct 5, 2024
1 parent e5dc6c2 commit 4816a62
Show file tree
Hide file tree
Showing 15 changed files with 41 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
// SPDX-License-Identifier: Apache-2.0

// NULL kernel is not 0, subtract off overhead
#if KERNEL_BYTES > 30
uint8_t data1[KERNEL_BYTES-30] __attribute__ ((section ("l1_data"))) __attribute__((used));
#if KERNEL_BYTES > 16
constexpr uint32_t empty_kernel_bytes = 16;
uint8_t data1[KERNEL_BYTES - empty_kernel_bytes] __attribute__ ((section ("l1_data_test_only"))) __attribute__((used));
#endif

#ifdef KERNEL_GLOBAL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ void kernel_main() {

zero_l1_buf(reinterpret_cast<tt_l1_ptr uint32_t*>(queue_start_addr_words*PACKET_WORD_SIZE_BYTES),
queue_size_words);
noc_init();

for (uint32_t i = 0; i < num_src_endpoints; i++) {
src_rnd_state[i].init(prng_seed, src_endpoint_start_id+i);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ void kernel_main() {
test_results[PQ_TEST_MISC_INDEX] = 0xff000000;
test_results[PQ_TEST_MISC_INDEX + 1] = 0xcc000000 | src_endpoint_id;

noc_init();
zero_l1_buf(
reinterpret_cast<tt_l1_ptr uint32_t*>(queue_start_addr_words * PACKET_WORD_SIZE_BYTES), queue_size_words);

Expand Down
3 changes: 1 addition & 2 deletions tt_metal/hw/firmware/src/brisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ uint32_t noc_nonposted_writes_num_issued[NUM_NOCS] __attribute__((used));
uint32_t noc_nonposted_writes_acked[NUM_NOCS] __attribute__((used));
uint32_t noc_nonposted_atomics_acked[NUM_NOCS] __attribute__((used));
uint32_t noc_posted_writes_num_issued[NUM_NOCS] __attribute__((used));
uint32_t atomic_ret_val __attribute__((section("l1_data"))) __attribute__((used));

CBInterface cb_interface[NUM_CIRCULAR_BUFFERS] __attribute__((used));

Expand Down Expand Up @@ -342,7 +341,7 @@ int main() {
noc_index = 0;
risc_init();
device_setup();
noc_init();
noc_init(MEM_NOC_ATOMIC_RET_VAL_ADDR);

// Set ncrisc's resume address to 0 so we know when ncrisc has overwritten it
mailboxes->ncrisc_halt.resume_addr = 0;
Expand Down
3 changes: 1 addition & 2 deletions tt_metal/hw/firmware/src/erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ uint32_t noc_nonposted_writes_num_issued[NUM_NOCS] __attribute__((used));
uint32_t noc_nonposted_writes_acked[NUM_NOCS] __attribute__((used));
uint32_t noc_nonposted_atomics_acked[NUM_NOCS] __attribute__((used));
uint32_t noc_posted_writes_num_issued[NUM_NOCS] __attribute__((used));
uint32_t atomic_ret_val __attribute__ ((section ("l1_data"))) __attribute__((used));

uint32_t tt_l1_ptr *rta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *crta_l1_base __attribute__((used));
Expand All @@ -47,7 +46,7 @@ void __attribute__((section("erisc_l1_code.1"), noinline)) Application(void) {
wzerorange(__ldm_bss_start, __ldm_bss_end);

risc_init();
noc_init();
noc_init(MEM_NOC_ATOMIC_RET_VAL_ADDR);
wzerorange(__ldm_bss_start, __ldm_bss_end);

for (uint32_t n = 0; n < NUM_NOCS; n++) {
Expand Down
3 changes: 1 addition & 2 deletions tt_metal/hw/firmware/src/idle_erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ uint32_t noc_nonposted_writes_num_issued[NUM_NOCS] __attribute__((used));
uint32_t noc_nonposted_writes_acked[NUM_NOCS] __attribute__((used));
uint32_t noc_nonposted_atomics_acked[NUM_NOCS] __attribute__((used));
uint32_t noc_posted_writes_num_issued[NUM_NOCS] __attribute__((used));
uint32_t atomic_ret_val __attribute__ ((section ("l1_data"))) __attribute__((used));

uint32_t tt_l1_ptr *rta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *crta_l1_base __attribute__((used));
Expand Down Expand Up @@ -102,7 +101,7 @@ int main() {

risc_init();
//device_setup();
noc_init();
noc_init(MEM_NOC_ATOMIC_RET_VAL_ADDR);

mailboxes->go_message.signal = RUN_MSG_DONE;
mailboxes->launch_msg_rd_ptr = 0; // Initialize the rdptr to 0
Expand Down
14 changes: 9 additions & 5 deletions tt_metal/hw/inc/blackhole/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,16 @@
// Before adding a define here, read the following:
// 1) Any "truly global" address must be specified explicitly here. Truly
// global addresses are addresses that are referenced on both the host and
// device
// device or between processors
// 2) Memory section sizes must be specified here, these are used in the
// linker scripts
// 3) Device static/global variables generally should NOT be listed here. If
// they are global to a core, declare them in the that core's source code and
// tag them if needed with a section (e.g., "l1_data")
// 3) static/global variables generally should NOT be listed here. If
// they are global to a processor, declare them in that processor's source
// code; they will get placed in local memory
// 4) L1 data sections are no longer supported, because addressing them with
// XIP binaries requires runtime address patching. Instead of using named
// variables in the L1 data section, use a mailbox (or an address in the
// mailbox range, initialized explicitly)
//

/////////////
Expand All @@ -39,7 +43,6 @@

/////////////
// Firmware/kernel code holes
#define MEM_BOOT_CODE_SIZE 4
#define MEM_BRISC_FIRMWARE_SIZE (10 * 1024 + MEM_BRISC_LOCAL_SIZE)
#define MEM_NCRISC_FIRMWARE_SIZE (16 * 1024 + MEM_NCRISC_LOCAL_SIZE)
#define MEM_TRISC0_FIRMWARE_SIZE (16 * 1024 + MEM_TRISC_LOCAL_SIZE)
Expand All @@ -48,6 +51,7 @@
#define MEM_ZEROS_SIZE 512

#define MEM_BOOT_CODE_BASE 0
#define MEM_NOC_ATOMIC_RET_VAL_ADDR 4
#define MEM_L1_BARRIER 12
#define MEM_MAILBOX_BASE 16
// Magic size must be big enough to hold dev_msgs_t. static_asserts will fire if this is too small
Expand Down
5 changes: 2 additions & 3 deletions tt_metal/hw/inc/blackhole/noc_nonblocking_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ extern uint32_t noc_nonposted_writes_num_issued[NUM_NOCS];
extern uint32_t noc_nonposted_writes_acked[NUM_NOCS];
extern uint32_t noc_nonposted_atomics_acked[NUM_NOCS];
extern uint32_t noc_posted_writes_num_issued[NUM_NOCS];
extern uint32_t atomic_ret_val;

inline __attribute__((always_inline)) void NOC_CMD_BUF_WRITE_REG(
uint32_t noc, uint32_t buf, uint32_t addr, uint32_t val) {
Expand Down Expand Up @@ -166,7 +165,7 @@ inline __attribute__((always_inline)) bool ncrisc_noc_nonposted_atomics_flushed(
return (NOC_STATUS_READ_REG(noc, NIU_MST_ATOMIC_RESP_RECEIVED) == noc_nonposted_atomics_acked[noc]);
}

inline __attribute__((always_inline)) void noc_init() {
inline __attribute__((always_inline)) void noc_init(uint32_t atomic_ret_val) {
#pragma GCC unroll 0
for (int noc = 0; noc < NUM_NOCS; noc++) {
uint32_t noc_id_reg = NOC_CMD_BUF_READ_REG(noc, 0, NOC_NODE_ID);
Expand All @@ -179,7 +178,7 @@ inline __attribute__((always_inline)) void noc_init() {
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_WR_REG_CMD_BUF, NOC_TARG_ADDR_MID, 0x0);
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_WR_REG_CMD_BUF, NOC_TARG_ADDR_COORDINATE, (uint32_t)(xy_local_addr >> NOC_ADDR_COORD_SHIFT) & NOC_COORDINATE_MASK);

uint64_t atomic_ret_addr = NOC_XY_ADDR(my_x, my_y, (uint32_t)(&atomic_ret_val));
uint64_t atomic_ret_addr = NOC_XY_ADDR(my_x, my_y, atomic_ret_val);
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_AT_CMD_BUF, NOC_RET_ADDR_LO, (uint32_t)(atomic_ret_addr & 0xFFFFFFFF));
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_AT_CMD_BUF, NOC_RET_ADDR_MID, 0x0);
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_AT_CMD_BUF, NOC_RET_ADDR_COORDINATE, (uint32_t)(atomic_ret_addr >> NOC_ADDR_COORD_SHIFT) & NOC_COORDINATE_MASK);
Expand Down
15 changes: 9 additions & 6 deletions tt_metal/hw/inc/grayskull/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,16 @@
// Before adding a define here, read the following:
// 1) Any "truly global" address must be specified explicitly here. Truly
// global addresses are addresses that are referenced on both the host and
// device
// device or between processors
// 2) Memory section sizes must be specified here, these are used in the
// linker scripts
// 3) Device static/global variables generally should NOT be listed here. If
// they are global to a core, declare them in the that core's source code and
// tag them if needed with a section (e.g., "l1_data")
// 3) static/global variables generally should NOT be listed here. If
// they are global to a processor, declare them in that processor's source
// code; they will get placed in local memory
// 4) L1 data sections are no longer supported, because addressing them with
// XIP binaries requires runtime address patching. Instead of using named
// variables in the L1 data section, use a mailbox (or an address in the
// mailbox range, initialized explicitly)
//

/////////////
Expand All @@ -42,8 +46,6 @@

/////////////
// Firmware/kernel code holes
#define MEM_BOOT_CODE_SIZE 4

#define MEM_BRISC_FIRMWARE_SIZE (10 * 1024 + MEM_BRISC_LOCAL_SIZE)
#define MEM_NCRISC_FIRMWARE_SIZE (16 * 1024)
#define MEM_TRISC0_FIRMWARE_SIZE (16 * 1024 + MEM_TRISC_LOCAL_SIZE)
Expand All @@ -53,6 +55,7 @@
#define MEM_ZEROS_SIZE 512

#define MEM_BOOT_CODE_BASE 0
#define MEM_NOC_ATOMIC_RET_VAL_ADDR 4
#define MEM_L1_BARRIER 12
#define MEM_MAILBOX_BASE 16
// Magic size must be big enough to hold dev_msgs_t. static_asserts will fire if this is too small
Expand Down
5 changes: 2 additions & 3 deletions tt_metal/hw/inc/grayskull/noc_nonblocking_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ extern uint32_t noc_nonposted_writes_num_issued[NUM_NOCS];
extern uint32_t noc_nonposted_writes_acked[NUM_NOCS];
extern uint32_t noc_nonposted_atomics_acked[NUM_NOCS];
extern uint32_t noc_posted_writes_num_issued[NUM_NOCS];
extern uint32_t atomic_ret_val;

inline __attribute__((always_inline)) void NOC_CMD_BUF_WRITE_REG(uint32_t noc, uint32_t buf, uint32_t addr, uint32_t val) {
uint32_t offset = (buf << NOC_CMD_BUF_OFFSET_BIT) + (noc << NOC_INSTANCE_OFFSET_BIT) + addr;
Expand Down Expand Up @@ -139,7 +138,7 @@ inline __attribute__((always_inline)) bool ncrisc_noc_nonposted_atomics_flushed(
return (NOC_STATUS_READ_REG(noc, NIU_MST_ATOMIC_RESP_RECEIVED) == noc_nonposted_atomics_acked[noc]);
}

inline __attribute__((always_inline)) void noc_init() {
inline __attribute__((always_inline)) void noc_init(uint32_t atomic_ret_val) {
#pragma GCC unroll 0
for (int noc = 0; noc < NUM_NOCS; noc++) {
uint32_t noc_id_reg = NOC_CMD_BUF_READ_REG(noc, 0, NOC_NODE_ID);
Expand All @@ -150,7 +149,7 @@ inline __attribute__((always_inline)) void noc_init() {
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_COORDINATE, (uint32_t)(xy_local_addr >> NOC_ADDR_COORD_SHIFT));
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_WR_REG_CMD_BUF, NOC_TARG_ADDR_COORDINATE, (uint32_t)(xy_local_addr >> NOC_ADDR_COORD_SHIFT));

uint64_t atomic_ret_addr = NOC_XY_ADDR(my_x, my_y, (uint32_t)(&atomic_ret_val));
uint64_t atomic_ret_addr = NOC_XY_ADDR(my_x, my_y, atomic_ret_val);
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_AT_CMD_BUF, NOC_RET_ADDR_LO, (uint32_t)(atomic_ret_addr & 0xFFFFFFFF));
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_AT_CMD_BUF, NOC_RET_ADDR_COORDINATE, (uint32_t)(atomic_ret_addr >> NOC_ADDR_COORD_SHIFT));

Expand Down
14 changes: 9 additions & 5 deletions tt_metal/hw/inc/wormhole/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,16 @@
// Before adding a define here, read the following:
// 1) Any "truly global" address must be specified explicitly here. Truly
// global addresses are addresses that are referenced on both the host and
// device
// device or between processors
// 2) Memory section sizes must be specified here, these are used in the
// linker scripts
// 3) Device static/global variables generally should NOT be listed here. If
// they are global to a core, declare them in the that core's source code and
// tag them if needed with a section (e.g., "l1_data")
// 3) static/global variables generally should NOT be listed here. If
// they are global to a processor, declare them in that processor's source
// code; they will get placed in local memory
// 4) L1 data sections are no longer supported, because addressing them with
// XIP binaries requires runtime address patching. Instead of using named
// variables in the L1 data section, use a mailbox (or an address in the
// mailbox range, initialized explicitly)
//

/////////////
Expand All @@ -43,7 +47,6 @@

/////////////
// Firmware/kernel code holes
#define MEM_BOOT_CODE_SIZE 4
#define MEM_BRISC_FIRMWARE_SIZE (10 * 1024 + MEM_BRISC_LOCAL_SIZE)
#define MEM_NCRISC_FIRMWARE_SIZE (16 * 1024)
#define MEM_TRISC0_FIRMWARE_SIZE (16 * 1024 + MEM_TRISC_LOCAL_SIZE)
Expand All @@ -53,6 +56,7 @@
#define MEM_ZEROS_SIZE 512

#define MEM_BOOT_CODE_BASE 0
#define MEM_NOC_ATOMIC_RET_VAL_ADDR 4
#define MEM_L1_BARRIER 12
#define MEM_MAILBOX_BASE 16
// Magic size must be big enough to hold dev_msgs_t. static_asserts will fire if this is too small
Expand Down
5 changes: 2 additions & 3 deletions tt_metal/hw/inc/wormhole/noc_nonblocking_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ extern uint32_t noc_nonposted_writes_num_issued[NUM_NOCS];
extern uint32_t noc_nonposted_writes_acked[NUM_NOCS];
extern uint32_t noc_nonposted_atomics_acked[NUM_NOCS];
extern uint32_t noc_posted_writes_num_issued[NUM_NOCS];
extern uint32_t atomic_ret_val;

inline __attribute__((always_inline)) void NOC_CMD_BUF_WRITE_REG(uint32_t noc, uint32_t buf, uint32_t addr, uint32_t val) {
uint32_t offset = (buf << NOC_CMD_BUF_OFFSET_BIT) + (noc << NOC_INSTANCE_OFFSET_BIT) + addr;
Expand Down Expand Up @@ -149,7 +148,7 @@ inline __attribute__((always_inline)) bool ncrisc_noc_nonposted_atomics_flushed(
return (NOC_STATUS_READ_REG(noc, NIU_MST_ATOMIC_RESP_RECEIVED) == noc_nonposted_atomics_acked[noc]);
}

inline __attribute__((always_inline)) void noc_init() {
inline __attribute__((always_inline)) void noc_init(uint32_t atomic_ret_val) {
#pragma GCC unroll 0
for (int noc = 0; noc < NUM_NOCS; noc++) {
uint32_t noc_id_reg = NOC_CMD_BUF_READ_REG(noc, 0, NOC_NODE_ID);
Expand All @@ -160,7 +159,7 @@ inline __attribute__((always_inline)) void noc_init() {
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_COORDINATE, (uint32_t)(xy_local_addr >> NOC_ADDR_COORD_SHIFT));
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_WR_REG_CMD_BUF, NOC_TARG_ADDR_COORDINATE, (uint32_t)(xy_local_addr >> NOC_ADDR_COORD_SHIFT));

uint64_t atomic_ret_addr = NOC_XY_ADDR(my_x, my_y, (uint32_t)(&atomic_ret_val));
uint64_t atomic_ret_addr = NOC_XY_ADDR(my_x, my_y, atomic_ret_val);
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_AT_CMD_BUF, NOC_RET_ADDR_LO, (uint32_t)(atomic_ret_addr & 0xFFFFFFFF));
NOC_CMD_BUF_WRITE_REG(noc, NCRISC_AT_CMD_BUF, NOC_RET_ADDR_COORDINATE, (uint32_t)(atomic_ret_addr >> NOC_ADDR_COORD_SHIFT));

Expand Down
6 changes: 1 addition & 5 deletions tt_metal/hw/toolchain/sections.ld
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,7 @@ SECTIONS

l1_data :
{
*(l1_data)
} > REGION_CODE
l1_data_noinit (NOLOAD) :
{
*(l1_data_noinit)
*(l1_data_test_only)
} > REGION_CODE

. = ALIGN(. + MEM_PAD, MEM_ALIGN);
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/impl/dispatch/kernels/cq_dispatch_slave.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ void dispatch_s_wr_reg_cmd_buf_init() {

FORCE_INLINE
void dispatch_s_atomic_cmd_buf_init() {
uint64_t atomic_ret_addr = get_noc_addr_helper(my_noc_xy, (uint32_t)(&atomic_ret_val));
uint64_t atomic_ret_addr = get_noc_addr_helper(my_noc_xy, MEM_NOC_ATOMIC_RET_VAL_ADDR);
NOC_CMD_BUF_WRITE_REG(my_noc_index, DISPATCH_S_ATOMIC_CMD_BUF, NOC_RET_ADDR_LO, (uint32_t)(atomic_ret_addr & 0xFFFFFFFF));
NOC_CMD_BUF_WRITE_REG(my_noc_index, DISPATCH_S_ATOMIC_CMD_BUF, NOC_RET_ADDR_COORDINATE, (uint32_t)(atomic_ret_addr >> NOC_ADDR_COORD_SHIFT));
}
Expand Down
1 change: 0 additions & 1 deletion tt_metal/impl/dispatch/kernels/eth_tunneler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ constexpr uint32_t inner_stop_mux_d_bypass = get_compile_time_arg_val(15);

void kernel_main() {
rtos_context_switch_ptr = (void (*)())RtosTable[0];
noc_init();

write_test_results(test_results, PQ_TEST_STATUS_INDEX, PACKET_QUEUE_TEST_STARTED);
write_test_results(test_results, PQ_TEST_MISC_INDEX, 0xff000000);
Expand Down

0 comments on commit 4816a62

Please sign in to comment.