Skip to content

Commit

Permalink
Remove firmware dependency on generated_bank_to_noc_coord_mapping.h (#…
Browse files Browse the repository at this point in the history
…15819)

### Ticket
#12844


### Problem description
Firmware build is dependent on runtime values.

### What's changed
This PR is the same as #15070. The only difference is
that the 'uninit' changes have been removed. The original PR's changes were
reverted because CI had ND hangs on N300. Removing 'uninit' fixed
the hang. Tested with multiple CI runs.
https://github.com/tenstorrent/tt-metal/actions/runs/12222728886
https://github.com/tenstorrent/tt-metal/actions/runs/12206639040
https://github.com/tenstorrent/tt-metal/actions/runs/12227870153
https://github.com/tenstorrent/tt-metal/actions/runs/12237979786

Firmware now declares global arrays for dram_bank_to_noc_xy,
l1_bank_to_noc_xy, bank_to_dram_offset, and bank_to_l1_offset. During the
build, the values are written to L1 memory. During initialization, firmware
copies these values from L1 into the above global arrays.
Moved l1_to_local_mem_copy to substitutes.cpp. Removed the 'inline' keyword
because the function is used in multiple places; LTO now decides the
inlining.

### Checklist
- [x] Post commit CI passes -
https://github.com/tenstorrent/tt-metal/actions/runs/12227870153
- [x] Blackhole Post commit (if applicable) -
https://github.com/tenstorrent/tt-metal/actions/runs/12227923651
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] **(For models and ops writers)** Full [new
models](https://github.com/tenstorrent/tt-metal/actions/workflows/full-new-models-suite.yaml)
tests passes
- [ ] New/Existing tests provide coverage for changes
  • Loading branch information
spoojaryTT authored Dec 9, 2024
1 parent cac4dcb commit c1a246e
Show file tree
Hide file tree
Showing 28 changed files with 239 additions and 197 deletions.
10 changes: 9 additions & 1 deletion tt_metal/hw/firmware/src/brisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include "tools/profiler/kernel_profiler.hpp"
#include "dev_msgs.h"
#include "risc_attribs.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "circular_buffer.h"
#include "circular_buffer_init.h"
#include "dataflow_api.h"
Expand Down Expand Up @@ -67,6 +66,13 @@ uint32_t tt_l1_ptr *rta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *crta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *sem_l1_base[ProgrammableCoreType::COUNT] __attribute__((used));

// These arrays are stored in the local memory of FW, but are primarily used by the kernel, which shares
// FW symbols. Hence mark them as 'used' so that the FW compiler doesn't optimize them out.
uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS] __attribute__((used));
uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS] __attribute__((used));
int32_t bank_to_dram_offset[NUM_DRAM_BANKS] __attribute__((used));
int32_t bank_to_l1_offset[NUM_L1_BANKS] __attribute__((used));

#define MEM_MOVER_VIEW_IRAM_BASE_ADDR (0x4 << 12)

#if defined(PROFILE_KERNEL)
Expand Down Expand Up @@ -343,6 +349,8 @@ int main() {

do_crt1((uint32_t*)MEM_BRISC_INIT_LOCAL_L1_BASE_SCRATCH);

noc_bank_table_init(MEM_BANK_TO_NOC_SCRATCH);

mailboxes->launch_msg_rd_ptr = 0; // Initialize the rdptr to 0
noc_index = 0;
risc_init();
Expand Down
10 changes: 9 additions & 1 deletion tt_metal/hw/firmware/src/erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include "ethernet/dataflow_api.h"
#include "ethernet/tunneling.h"
#include "firmware_common.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "noc_parameters.h"
#include "risc_attribs.h"
#include "tools/profiler/kernel_profiler.hpp"
Expand Down Expand Up @@ -34,6 +33,13 @@ uint32_t tt_l1_ptr *rta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *crta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *sem_l1_base[ProgrammableCoreType::COUNT] __attribute__((used));

// These arrays are stored in the local memory of FW, but are primarily used by the kernel, which shares
// FW symbols. Hence mark them as 'used' so that the FW compiler doesn't optimize them out.
uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS] __attribute__((used));
uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS] __attribute__((used));
int32_t bank_to_dram_offset[NUM_DRAM_BANKS] __attribute__((used));
int32_t bank_to_l1_offset[NUM_L1_BANKS] __attribute__((used));

void __attribute__((noinline)) Application(void) {
WAYPOINT("I");

Expand All @@ -43,6 +49,8 @@ void __attribute__((noinline)) Application(void) {

rtos_context_switch_ptr = (void (*)())RtosTable[0];

noc_bank_table_init(eth_l1_mem::address_map::ERISC_MEM_BANK_TO_NOC_SCRATCH);

risc_init();
noc_init(MEM_NOC_ATOMIC_RET_VAL_ADDR);

Expand Down
10 changes: 9 additions & 1 deletion tt_metal/hw/firmware/src/idle_erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include "tools/profiler/kernel_profiler.hpp"
#include "dev_msgs.h"
#include "risc_attribs.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "circular_buffer.h"
#include "dataflow_api.h"

Expand All @@ -42,6 +41,13 @@ uint32_t tt_l1_ptr *sem_l1_base[ProgrammableCoreType::COUNT] __attribute__((used
uint8_t my_x[NUM_NOCS] __attribute__((used));
uint8_t my_y[NUM_NOCS] __attribute__((used));

// These arrays are stored in the local memory of FW, but are primarily used by the kernel, which shares
// FW symbols. Hence mark them as 'used' so that the FW compiler doesn't optimize them out.
uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS] __attribute__((used));
uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS] __attribute__((used));
int32_t bank_to_dram_offset[NUM_DRAM_BANKS] __attribute__((used));
int32_t bank_to_l1_offset[NUM_L1_BANKS] __attribute__((used));

//c_tensix_core core;

tt_l1_ptr mailboxes_t * const mailboxes = (tt_l1_ptr mailboxes_t *)(MEM_IERISC_MAILBOX_BASE);
Expand Down Expand Up @@ -101,6 +107,8 @@ int main() {
do_crt1((uint32_t *)MEM_IERISC_INIT_LOCAL_L1_BASE_SCRATCH);
uint32_t heartbeat = 0;

noc_bank_table_init(MEM_IERISC_BANK_TO_NOC_SCRATCH);

risc_init();

mailboxes->slave_sync.all = RUN_SYNC_MSG_ALL_SLAVES_DONE;
Expand Down
10 changes: 9 additions & 1 deletion tt_metal/hw/firmware/src/ncrisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include "firmware_common.h"
#include "tools/profiler/kernel_profiler.hpp"
#include "risc_attribs.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "circular_buffer.h"
#include "circular_buffer_init.h"

Expand Down Expand Up @@ -40,6 +39,13 @@ uint32_t tt_l1_ptr *rta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *crta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *sem_l1_base[ProgrammableCoreType::COUNT] __attribute__((used));

// These arrays are stored in the local memory of FW, but are primarily used by the kernel, which shares
// FW symbols. Hence mark them as 'used' so that the FW compiler doesn't optimize them out.
uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS] __attribute__((used));
int32_t bank_to_dram_offset[NUM_DRAM_BANKS] __attribute__((used));
uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS] __attribute__((used));
int32_t bank_to_l1_offset[NUM_L1_BANKS] __attribute__((used));

#if defined(PROFILE_KERNEL)
namespace kernel_profiler {
uint32_t wIndex __attribute__((used));
Expand Down Expand Up @@ -79,6 +85,8 @@ int main(int argc, char *argv[]) {

do_crt1((uint32_t tt_l1_ptr *)MEM_NCRISC_INIT_LOCAL_L1_BASE_SCRATCH);

noc_bank_table_init(MEM_BANK_TO_NOC_SCRATCH);

risc_init();

// If NCRISC has IRAM it needs to halt before BRISC copies data from L1 to IRAM
Expand Down
1 change: 0 additions & 1 deletion tt_metal/hw/firmware/src/slave_idle_erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include "firmware_common.h"
#include "tools/profiler/kernel_profiler.hpp"
#include "risc_attribs.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "circular_buffer.h"

#include "debug/waypoint.h"
Expand Down
11 changes: 11 additions & 0 deletions tt_metal/hw/inc/blackhole/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@
#define MEM_NCRISC_LOCAL_SIZE (8 * 1024)
#define MEM_TRISC_LOCAL_SIZE (4 * 1024)

// Memory for (dram/l1)_bank_to_noc_xy arrays, size needs to be at least 2 * NUM_NOCS * (NUM_DRAM_BANKS + NUM_L1_BANKS)
#define MEM_BANK_TO_NOC_XY_SIZE 1024
// Memory for bank_to_dram_offset and bank_to_l1_offset arrays, size needs to be at least 4 * (NUM_DRAM_BANKS + NUM_L1_BANKS)
#define MEM_BANK_OFFSET_SIZE 1024

/////////////
// Firmware/kernel code holes
#define MEM_BRISC_FIRMWARE_SIZE (5 * 1024 + 128)
Expand Down Expand Up @@ -91,6 +96,9 @@
#define MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)

#define MEM_BANK_TO_NOC_SCRATCH (MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_BANK_TO_NOC_SIZE (MEM_BANK_TO_NOC_XY_SIZE + MEM_BANK_OFFSET_SIZE)

/////////////
// Stack info
// Increasing the stack size comes at the expense of less local memory for globals
Expand Down Expand Up @@ -130,6 +138,9 @@
#define MEM_IERISC_STACK_BASE (MEM_LOCAL_BASE + MEM_IERISC_LOCAL_SIZE - MEM_IERISC_STACK_SIZE)
#define MEM_SLAVE_IERISC_STACK_BASE (MEM_LOCAL_BASE + MEM_SLAVE_IERISC_LOCAL_SIZE - MEM_SLAVE_IERISC_STACK_SIZE)

#define MEM_IERISC_BANK_TO_NOC_SCRATCH (MEM_SLAVE_IERISC_INIT_LOCAL_L1_BASE_SCRATCH + MEM_SLAVE_IERISC_LOCAL_SIZE)
#define MEM_IERISC_BANK_TO_NOC_SIZE (MEM_BANK_TO_NOC_XY_SIZE + MEM_BANK_OFFSET_SIZE)

/////////////
// Padding/alignment restriction needed in linker scripts for erisc
#define MEM_IERISC_KERNEL_PAD 32
13 changes: 13 additions & 0 deletions tt_metal/hw/inc/blackhole/eth_l1_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ struct address_map {
static constexpr std::int32_t DATA_BUFFER_SIZE_ETH = 4 * 1024;
static constexpr std::int32_t DATA_BUFFER_SIZE_NOC = 16 * 1024;
static constexpr std::int32_t DATA_BUFFER_SIZE = 24 * 1024;
// Memory for (dram/l1)_bank_to_noc_xy arrays, size needs to be at least 2 * NUM_NOCS * (NUM_DRAM_BANKS +
// NUM_L1_BANKS)
static constexpr std::int32_t ERISC_MEM_BANK_TO_NOC_XY_SIZE = 1024;
// Memory for bank_to_dram_offset and bank_to_l1_offset arrays, size needs to be at least 4 * (NUM_DRAM_BANKS +
// NUM_L1_BANKS)
static constexpr std::int32_t ERISC_MEM_BANK_OFFSET_SIZE = 1024;

// Kernel config buffer is WIP
// Size is presently based on the old sizes of the RTAs + CB config + Sems
static constexpr std::int32_t ERISC_L1_KERNEL_CONFIG_SIZE = 96 * 4 + 8 * 16;
Expand Down Expand Up @@ -65,6 +72,12 @@ struct address_map {

static_assert((ERISC_L1_UNRESERVED_BASE % 32) == 0);

// This scratch address is the same as ERISC_L1_UNRESERVED_BASE, because the scratch space is only used to copy data
// during the runtime build, and is unused once FW has copied the data to local memory during FW initialization.
static constexpr std::int32_t ERISC_MEM_BANK_TO_NOC_SCRATCH =
(ERISC_L1_KERNEL_CONFIG_BASE + ERISC_L1_KERNEL_CONFIG_SIZE + 31) & ~31;
static constexpr std::int32_t ERISC_MEM_BANK_TO_NOC_SIZE = ERISC_MEM_BANK_TO_NOC_XY_SIZE + ERISC_MEM_BANK_OFFSET_SIZE;

static constexpr std::int32_t LAUNCH_ERISC_APP_FLAG = L1_EPOCH_Q_BASE + 4;

// BIDIR Tunneling Kernel Space
Expand Down
14 changes: 11 additions & 3 deletions tt_metal/hw/inc/dataflow_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
#include "chlkc_unpack_tile_dims.h"
#define DATA_FORMATS_DEFINED
#endif
#if __has_include("generated_bank_to_noc_coord_mapping.h")
#include "generated_bank_to_noc_coord_mapping.h"
#endif
#include <noc/noc_parameters.h>

#include <stdint.h>

Expand All @@ -37,9 +35,15 @@ constexpr uint8_t proc_type = static_cast<std::underlying_type_t<TensixProcessor
constexpr uint8_t noc_index = NOC_INDEX;
constexpr uint8_t noc_mode = NOC_MODE;
#else

extern uint8_t noc_index;
constexpr uint8_t noc_mode = DM_DEDICATED_NOC;
#endif
extern uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS];
extern int32_t bank_to_dram_offset[NUM_DRAM_BANKS];
extern uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS];
extern int32_t bank_to_l1_offset[NUM_L1_BANKS];

extern uint32_t tt_l1_ptr* rta_l1_base;
extern uint32_t tt_l1_ptr* crta_l1_base;
extern uint32_t tt_l1_ptr* sem_l1_base[];
Expand Down Expand Up @@ -89,6 +93,10 @@ constexpr uint32_t write_at_cmd_buf = NCRISC_AT_CMD_BUF;
#define EXCLUDE_START_X_OFFSET 8
#define DYNAMIC_NOC_DIRECTION(noc, direction) (noc == 1 ? 1 - direction : direction)

static_assert(NUM_NOCS == 2);
// "Scratch" in L1 has space allocated for 256 DRAM and L1 entries, to store offsets and NOC XY data (see MEM_BANK_TO_NOC_XY_SIZE and MEM_BANK_OFFSET_SIZE).
static_assert((NUM_DRAM_BANKS + NUM_L1_BANKS) <= 256);

namespace interleaved_addr_gen {

template <bool DRAM>
Expand Down
50 changes: 20 additions & 30 deletions tt_metal/hw/inc/firmware_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,39 +13,17 @@
#include "dev_mem_map.h"
#include "hostdevcommon/kernel_structs.h"
#include "dev_msgs.h"
#include "noc/noc_parameters.h"
#include "debug/dprint.h"

extern uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS];
extern int32_t bank_to_dram_offset[NUM_DRAM_BANKS];
extern uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS];
extern int32_t bank_to_l1_offset[NUM_L1_BANKS];

extern void kernel_init(uint32_t kernel_init);
extern void kernel_launch(uint32_t kernel_base_addr);

// Copies `len` 32-bit words from `src` (an L1 pointer) to `dst`.
//
// The bulk of the copy runs three words per iteration: the three loads are
// issued first, then the pointer/count bookkeeping, then the three stores.
// A residue of 1 or 2 words is handled by the trailing `if` chain. A `len`
// of zero or less copies nothing.
inline void l1_to_local_mem_copy(uint32_t* dst, uint32_t tt_l1_ptr* src, int32_t len) {
// An unroll factor of 0 tells GCC not to unroll this loop.
#pragma GCC unroll 0
while (len >= 3) {
auto v0 = src[0], v1 = src[1], v2 = src[2];
// 1) Make sure the optimizer does not think this is memcpy by
// hiding the pointer bookkeeping in an asm.
// 2) The scheduler doesn't know the above loads have 6 cycle
// latency. We emit the 3 bookkeeping adds as a single block
// in the load shadow before the stores. The optimizer will
// not be able to move these.
// 3) We don't need early clobbers here because of the +r
// constraint -- early clobbers would pessimize.
//
// %3 is sizeof(v0), so src and dst each advance by three elements
// (3 * sizeof(v0) bytes) and len is decremented by 3.
asm inline(
"addi %0,%0,3*%3\n\t"
"addi %1,%1,3*%3\n\t"
"addi %2,%2,-3"
: "+r"(src), "+r"(dst), "+r"(len)
: "i"(sizeof(v0)));
// dst has already been advanced past this group, hence the negative indices.
dst[-3] = v0, dst[-2] = v1, dst[-1] = v2;
}
// There are 0, 1 or 2 words of residue. This is smaller than a loop.
// We get smaller code layout by expecting the conditions to be true.
if (__builtin_expect(len >= 1, true)) {
dst[0] = src[0];
if (__builtin_expect(len >= 2, true)) {
dst[1] = src[1];
}
}
}
void l1_to_local_mem_copy(uint32_t* dst, uint32_t tt_l1_ptr* src, int32_t len);

inline void do_crt1(uint32_t tt_l1_ptr* data_image) {
// Clear bss.
Expand All @@ -59,6 +37,18 @@ inline void do_crt1(uint32_t tt_l1_ptr* data_image) {
l1_to_local_mem_copy(__ldm_data_start, data_image, __ldm_data_end - __ldm_data_start);
}

inline void noc_bank_table_init(uint64_t mem_bank_to_noc_addr) {
int32_t dram_to_noc_size_bytes = sizeof(dram_bank_to_noc_xy);
l1_to_local_mem_copy((uint*)dram_bank_to_noc_xy, (uint tt_l1_ptr*)mem_bank_to_noc_addr, dram_to_noc_size_bytes >> 2);
int32_t l1_to_noc_size_bytes = sizeof(l1_bank_to_noc_xy);
l1_to_local_mem_copy((uint*)l1_bank_to_noc_xy, (uint tt_l1_ptr*)(mem_bank_to_noc_addr + dram_to_noc_size_bytes), l1_to_noc_size_bytes >> 2);

int32_t dram_offsets_size_bytes = sizeof(bank_to_dram_offset);
l1_to_local_mem_copy((uint*)bank_to_dram_offset, (uint tt_l1_ptr*)(mem_bank_to_noc_addr + dram_to_noc_size_bytes + l1_to_noc_size_bytes), dram_offsets_size_bytes >> 2);
int32_t l1_offsets_size_bytes = sizeof(bank_to_l1_offset);
l1_to_local_mem_copy((uint*)bank_to_l1_offset, (uint tt_l1_ptr*)(mem_bank_to_noc_addr + dram_to_noc_size_bytes + l1_to_noc_size_bytes + dram_offsets_size_bytes), l1_offsets_size_bytes >> 2);
}

FORCE_INLINE
uint32_t firmware_config_init(
tt_l1_ptr mailboxes_t* const mailboxes, uint32_t core_type_index, uint32_t dispatch_class) {
Expand Down
14 changes: 12 additions & 2 deletions tt_metal/hw/inc/grayskull/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,20 @@
#define MEM_NCRISC_LOCAL_SIZE (4 * 1024)
#define MEM_TRISC_LOCAL_SIZE (2 * 1024)

// Memory for (dram/l1)_bank_to_noc_xy arrays, size needs to be at least 2 * NUM_NOCS * (NUM_DRAM_BANKS + NUM_L1_BANKS)
#define MEM_BANK_TO_NOC_XY_SIZE 1024
// Memory for bank_to_dram_offset and bank_to_l1_offset arrays, size needs to be at least 4 * (NUM_DRAM_BANKS + NUM_L1_BANKS)
#define MEM_BANK_OFFSET_SIZE 1024

#define NCRISC_HAS_IRAM 1
#define MEM_NCRISC_IRAM_BASE 0xFFC00000
#define MEM_NCRISC_IRAM_SIZE (16 * 1024)

/////////////
// Firmware/kernel code holes
#define MEM_BRISC_FIRMWARE_SIZE (5 * 1024 + 416)
#define MEM_BRISC_FIRMWARE_SIZE (5 * 1024 + 624)
// TODO: perhaps put NCRISC FW in the scratch area and free 1.5K after init (GS/WH)
#define MEM_NCRISC_FIRMWARE_SIZE 1616
#define MEM_NCRISC_FIRMWARE_SIZE 1824
#define MEM_TRISC0_FIRMWARE_SIZE 1536
#define MEM_TRISC1_FIRMWARE_SIZE 1536
#define MEM_TRISC2_FIRMWARE_SIZE 1536
Expand Down Expand Up @@ -100,6 +105,9 @@
#define MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)

#define MEM_BANK_TO_NOC_SCRATCH (MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_BANK_TO_NOC_SIZE (MEM_BANK_TO_NOC_XY_SIZE + MEM_BANK_OFFSET_SIZE)

/////////////
// Stack info
// Increasing the stack size comes at the expense of less local memory for globals
Expand All @@ -125,5 +133,7 @@
#define MEM_IERISC_MAP_END 0
#define MEM_IERISC_INIT_LOCAL_L1_BASE_SCRATCH 0
#define MEM_IERISC_STACK_SIZE 0
#define MEM_IERISC_BANK_TO_NOC_SCRATCH 0
#define MEM_IERISC_BANK_TO_NOC_SIZE 0

#define MEM_IERISC_KERNEL_PAD 0
2 changes: 2 additions & 0 deletions tt_metal/hw/inc/grayskull/eth_l1_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ struct address_map {

static constexpr std::int32_t ERISC_L1_UNRESERVED_SIZE = 0;
static constexpr std::int32_t ERISC_L1_TUNNEL_BUFFER_SIZE = 0;
static constexpr std::int32_t ERISC_MEM_BANK_TO_NOC_SCRATCH = 0;
static constexpr std::int32_t ERISC_MEM_BANK_TO_NOC_SIZE = 0;

static constexpr std::uint32_t RETRAIN_COUNT_ADDR = 0x1EDC;
static constexpr std::uint32_t RETRAIN_FORCE_ADDR = 0x1EFC;
Expand Down
14 changes: 13 additions & 1 deletion tt_metal/hw/inc/wormhole/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,18 @@
#define MEM_NCRISC_LOCAL_SIZE (4 * 1024)
#define MEM_TRISC_LOCAL_SIZE (2 * 1024)

// Memory for (dram/l1)_bank_to_noc_xy arrays, size needs to be at least 2 * NUM_NOCS * (NUM_DRAM_BANKS + NUM_L1_BANKS)
#define MEM_BANK_TO_NOC_XY_SIZE 1024
// Memory for bank_to_dram_offset and bank_to_l1_offset arrays, size needs to be at least 4 * (NUM_DRAM_BANKS + NUM_L1_BANKS)
#define MEM_BANK_OFFSET_SIZE 1024

#define NCRISC_HAS_IRAM 1
#define MEM_NCRISC_IRAM_BASE 0xFFC00000
#define MEM_NCRISC_IRAM_SIZE (16 * 1024)

/////////////
// Firmware/kernel code holes
#define MEM_BRISC_FIRMWARE_SIZE (5 * 1024 + 64)
#define MEM_BRISC_FIRMWARE_SIZE (5 * 1024 + 256)
// TODO: perhaps put NCRISC FW in the scratch area and free 1.5K after init (GS/WH)
#define MEM_NCRISC_FIRMWARE_SIZE 1536
#define MEM_TRISC0_FIRMWARE_SIZE 1536
Expand Down Expand Up @@ -102,6 +107,9 @@
#define MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)

#define MEM_BANK_TO_NOC_SCRATCH (MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_BANK_TO_NOC_SIZE (MEM_BANK_TO_NOC_XY_SIZE + MEM_BANK_OFFSET_SIZE)

/////////////
// Stack info
// Increasing the stack size comes at the expense of less local memory for globals
Expand Down Expand Up @@ -137,6 +145,10 @@
#define MEM_IERISC_STACK_SIZE 1024
#define MEM_IERISC_STACK_BASE (MEM_LOCAL_BASE + MEM_IERISC_LOCAL_SIZE - MEM_IERISC_STACK_SIZE)

#define MEM_IERISC_BANK_TO_NOC_SCRATCH (MEM_IERISC_INIT_LOCAL_L1_BASE_SCRATCH + MEM_IERISC_LOCAL_SIZE)
#define MEM_IERISC_BANK_TO_NOC_SIZE (MEM_BANK_TO_NOC_XY_SIZE + MEM_BANK_OFFSET_SIZE)


/////////////
// Padding/alignment restriction needed in linker scripts for erisc
#define MEM_IERISC_KERNEL_PAD 32
Loading

0 comments on commit c1a246e

Please sign in to comment.