Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pkeller/memmap profiler #12067

Merged
merged 6 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 6 additions & 32 deletions tt_metal/hostdevcommon/common_runtime_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "common_values.hpp"
#include "dev_mem_map.h"
#include "noc/noc_parameters.h"
#include "hostdevcommon/profiler_common.h"

/*
* This file contains addresses that are visible to both host and device compiled code.
Expand All @@ -20,58 +21,31 @@ constexpr static std::uint32_t DRAM_ALIGNMENT = NOC_DRAM_READ_ALIGNMENT_BYTES >=
constexpr static std::uint32_t DRAM_BARRIER_SIZE = ((sizeof(uint32_t) + DRAM_ALIGNMENT - 1) / DRAM_ALIGNMENT) * DRAM_ALIGNMENT;
constexpr static std::uint32_t DRAM_UNRESERVED_BASE = DRAM_BARRIER_BASE + DRAM_BARRIER_SIZE; // Start of unreserved space

constexpr static std::uint32_t L1_ALIGNMENT = NOC_L1_READ_ALIGNMENT_BYTES >= NOC_L1_WRITE_ALIGNMENT_BYTES ? NOC_L1_READ_ALIGNMENT_BYTES : NOC_L1_WRITE_ALIGNMENT_BYTES;

// Take max alignment to satisfy NoC rd/wr constraints
// Tensix/Eth -> PCIe/DRAM src and dst addrs must be L1_ALIGNMENT aligned
// PCIe/DRAM -> Tensix/Eth src and dst addrs must be DRAM_ALIGNMENT aligned
// Tensix/Eth <-> Tensix/Eth src and dst addrs must be L1_ALIGNMENT aligned
constexpr static std::uint32_t ALLOCATOR_ALIGNMENT = DRAM_ALIGNMENT >= L1_ALIGNMENT ? DRAM_ALIGNMENT : L1_ALIGNMENT;

// TODO: these could be moved to even lower addresses -- 5 RISC-V hexes combined don't need 100 KB
constexpr static std::uint32_t PROFILER_L1_MARKER_UINT32_SIZE = 2;
constexpr static std::uint32_t PROFILER_L1_MARKER_BYTES_SIZE = PROFILER_L1_MARKER_UINT32_SIZE * sizeof(uint32_t);

constexpr static std::uint32_t PROFILER_L1_PROGRAM_ID_COUNT = 2;
constexpr static std::uint32_t PROFILER_L1_GUARANTEED_MARKER_COUNT = 4;

constexpr static std::uint32_t PROFILER_L1_OPTIONAL_MARKER_COUNT = 250;
constexpr static std::uint32_t PROFILER_L1_OP_MIN_OPTIONAL_MARKER_COUNT = 2;

constexpr static std::uint32_t PROFILER_L1_VECTOR_SIZE = (PROFILER_L1_OPTIONAL_MARKER_COUNT + PROFILER_L1_GUARANTEED_MARKER_COUNT + PROFILER_L1_PROGRAM_ID_COUNT) * PROFILER_L1_MARKER_UINT32_SIZE;
constexpr static std::uint32_t PROFILER_L1_BUFFER_SIZE = PROFILER_L1_VECTOR_SIZE * sizeof(uint32_t);

constexpr static std::uint32_t PROFILER_L1_BUFFER_BR = MEM_MAP_END;
constexpr static std::uint32_t PROFILER_L1_BUFFER_NC = PROFILER_L1_BUFFER_BR + PROFILER_L1_BUFFER_SIZE;
constexpr static std::uint32_t PROFILER_L1_BUFFER_T0 = PROFILER_L1_BUFFER_NC + PROFILER_L1_BUFFER_SIZE;
constexpr static std::uint32_t PROFILER_L1_BUFFER_T1 = PROFILER_L1_BUFFER_T0 + PROFILER_L1_BUFFER_SIZE;
constexpr static std::uint32_t PROFILER_L1_BUFFER_T2 = PROFILER_L1_BUFFER_T1 + PROFILER_L1_BUFFER_SIZE;

constexpr static std::uint32_t PROFILER_L1_END_ADDRESS = PROFILER_L1_BUFFER_T2 + PROFILER_L1_BUFFER_SIZE;

// TODO: move these out of the memory map into profiler code
constexpr static std::uint32_t PROFILER_OP_SUPPORT_COUNT = 1000;
constexpr static std::uint32_t PROFILER_FULL_HOST_VECTOR_SIZE_PER_RISC = PROFILER_L1_MARKER_UINT32_SIZE * (PROFILER_L1_PROGRAM_ID_COUNT + PROFILER_L1_GUARANTEED_MARKER_COUNT + PROFILER_L1_OP_MIN_OPTIONAL_MARKER_COUNT) * PROFILER_OP_SUPPORT_COUNT;
constexpr static std::uint32_t PROFILER_FULL_HOST_VECTOR_SIZE_PER_RISC = kernel_profiler::PROFILER_L1_MARKER_UINT32_SIZE * (kernel_profiler::PROFILER_L1_PROGRAM_ID_COUNT + kernel_profiler::PROFILER_L1_GUARANTEED_MARKER_COUNT + kernel_profiler::PROFILER_L1_OP_MIN_OPTIONAL_MARKER_COUNT) * PROFILER_OP_SUPPORT_COUNT;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does PROFILER_FULL_HOST_VECTOR_SIZE_PER_RISC need to be in here? is it only used by profiling code? likewise for PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC

constexpr static std::uint32_t PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC = PROFILER_FULL_HOST_VECTOR_SIZE_PER_RISC * sizeof(uint32_t);
constexpr static std::uint32_t PROFILER_RISC_COUNT = 5;

static_assert (PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC > PROFILER_L1_BUFFER_SIZE);
static_assert (PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC > kernel_profiler::PROFILER_L1_BUFFER_SIZE);

// Kernel config buffer is WIP
// Size is presently based on the old sizes of the RTAs + CB config + Sems
// plus some extra space freed up in the mem map
constexpr static std::uint32_t L1_KERNEL_CONFIG_BASE = PROFILER_L1_END_ADDRESS;
constexpr static std::uint32_t L1_KERNEL_CONFIG_BASE = MEM_MAP_END;
constexpr static std::uint32_t L1_KERNEL_CONFIG_SIZE = 4 * 1024 + 256 + 128 + 512;

constexpr static std::uint32_t IDLE_ERISC_L1_KERNEL_CONFIG_BASE = 32 * 1024;

constexpr static std::uint32_t NUM_CIRCULAR_BUFFERS = 32;
constexpr static std::uint32_t UINT32_WORDS_PER_CIRCULAR_BUFFER_CONFIG = 4;

constexpr static std::uint32_t PROFILER_L1_CONTROL_VECTOR_SIZE = 32;
constexpr static std::uint32_t PROFILER_L1_CONTROL_BUFFER_SIZE = PROFILER_L1_CONTROL_VECTOR_SIZE * sizeof(uint32_t);
constexpr static std::uint32_t PROFILER_L1_BUFFER_CONTROL = L1_KERNEL_CONFIG_BASE + L1_KERNEL_CONFIG_SIZE;

constexpr static std::uint32_t L1_UNRESERVED_BASE = ((PROFILER_L1_BUFFER_CONTROL + PROFILER_L1_CONTROL_BUFFER_SIZE - 1) | (DRAM_ALIGNMENT - 1)) + 1;
constexpr static std::uint32_t L1_UNRESERVED_BASE = ((L1_KERNEL_CONFIG_BASE + L1_KERNEL_CONFIG_SIZE - 1) | (DRAM_ALIGNMENT - 1)) + 1;

constexpr static std::uint32_t ERISC_L1_UNRESERVED_BASE = L1_UNRESERVED_BASE; // Start of unreserved space

Expand Down
17 changes: 12 additions & 5 deletions tt_metal/hostdevcommon/profiler_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,16 @@ namespace kernel_profiler{

enum ControlBuffer
{
HOST_BUFFER_END_INDEX_BR,
HOST_BUFFER_END_INDEX_BR_ER,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this because eth cores are treated as if they have a single BRISC? This may not scale to BH, which has 2 RISC-Vs on eth cores.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call, this will turn into a union and each device can define its own struct for controls.

HOST_BUFFER_END_INDEX_NC,
HOST_BUFFER_END_INDEX_T0,
HOST_BUFFER_END_INDEX_T1,
HOST_BUFFER_END_INDEX_T2,
HOST_BUFFER_END_INDEX_ER,
DEVICE_BUFFER_END_INDEX_BR,
DEVICE_BUFFER_END_INDEX_BR_ER,
DEVICE_BUFFER_END_INDEX_NC,
DEVICE_BUFFER_END_INDEX_T0,
DEVICE_BUFFER_END_INDEX_T1,
DEVICE_BUFFER_END_INDEX_T2,
DEVICE_BUFFER_END_INDEX_ER,
FW_RESET_H,
FW_RESET_L,
DRAM_PROFILER_ADDRESS,
Expand All @@ -47,6 +45,15 @@ namespace kernel_profiler{
PROFILER_DONE,
};


// TODO: use data types in profile_msg_t rather than addresses/sizes
// Sizing constants for the profiler's L1 control vector and marker buffers.
// *_VECTOR_SIZE values count uint32_t words; *_BUFFER_SIZE values count bytes.

// Control vector: 32 uint32_t words, i.e. 128 bytes.
constexpr static std::uint32_t PROFILER_L1_CONTROL_VECTOR_SIZE = 32;
constexpr static std::uint32_t PROFILER_L1_CONTROL_BUFFER_SIZE = PROFILER_L1_CONTROL_VECTOR_SIZE * sizeof(uint32_t);
// Each marker entry occupies 2 uint32_t words.
constexpr static std::uint32_t PROFILER_L1_MARKER_UINT32_SIZE = 2;
// Number of entries of each category that make up one marker buffer.
constexpr static std::uint32_t PROFILER_L1_PROGRAM_ID_COUNT = 2;
constexpr static std::uint32_t PROFILER_L1_GUARANTEED_MARKER_COUNT = 4;
constexpr static std::uint32_t PROFILER_L1_OPTIONAL_MARKER_COUNT = 250;
// Not part of the buffer sizing below.
constexpr static std::uint32_t PROFILER_L1_OP_MIN_OPTIONAL_MARKER_COUNT = 2;
// One marker buffer: (250 + 4 + 2) entries * 2 words = 512 uint32_t words ...
constexpr static std::uint32_t PROFILER_L1_VECTOR_SIZE = (PROFILER_L1_OPTIONAL_MARKER_COUNT + PROFILER_L1_GUARANTEED_MARKER_COUNT + PROFILER_L1_PROGRAM_ID_COUNT) * PROFILER_L1_MARKER_UINT32_SIZE;
// ... which is 2048 bytes.
constexpr static std::uint32_t PROFILER_L1_BUFFER_SIZE = PROFILER_L1_VECTOR_SIZE * sizeof(uint32_t);

}
3 changes: 2 additions & 1 deletion tt_metal/hw/inc/blackhole/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@
#define MEM_L1_BARRIER 12
#define MEM_MAILBOX_BASE 16
// Magic size must be big enough to hold dev_msgs_t. static_asserts will fire if this is too small
#define MEM_MAILBOX_END (MEM_MAILBOX_BASE + 1356)
// Mailbox region size in bytes. The terms appear to mirror the mailboxes_t layout:
//   5 * 4 * 512 : profiler buffers -- 5 RISCs x 4 bytes x 512 uint32_t words each
//   4 * 32      : profiler control vector -- 4 bytes x 32 uint32_t words
//   1364        : remaining mailbox members and alignment padding (TODO confirm breakdown)
// Parenthesized so the macro keeps its value when used inside a larger
// expression (e.g. 2 * MEM_MAILBOX_SIZE).
#define MEM_MAILBOX_SIZE (5 * 4 * 512 + 4 * 32 + 1364)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comments for constants?

#define MEM_MAILBOX_END (MEM_MAILBOX_BASE + MEM_MAILBOX_SIZE)
#define MEM_IERISC_MAILBOX_BASE 1024
#define MEM_IERISC_MAILBOX_END (MEM_IERISC_MAILBOX_BASE + 128)
#define MEM_ZEROS_BASE ((MEM_MAILBOX_END + 31) & ~31)
Expand Down
11 changes: 2 additions & 9 deletions tt_metal/hw/inc/blackhole/eth_l1_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,9 @@ struct address_map {
// erisc early exit functionality re-uses mailboxes_t::ncrisc_halt_msg_t::stack_save memory
static constexpr std::int32_t ERISC_MEM_MAILBOX_STACK_SAVE = ERISC_MEM_MAILBOX_BASE + 4;

// Kernel config buffer is WIP
// Size is presently based on the old sizes of the RTAs + CB config + Sems
static constexpr std::uint32_t PROFILER_L1_BUFFER_ER = ERISC_MEM_MAILBOX_BASE + 288 + 256 + 16;
static constexpr std::uint32_t PROFILER_L1_BUFFER_CONTROL = PROFILER_L1_BUFFER_ER + PROFILER_L1_BUFFER_SIZE;

static constexpr std::int32_t ERISC_L1_KERNEL_CONFIG_BASE = PROFILER_L1_BUFFER_CONTROL + PROFILER_L1_CONTROL_BUFFER_SIZE;

static_assert((PROFILER_L1_BUFFER_ER % 32) == 0);
static_assert((PROFILER_L1_BUFFER_CONTROL % 32) == 0);
// End address of the ERISC mailbox region.
// (32 + 512) * 4 matches the profiler control vector (32 words) plus one
// profiler marker buffer (512 words) at 4 bytes per uint32_t word --
// presumably the profiler footprint for a single ERISC; TODO confirm.
// NOTE(review): the 288 + 256 + 16 terms are not broken down here; document
// which mailbox members they account for.
static constexpr std::uint32_t ERISC_MEM_MAILBOX_END = ERISC_MEM_MAILBOX_BASE + 288 + 256 + 16 + (32 + 512) * 4;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comments for constants?


static constexpr std::int32_t ERISC_L1_KERNEL_CONFIG_BASE = ERISC_MEM_MAILBOX_END;
static constexpr std::int32_t ERISC_L1_UNRESERVED_BASE = ERISC_L1_KERNEL_CONFIG_BASE + ERISC_L1_KERNEL_CONFIG_SIZE;
static constexpr std::int32_t ERISC_L1_UNRESERVED_SIZE = MAX_L1_LOADING_SIZE - ERISC_L1_UNRESERVED_BASE;

Expand Down
2 changes: 2 additions & 0 deletions tt_metal/hw/inc/blackhole/noc/noc_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,6 @@
#define NOC_DRAM_READ_ALIGNMENT_BYTES 64
#define NOC_DRAM_WRITE_ALIGNMENT_BYTES 16

// L1 alignment in bytes: the larger of the NOC L1 read and write alignment
// requirements, as a uint32_t.
#define L1_ALIGNMENT (static_cast<uint32_t>(NOC_L1_READ_ALIGNMENT_BYTES >= NOC_L1_WRITE_ALIGNMENT_BYTES ? NOC_L1_READ_ALIGNMENT_BYTES : NOC_L1_WRITE_ALIGNMENT_BYTES))

#endif
69 changes: 51 additions & 18 deletions tt_metal/hw/inc/dev_msgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include "core_config.h"
#include "noc/noc_parameters.h"
#include "dev_mem_map.h"
#include "eth_l1_address_map.h"
#include "hostdevcommon/profiler_common.h"

// TODO: move these to processor specific files
#if defined(COMPILE_FOR_ERISC)
Expand Down Expand Up @@ -195,6 +195,12 @@ struct debug_ring_buf_msg_t {
uint32_t data[DEBUG_RING_BUFFER_ELEMENTS];
};

// Per-RISC stack usage record, with one slot per unique RISC in each array.
// The pad brings the struct to 16 uint16_t entries (32 bytes) in total,
// independent of DebugNumUniqueRiscs.
struct debug_stack_usage_t {
// presumably the peak stack usage observed per RISC -- TODO confirm units
volatile uint16_t max_usage[DebugNumUniqueRiscs];
// presumably the watcher id of the kernel associated with that usage -- TODO confirm
volatile uint16_t watcher_kernel_id[DebugNumUniqueRiscs];
// Filler: 16 total entries minus the 2 * DebugNumUniqueRiscs used above.
volatile uint16_t pad[16 - DebugNumUniqueRiscs * 2];
};

constexpr static std::uint32_t DPRINT_BUFFER_SIZE = 204; // per thread
// TODO: when device specific headers specify number of processors
// (and hal abstracts them on host), get these from there
Expand All @@ -204,25 +210,14 @@ constexpr static std::uint32_t DPRINT_BUFFERS_COUNT = 1;
constexpr static std::uint32_t DPRINT_BUFFERS_COUNT = 5;
#endif

// TODO: w/ the hal, this can come from core specific defines
constexpr static std::uint32_t MAX_RISCV_PER_CORE = 5;

struct dprint_buf_msg_t {
uint8_t data[DPRINT_BUFFERS_COUNT][DPRINT_BUFFER_SIZE];
uint32_t pad; // to 1024 bytes
};

struct debug_stack_usage_t {
volatile uint16_t max_usage[DebugNumUniqueRiscs];
volatile uint16_t watcher_kernel_id[DebugNumUniqueRiscs];
volatile uint16_t pad[16 - DebugNumUniqueRiscs * 2];
};

// Watcher enable states.
// NOTE(review): the values start at 2 rather than 0/1 -- presumably so that
// zeroed or uninitialized memory is not mistaken for a valid state; confirm.
enum watcher_enable_msg_t {
WatcherDisabled = 2,
WatcherEnabled = 3,
};

// TODO: w/ the hal, this can come from core specific defines
constexpr static std::uint32_t MAX_RISCV_PER_CORE = 5;

struct watcher_msg_t {
volatile uint32_t enable;
struct debug_waypoint_msg_t debug_waypoint[MAX_RISCV_PER_CORE];
Expand All @@ -234,13 +229,40 @@ struct watcher_msg_t {
struct debug_ring_buf_msg_t debug_ring_buf;
};

// Debug-print buffers: DPRINT_BUFFERS_COUNT buffers of DPRINT_BUFFER_SIZE bytes
// each, followed by one 4-byte pad word.
struct dprint_buf_msg_t {
uint8_t data[DPRINT_BUFFERS_COUNT][DPRINT_BUFFER_SIZE];
// With 5 buffers of 204 bytes (1020 bytes) the pad rounds the struct up to 1024 bytes.
uint32_t pad; // to 1024 bytes
};


// NOC alignment max from BH
// Byte alignment that the mailbox offsets of `launch` and `profiler` are
// checked against by the static_asserts on the mailbox layout.
static constexpr uint32_t TT_ARCH_MAX_NOC_WRITE_ALIGNMENT = 16;

// TODO: when device specific headers specify number of processors
// (and hal abstracts them on host), get these from there (same as above for dprint)
// Number of per-RISC profiler buffers in profiler_msg_t: 1 for ERISC / idle
// ERISC builds, 5 otherwise.
#if defined(COMPILE_FOR_ERISC) || defined (COMPILE_FOR_IDLE_ERISC)
static constexpr uint32_t PROFILER_RISC_COUNT = 1;
#else
static constexpr uint32_t PROFILER_RISC_COUNT = 5;
#endif

// Number of uint32_t pad words placed in mailboxes_t ahead of the `launch`
// and `profiler` members respectively, so those members start on a
// NOC-aligned address.
static constexpr uint32_t LAUNCH_NOC_ALIGMENT_PAD_COUNT = 1;
static constexpr uint32_t PROFILER_NOC_ALIGMENT_PAD_COUNT = 2;

// Profiler section of the mailbox: a control vector followed by one marker
// buffer per RISC.
struct profiler_msg_t {
// PROFILER_L1_CONTROL_VECTOR_SIZE uint32_t control words.
uint32_t control_vector[kernel_profiler::PROFILER_L1_CONTROL_VECTOR_SIZE];
// PROFILER_RISC_COUNT buffers of PROFILER_L1_VECTOR_SIZE uint32_t words each.
uint32_t buffer[PROFILER_RISC_COUNT][kernel_profiler::PROFILER_L1_VECTOR_SIZE];
};

struct mailboxes_t {
struct ncrisc_halt_msg_t ncrisc_halt;
struct slave_sync_msg_t slave_sync;
uint32_t pad;
uint32_t pads_1[LAUNCH_NOC_ALIGMENT_PAD_COUNT];
struct launch_msg_t launch;
struct watcher_msg_t watcher;
struct dprint_buf_msg_t dprint_buf;
uint32_t pads_2[PROFILER_NOC_ALIGMENT_PAD_COUNT];
struct profiler_msg_t profiler;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the padding here to ensure that the profiler struct base address respects NOC alignment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is right.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The downside of an array of pads is that, as we add/remove members, the array size could become 0, which won't compile.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The size is chosen in the ifdef depending on ERISC vs. TENSIX. It depends on that because the sizes of DPRINT and PROFILER vary. I double-checked, and a compile-time size of zero is fine; it compiles.

};

// Watcher struct needs to be 32b-divisible, since we need to write it from host using write_hex_vec_to_core().
Expand All @@ -249,15 +271,26 @@ static_assert(sizeof(kernel_config_msg_t) % sizeof(uint32_t) == 0);

#ifndef TENSIX_FIRMWARE
// Validate assumptions on mailbox layout on host compile
static_assert((MEM_MAILBOX_BASE + offsetof(mailboxes_t, launch)) % 32 == 0);
static_assert((eth_l1_mem::address_map::ERISC_MEM_MAILBOX_BASE + offsetof(mailboxes_t, launch)) % 32 == 0);
// Constexpr definitions allow for printing of breaking values at compile time
#ifdef NCRISC_HAS_IRAM
// These are only used in ncrisc-halt.S
static_assert(MEM_MAILBOX_BASE + offsetof(mailboxes_t, slave_sync.ncrisc) == MEM_SLAVE_RUN_MAILBOX_ADDRESS);
static_assert(
MEM_MAILBOX_BASE + offsetof(mailboxes_t, ncrisc_halt.stack_save) == MEM_NCRISC_HALT_STACK_MAILBOX_ADDRESS);
#endif
#if defined(COMPILE_FOR_ERISC) || defined (COMPILE_FOR_IDLE_ERISC)
static_assert( eth_l1_mem::address_map::ERISC_MEM_MAILBOX_BASE + sizeof(mailboxes_t) < eth_l1_mem::address_map::ERISC_MEM_MAILBOX_END);
static constexpr uint32_t ETH_LAUNCH_CHECK = (eth_l1_mem::address_map::ERISC_MEM_MAILBOX_BASE + offsetof(mailboxes_t, launch)) % TT_ARCH_MAX_NOC_WRITE_ALIGNMENT;
static constexpr uint32_t ETH_PROFILER_CHECK = (eth_l1_mem::address_map::ERISC_MEM_MAILBOX_BASE + offsetof(mailboxes_t, profiler)) % TT_ARCH_MAX_NOC_WRITE_ALIGNMENT;
static_assert( ETH_LAUNCH_CHECK == 0);
static_assert( ETH_PROFILER_CHECK == 0);
#else
static_assert(MEM_MAILBOX_BASE + sizeof(mailboxes_t) < MEM_MAILBOX_END);
static constexpr uint32_t TENSIX_LAUNCH_CHECK = (MEM_MAILBOX_BASE + offsetof(mailboxes_t, launch)) % TT_ARCH_MAX_NOC_WRITE_ALIGNMENT;
static constexpr uint32_t TENSIX_PROFILER_CHECK = (MEM_MAILBOX_BASE + offsetof(mailboxes_t, profiler)) % TT_ARCH_MAX_NOC_WRITE_ALIGNMENT;
static_assert( TENSIX_LAUNCH_CHECK == 0);
static_assert( TENSIX_PROFILER_CHECK == 0);
#endif
#endif

struct eth_word_t {
Expand Down
3 changes: 2 additions & 1 deletion tt_metal/hw/inc/grayskull/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@
#define MEM_L1_BARRIER 12
#define MEM_MAILBOX_BASE 16
// Magic size must be big enough to hold dev_msgs_t. static_asserts will fire if this is too small
#define MEM_MAILBOX_END (MEM_MAILBOX_BASE + 1356)
// Mailbox region size in bytes. The terms appear to mirror the mailboxes_t layout:
//   5 * 2 * 1024 : profiler buffers -- 5 RISCs x 2 KiB (512 uint32_t words) each
//   128          : profiler control vector -- 32 uint32_t words x 4 bytes
//   1364         : remaining mailbox members and alignment padding (TODO confirm breakdown)
// Parenthesized so the macro keeps its value when used inside a larger
// expression (e.g. 2 * MEM_MAILBOX_SIZE).
#define MEM_MAILBOX_SIZE (5 * 2 * 1024 + 128 + 1364)
#define MEM_MAILBOX_END (MEM_MAILBOX_BASE + MEM_MAILBOX_SIZE)
#define MEM_IERISC_MAILBOX_BASE 0
#define MEM_IERISC_MAILBOX_END 0
#define MEM_ZEROS_BASE ((MEM_MAILBOX_END + 31) & ~31)
Expand Down
2 changes: 0 additions & 2 deletions tt_metal/hw/inc/grayskull/eth_l1_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,5 @@ struct address_map {

static constexpr std::int32_t ERISC_L1_UNRESERVED_SIZE = 0;
static constexpr std::int32_t ERISC_L1_TUNNEL_BUFFER_SIZE = 0;
static constexpr std::uint32_t PROFILER_L1_BUFFER_ER = 0;
static constexpr std::uint32_t PROFILER_L1_BUFFER_CONTROL = 0;
};
} // namespace llk
2 changes: 2 additions & 0 deletions tt_metal/hw/inc/grayskull/noc/noc_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,6 @@
#define NOC_DRAM_READ_ALIGNMENT_BYTES 32
#define NOC_DRAM_WRITE_ALIGNMENT_BYTES 16

// L1 alignment in bytes: the larger of the NOC L1 read and write alignment
// requirements, as a uint32_t.
#define L1_ALIGNMENT (static_cast<uint32_t>(NOC_L1_READ_ALIGNMENT_BYTES >= NOC_L1_WRITE_ALIGNMENT_BYTES ? NOC_L1_READ_ALIGNMENT_BYTES : NOC_L1_WRITE_ALIGNMENT_BYTES))

#endif
3 changes: 2 additions & 1 deletion tt_metal/hw/inc/wormhole/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@
#define MEM_L1_BARRIER 12
#define MEM_MAILBOX_BASE 16
// Magic size must be big enough to hold dev_msgs_t. static_asserts will fire if this is too small
#define MEM_MAILBOX_END (MEM_MAILBOX_BASE + 1356)
// Mailbox region size in bytes. The terms appear to mirror the mailboxes_t layout:
//   5 * 4 * 512 : profiler buffers -- 5 RISCs x 4 bytes x 512 uint32_t words each
//   4 * 32      : profiler control vector -- 4 bytes x 32 uint32_t words
//   1364        : remaining mailbox members and alignment padding (TODO confirm breakdown)
// Parenthesized so the macro keeps its value when used inside a larger
// expression (e.g. 2 * MEM_MAILBOX_SIZE).
#define MEM_MAILBOX_SIZE (5 * 4 * 512 + 4 * 32 + 1364)
#define MEM_MAILBOX_END (MEM_MAILBOX_BASE + MEM_MAILBOX_SIZE)
#define MEM_IERISC_MAILBOX_BASE 1024
#define MEM_IERISC_MAILBOX_END (MEM_IERISC_MAILBOX_BASE + 128)
#define MEM_ZEROS_BASE ((MEM_MAILBOX_END + 31) & ~31)
Expand Down
11 changes: 3 additions & 8 deletions tt_metal/hw/inc/wormhole/eth_l1_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

#include <cstdint>

#include "tt_metal/hostdevcommon/common_runtime_address_map.h"
#include "noc/noc_parameters.h"

namespace eth_l1_mem {

Expand Down Expand Up @@ -60,14 +60,9 @@ struct address_map {
// erisc early exit functionality re-uses mailboxes_t::ncrisc_halt_msg_t::stack_save memory
static constexpr std::int32_t ERISC_MEM_MAILBOX_STACK_SAVE = ERISC_MEM_MAILBOX_BASE + 4;

static constexpr std::uint32_t PROFILER_L1_BUFFER_ER = ERISC_MEM_MAILBOX_BASE + 288 + 256 + 16;
static constexpr std::uint32_t PROFILER_L1_BUFFER_CONTROL = PROFILER_L1_BUFFER_ER + PROFILER_L1_BUFFER_SIZE;

static constexpr std::int32_t ERISC_L1_KERNEL_CONFIG_BASE = PROFILER_L1_BUFFER_CONTROL + PROFILER_L1_CONTROL_BUFFER_SIZE;

static_assert((PROFILER_L1_BUFFER_ER % 32) == 0);
static_assert((PROFILER_L1_BUFFER_CONTROL % 32) == 0);
// End address of the ERISC mailbox region.
// (512 + 32) * 4 matches one profiler marker buffer (512 words) plus the
// profiler control vector (32 words) at 4 bytes per uint32_t word --
// presumably the profiler footprint for a single ERISC; TODO confirm.
// NOTE(review): the 288 + 256 + 16 terms are not broken down here; document
// which mailbox members they account for.
static constexpr std::uint32_t ERISC_MEM_MAILBOX_END = ERISC_MEM_MAILBOX_BASE + 288 + 256 + 16 + (512 + 32) * 4;

static constexpr std::int32_t ERISC_L1_KERNEL_CONFIG_BASE = ERISC_MEM_MAILBOX_END;
static constexpr std::int32_t ERISC_L1_UNRESERVED_BASE = ERISC_L1_KERNEL_CONFIG_BASE + ERISC_L1_KERNEL_CONFIG_SIZE;
static constexpr std::int32_t ERISC_L1_UNRESERVED_SIZE = MAX_L1_LOADING_SIZE - ERISC_L1_UNRESERVED_BASE;

Expand Down
2 changes: 2 additions & 0 deletions tt_metal/hw/inc/wormhole/noc/noc_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,6 @@
#define NOC_DRAM_READ_ALIGNMENT_BYTES 32
#define NOC_DRAM_WRITE_ALIGNMENT_BYTES 16

// L1 alignment in bytes: the larger of the NOC L1 read and write alignment
// requirements, as a uint32_t.
#define L1_ALIGNMENT (static_cast<uint32_t>(NOC_L1_READ_ALIGNMENT_BYTES >= NOC_L1_WRITE_ALIGNMENT_BYTES ? NOC_L1_READ_ALIGNMENT_BYTES : NOC_L1_WRITE_ALIGNMENT_BYTES))

#endif
3 changes: 2 additions & 1 deletion tt_metal/impl/allocator/allocator_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ enum class MemoryAllocator {
L1_BANKING = 1,
};

constexpr static std::uint32_t STORAGE_ONLY_RESERVED_SIZE = ((MEM_MAILBOX_END + ALLOCATOR_ALIGNMENT - 1) / ALLOCATOR_ALIGNMENT) * ALLOCATOR_ALIGNMENT;
constexpr static std::uint32_t STORAGE_ONLY_RESERVED_SIZE = ((MEM_MAILBOX_BASE + ALLOCATOR_ALIGNMENT - 1) / ALLOCATOR_ALIGNMENT) * ALLOCATOR_ALIGNMENT;

// Storage only cores only need to reserve mailbox space to hold barriers
constexpr static std::uint32_t STORAGE_ONLY_UNRESERVED_BASE = STORAGE_ONLY_RESERVED_SIZE;

Expand Down
7 changes: 6 additions & 1 deletion tt_metal/impl/dispatch/kernels/cq_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,10 +261,15 @@ FORCE_INLINE
void cb_wait_all_pages(uint32_t n) {
volatile tt_l1_ptr uint32_t* sem_addr =
reinterpret_cast<volatile tt_l1_ptr uint32_t*>(get_semaphore<fd_core_type>(sem_id));

// Downstream component sets the MSB as a terminate bit
// Mask that off to avoid a race between the sem count and terminate
n &= 0x7fffffff;

WAYPOINT("TAPW");
do {
invalidate_l1_cache();
} while ((*sem_addr) != n);
} while ((*sem_addr & 0x7fffffff) != n); // mask off terminate bit
WAYPOINT("TAPD");
}

Expand Down
2 changes: 2 additions & 0 deletions tt_metal/llrt/blackhole/bh_hal_active_eth_mem_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ HalCoreInfoType create_active_eth_mem_map() {
mem_map_bases[hv(HalMemAddrType::LAUNCH)] = GET_ETH_MAILBOX_ADDRESS_HOST(launch);
mem_map_bases[hv(HalMemAddrType::WATCHER)] = GET_ETH_MAILBOX_ADDRESS_HOST(watcher);
mem_map_bases[hv(HalMemAddrType::DPRINT)] = GET_ETH_MAILBOX_ADDRESS_HOST(dprint_buf);
mem_map_bases[hv(HalMemAddrType::PROFILER)] = GET_ETH_MAILBOX_ADDRESS_HOST(profiler);
mem_map_bases[hv(HalMemAddrType::KERNEL_CONFIG)] = eth_l1_mem::address_map::ERISC_L1_KERNEL_CONFIG_BASE;
mem_map_bases[hv(HalMemAddrType::UNRESERVED)] = eth_l1_mem::address_map::ERISC_L1_UNRESERVED_BASE;

Expand All @@ -44,6 +45,7 @@ HalCoreInfoType create_active_eth_mem_map() {
mem_map_sizes[hv(HalMemAddrType::LAUNCH)] = sizeof(launch_msg_t);
mem_map_sizes[hv(HalMemAddrType::WATCHER)] = sizeof(watcher_msg_t);
mem_map_sizes[hv(HalMemAddrType::DPRINT)] = sizeof(dprint_buf_msg_t);
mem_map_sizes[hv(HalMemAddrType::PROFILER)] = sizeof(profiler_msg_t);
mem_map_sizes[hv(HalMemAddrType::KERNEL_CONFIG)] = eth_l1_mem::address_map::ERISC_L1_KERNEL_CONFIG_SIZE;
mem_map_sizes[hv(HalMemAddrType::UNRESERVED)] = eth_l1_mem::address_map::MAX_SIZE - eth_l1_mem::address_map::ERISC_L1_UNRESERVED_BASE;

Expand Down
Loading
Loading