-
Notifications
You must be signed in to change notification settings - Fork 90
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Pkeller/memmap profiler #12067
Pkeller/memmap profiler #12067
Changes from all commits
9f37e58
c4f666e
8e1aed7
5000574
648cd4e
ff6513d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,18 +24,16 @@ namespace kernel_profiler{ | |
|
||
enum ControlBuffer | ||
{ | ||
HOST_BUFFER_END_INDEX_BR, | ||
HOST_BUFFER_END_INDEX_BR_ER, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this because eth cores are treated as if there is only one BRISC? This may not be scalable for BH, which has 2 RISC-Vs on eth cores. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good call; this will turn into a union, and each device can define its own struct for controls. |
||
HOST_BUFFER_END_INDEX_NC, | ||
HOST_BUFFER_END_INDEX_T0, | ||
HOST_BUFFER_END_INDEX_T1, | ||
HOST_BUFFER_END_INDEX_T2, | ||
HOST_BUFFER_END_INDEX_ER, | ||
DEVICE_BUFFER_END_INDEX_BR, | ||
DEVICE_BUFFER_END_INDEX_BR_ER, | ||
DEVICE_BUFFER_END_INDEX_NC, | ||
DEVICE_BUFFER_END_INDEX_T0, | ||
DEVICE_BUFFER_END_INDEX_T1, | ||
DEVICE_BUFFER_END_INDEX_T2, | ||
DEVICE_BUFFER_END_INDEX_ER, | ||
FW_RESET_H, | ||
FW_RESET_L, | ||
DRAM_PROFILER_ADDRESS, | ||
|
@@ -47,6 +45,15 @@ namespace kernel_profiler{ | |
PROFILER_DONE, | ||
}; | ||
|
||
|
||
// TODO: use data types in profiler_msg_t rather than addresses/sizes | ||
// Control vector length, counted in uint32_t words. | ||
constexpr static std::uint32_t PROFILER_L1_CONTROL_VECTOR_SIZE = 32; | ||
// The same control vector expressed in bytes. | ||
constexpr static std::uint32_t PROFILER_L1_CONTROL_BUFFER_SIZE = PROFILER_L1_CONTROL_VECTOR_SIZE * sizeof(uint32_t); | ||
// uint32_t words per entry; scales the entry counts below in PROFILER_L1_VECTOR_SIZE. | ||
constexpr static std::uint32_t PROFILER_L1_MARKER_UINT32_SIZE = 2; | ||
constexpr static std::uint32_t PROFILER_L1_PROGRAM_ID_COUNT = 2; | ||
constexpr static std::uint32_t PROFILER_L1_GUARANTEED_MARKER_COUNT = 4; | ||
constexpr static std::uint32_t PROFILER_L1_OPTIONAL_MARKER_COUNT = 250; | ||
// Not referenced by the size computations in this block. | ||
constexpr static std::uint32_t PROFILER_L1_OP_MIN_OPTIONAL_MARKER_COUNT = 2; | ||
// Buffer length in uint32_t words: (250 optional + 4 guaranteed + 2 program-id entries) * 2 words = 512. | ||
constexpr static std::uint32_t PROFILER_L1_VECTOR_SIZE = (PROFILER_L1_OPTIONAL_MARKER_COUNT + PROFILER_L1_GUARANTEED_MARKER_COUNT + PROFILER_L1_PROGRAM_ID_COUNT) * PROFILER_L1_MARKER_UINT32_SIZE; | ||
// The same buffer expressed in bytes (512 * sizeof(uint32_t)). | ||
constexpr static std::uint32_t PROFILER_L1_BUFFER_SIZE = PROFILER_L1_VECTOR_SIZE * sizeof(uint32_t); | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,7 +53,8 @@ | |
#define MEM_L1_BARRIER 12 | ||
#define MEM_MAILBOX_BASE 16 | ||
// Magic size must be big enough to hold dev_msgs_t. static_asserts will fire if this is too small | ||
#define MEM_MAILBOX_END (MEM_MAILBOX_BASE + 1356) | ||
// Size of the mailbox region in bytes (11732 in total).
//   5 * 4 * 512 : appears to mirror the profiler marker buffers
//                 (5 RISCs * sizeof(uint32_t) * 512 words) - TODO confirm
//   4 * 32      : appears to mirror the profiler control vector
//                 (sizeof(uint32_t) * 32 words) - TODO confirm
//   1364        : remaining mailbox members, in bytes
// The sum is parenthesized so the macro expands as one value when it is used
// inside a larger expression (e.g. MEM_MAILBOX_SIZE * 2 or x - MEM_MAILBOX_SIZE).
#define MEM_MAILBOX_SIZE (5 * 4 * 512 + 4 * 32 + 1364)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comments for constants? |
||
#define MEM_MAILBOX_END (MEM_MAILBOX_BASE + MEM_MAILBOX_SIZE) | ||
#define MEM_IERISC_MAILBOX_BASE 1024 | ||
#define MEM_IERISC_MAILBOX_END (MEM_IERISC_MAILBOX_BASE + 128) | ||
#define MEM_ZEROS_BASE ((MEM_MAILBOX_END + 31) & ~31) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,16 +60,9 @@ struct address_map { | |
// erisc early exit functionality re-uses mailboxes_t::ncrisc_halt_msg_t::stack_save memory | ||
static constexpr std::int32_t ERISC_MEM_MAILBOX_STACK_SAVE = ERISC_MEM_MAILBOX_BASE + 4; | ||
|
||
// Kernel config buffer is WIP | ||
// Size is presently based on the old sizes of the RTAs + CB config + Sems | ||
static constexpr std::uint32_t PROFILER_L1_BUFFER_ER = ERISC_MEM_MAILBOX_BASE + 288 + 256 + 16; | ||
static constexpr std::uint32_t PROFILER_L1_BUFFER_CONTROL = PROFILER_L1_BUFFER_ER + PROFILER_L1_BUFFER_SIZE; | ||
|
||
static constexpr std::int32_t ERISC_L1_KERNEL_CONFIG_BASE = PROFILER_L1_BUFFER_CONTROL + PROFILER_L1_CONTROL_BUFFER_SIZE; | ||
|
||
static_assert((PROFILER_L1_BUFFER_ER % 32) == 0); | ||
static_assert((PROFILER_L1_BUFFER_CONTROL % 32) == 0); | ||
// End address of the erisc mailbox region. | ||
// 288 + 256 + 16 matches the offset previously used for PROFILER_L1_BUFFER_ER; (32 + 512) * 4 presumably covers | ||
// the profiler control vector (32 words) plus one 512-word marker buffer, in bytes - TODO confirm. | ||
static constexpr std::uint32_t ERISC_MEM_MAILBOX_END = ERISC_MEM_MAILBOX_BASE + 288 + 256 + 16 + (32 + 512) * 4; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comments for constants? |
||
|
||
static constexpr std::int32_t ERISC_L1_KERNEL_CONFIG_BASE = ERISC_MEM_MAILBOX_END; | ||
static constexpr std::int32_t ERISC_L1_UNRESERVED_BASE = ERISC_L1_KERNEL_CONFIG_BASE + ERISC_L1_KERNEL_CONFIG_SIZE; | ||
static constexpr std::int32_t ERISC_L1_UNRESERVED_SIZE = MAX_L1_LOADING_SIZE - ERISC_L1_UNRESERVED_BASE; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,7 @@ | |
#include "core_config.h" | ||
#include "noc/noc_parameters.h" | ||
#include "dev_mem_map.h" | ||
#include "eth_l1_address_map.h" | ||
#include "hostdevcommon/profiler_common.h" | ||
|
||
// TODO: move these to processor specific files | ||
#if defined(COMPILE_FOR_ERISC) | ||
|
@@ -195,6 +195,12 @@ struct debug_ring_buf_msg_t { | |
uint32_t data[DEBUG_RING_BUFFER_ELEMENTS]; | ||
}; | ||
|
||
// Stack usage record with one max_usage slot and one watcher_kernel_id slot per unique RISC. | ||
struct debug_stack_usage_t { | ||
volatile uint16_t max_usage[DebugNumUniqueRiscs]; | ||
volatile uint16_t watcher_kernel_id[DebugNumUniqueRiscs]; | ||
// Tops the struct up to 16 uint16_t elements in total across the three arrays. | ||
volatile uint16_t pad[16 - DebugNumUniqueRiscs * 2]; | ||
}; | ||
|
||
constexpr static std::uint32_t DPRINT_BUFFER_SIZE = 204; // per thread | ||
// TODO: when device specific headers specify number of processors | ||
// (and hal abstracts them on host), get these from there | ||
|
@@ -204,25 +210,14 @@ constexpr static std::uint32_t DPRINT_BUFFERS_COUNT = 1; | |
constexpr static std::uint32_t DPRINT_BUFFERS_COUNT = 5; | ||
#endif | ||
|
||
// TODO: w/ the hal, this can come from core specific defines | ||
constexpr static std::uint32_t MAX_RISCV_PER_CORE = 5; | ||
|
||
// DPRINT storage: DPRINT_BUFFERS_COUNT buffers of DPRINT_BUFFER_SIZE bytes each, followed by one pad word. | ||
struct dprint_buf_msg_t { | ||
uint8_t data[DPRINT_BUFFERS_COUNT][DPRINT_BUFFER_SIZE]; | ||
// 5 buffers * 204 bytes + this 4-byte word = 1024; the total differs for other DPRINT_BUFFERS_COUNT values. | ||
uint32_t pad; // to 1024 bytes | ||
}; | ||
|
||
// Stack usage record with one max_usage slot and one watcher_kernel_id slot per unique RISC. | ||
struct debug_stack_usage_t { | ||
volatile uint16_t max_usage[DebugNumUniqueRiscs]; | ||
volatile uint16_t watcher_kernel_id[DebugNumUniqueRiscs]; | ||
// Tops the struct up to 16 uint16_t elements in total across the three arrays. | ||
volatile uint16_t pad[16 - DebugNumUniqueRiscs * 2]; | ||
}; | ||
|
||
// Watcher enable states, pinned to the explicit values 2 and 3 (0 and 1 are not used). | ||
// Presumably the values stored in watcher_msg_t::enable - TODO confirm. | ||
enum watcher_enable_msg_t { | ||
WatcherDisabled = 2, | ||
WatcherEnabled = 3, | ||
}; | ||
|
||
// TODO: w/ the hal, this can come from core specific defines | ||
constexpr static std::uint32_t MAX_RISCV_PER_CORE = 5; | ||
|
||
struct watcher_msg_t { | ||
volatile uint32_t enable; | ||
struct debug_waypoint_msg_t debug_waypoint[MAX_RISCV_PER_CORE]; | ||
|
@@ -234,13 +229,40 @@ struct watcher_msg_t { | |
struct debug_ring_buf_msg_t debug_ring_buf; | ||
}; | ||
|
||
// DPRINT storage: DPRINT_BUFFERS_COUNT buffers of DPRINT_BUFFER_SIZE bytes each, followed by one pad word. | ||
struct dprint_buf_msg_t { | ||
uint8_t data[DPRINT_BUFFERS_COUNT][DPRINT_BUFFER_SIZE]; | ||
// 5 buffers * 204 bytes + this 4-byte word = 1024; the total differs for other DPRINT_BUFFERS_COUNT values. | ||
uint32_t pad; // to 1024 bytes | ||
}; | ||
|
||
|
||
// NOC alignment max from BH | ||
// Modulus used by the mailbox layout checks on the launch and profiler member addresses. | ||
static constexpr uint32_t TT_ARCH_MAX_NOC_WRITE_ALIGNMENT = 16; | ||
|
||
// TODO: when device specific headers specify number of processors | ||
// (and hal abstracts them on host), get these from there (same as above for dprint) | ||
// Number of per-RISC rows in profiler_msg_t::buffer: 1 for erisc / idle-erisc builds, 5 otherwise. | ||
#if defined(COMPILE_FOR_ERISC) || defined (COMPILE_FOR_IDLE_ERISC) | ||
static constexpr uint32_t PROFILER_RISC_COUNT = 1; | ||
#else | ||
static constexpr uint32_t PROFILER_RISC_COUNT = 5; | ||
#endif | ||
|
||
// Number of uint32_t pad words placed before the launch (pads_1) and profiler (pads_2) members of mailboxes_t. | ||
static constexpr uint32_t LAUNCH_NOC_ALIGMENT_PAD_COUNT = 1; | ||
static constexpr uint32_t PROFILER_NOC_ALIGMENT_PAD_COUNT = 2; | ||
|
||
// Profiler section of mailboxes_t: a control vector followed by one buffer row per RISC. | ||
struct profiler_msg_t { | ||
// PROFILER_L1_CONTROL_VECTOR_SIZE uint32_t words of control data. | ||
uint32_t control_vector[kernel_profiler::PROFILER_L1_CONTROL_VECTOR_SIZE]; | ||
// PROFILER_RISC_COUNT rows, each PROFILER_L1_VECTOR_SIZE uint32_t words long. | ||
uint32_t buffer[PROFILER_RISC_COUNT][kernel_profiler::PROFILER_L1_VECTOR_SIZE]; | ||
}; | ||
|
||
struct mailboxes_t { | ||
struct ncrisc_halt_msg_t ncrisc_halt; | ||
struct slave_sync_msg_t slave_sync; | ||
uint32_t pad; | ||
uint32_t pads_1[LAUNCH_NOC_ALIGMENT_PAD_COUNT]; | ||
struct launch_msg_t launch; | ||
struct watcher_msg_t watcher; | ||
struct dprint_buf_msg_t dprint_buf; | ||
uint32_t pads_2[PROFILER_NOC_ALIGMENT_PAD_COUNT]; | ||
struct profiler_msg_t profiler; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is the padding here to ensure that the profiler struct base address respects NOC alignment? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That is right. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The downside of an array of pads is that, as we add/remove members, the array size could become 0, which won't compile. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The size is chosen in the ifdef depending on ERISC vs TENSIX. It is dependent on that because of the variable sizes of DPRINT and PROFILER. I double-checked, and a compile-time size of zero is fine; it compiles. |
||
}; | ||
|
||
// Watcher struct needs to be 32b-divisible, since we need to write it from host using write_hex_vec_to_core(). | ||
|
@@ -249,15 +271,26 @@ static_assert(sizeof(kernel_config_msg_t) % sizeof(uint32_t) == 0); | |
|
||
// Layout checks below are compiled only when TENSIX_FIRMWARE is not defined. | ||
#ifndef TENSIX_FIRMWARE | ||
// Validate assumptions on mailbox layout on host compile | ||
static_assert((MEM_MAILBOX_BASE + offsetof(mailboxes_t, launch)) % 32 == 0); | ||
static_assert((eth_l1_mem::address_map::ERISC_MEM_MAILBOX_BASE + offsetof(mailboxes_t, launch)) % 32 == 0); | ||
// Constexpr definitions allow for printing of breaking values at compile time | ||
#ifdef NCRISC_HAS_IRAM | ||
// These are only used in ncrisc-halt.S | ||
static_assert(MEM_MAILBOX_BASE + offsetof(mailboxes_t, slave_sync.ncrisc) == MEM_SLAVE_RUN_MAILBOX_ADDRESS); | ||
static_assert( | ||
MEM_MAILBOX_BASE + offsetof(mailboxes_t, ncrisc_halt.stack_save) == MEM_NCRISC_HALT_STACK_MAILBOX_ADDRESS); | ||
#endif | ||
#if defined(COMPILE_FOR_ERISC) || defined (COMPILE_FOR_IDLE_ERISC) | ||
// mailboxes_t must end strictly below ERISC_MEM_MAILBOX_END. | ||
// NOTE(review): '<' also rejects an exact fit - confirm that '<=' is not what is intended. | ||
static_assert( eth_l1_mem::address_map::ERISC_MEM_MAILBOX_BASE + sizeof(mailboxes_t) < eth_l1_mem::address_map::ERISC_MEM_MAILBOX_END); | ||
// Remainders of the launch / profiler member addresses modulo TT_ARCH_MAX_NOC_WRITE_ALIGNMENT; both must be 0. | ||
static constexpr uint32_t ETH_LAUNCH_CHECK = (eth_l1_mem::address_map::ERISC_MEM_MAILBOX_BASE + offsetof(mailboxes_t, launch)) % TT_ARCH_MAX_NOC_WRITE_ALIGNMENT; | ||
static constexpr uint32_t ETH_PROFILER_CHECK = (eth_l1_mem::address_map::ERISC_MEM_MAILBOX_BASE + offsetof(mailboxes_t, profiler)) % TT_ARCH_MAX_NOC_WRITE_ALIGNMENT; | ||
static_assert( ETH_LAUNCH_CHECK == 0); | ||
static_assert( ETH_PROFILER_CHECK == 0); | ||
#else | ||
// mailboxes_t must end strictly below MEM_MAILBOX_END. | ||
// NOTE(review): '<' also rejects an exact fit - confirm that '<=' is not what is intended. | ||
static_assert(MEM_MAILBOX_BASE + sizeof(mailboxes_t) < MEM_MAILBOX_END); | ||
// Remainders of the launch / profiler member addresses modulo TT_ARCH_MAX_NOC_WRITE_ALIGNMENT; both must be 0. | ||
static constexpr uint32_t TENSIX_LAUNCH_CHECK = (MEM_MAILBOX_BASE + offsetof(mailboxes_t, launch)) % TT_ARCH_MAX_NOC_WRITE_ALIGNMENT; | ||
static constexpr uint32_t TENSIX_PROFILER_CHECK = (MEM_MAILBOX_BASE + offsetof(mailboxes_t, profiler)) % TT_ARCH_MAX_NOC_WRITE_ALIGNMENT; | ||
static_assert( TENSIX_LAUNCH_CHECK == 0); | ||
static_assert( TENSIX_PROFILER_CHECK == 0); | ||
#endif | ||
#endif | ||
|
||
struct eth_word_t { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does PROFILER_FULL_HOST_VECTOR_SIZE_PER_RISC need to be in here? Is it only used by profiling code?
Likewise for PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC.