Skip to content

Commit

Permalink
#6748: Remove profiler flat id lookup table
Browse files Browse the repository at this point in the history
  • Loading branch information
mo-tenstorrent committed Oct 3, 2024
1 parent 844ca1c commit 03e6cff
Show file tree
Hide file tree
Showing 7 changed files with 9 additions and 43 deletions.
1 change: 0 additions & 1 deletion tt_metal/hw/firmware/src/brisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ namespace kernel_profiler {
uint32_t stackSize __attribute__((used));
uint32_t sums[SUM_COUNT] __attribute__((used));
uint32_t sumIDs[SUM_COUNT] __attribute__((used));
uint16_t core_flat_id __attribute__((used));
}
#endif

Expand Down
1 change: 0 additions & 1 deletion tt_metal/hw/firmware/src/erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ namespace kernel_profiler {
uint32_t stackSize __attribute__((used));
uint32_t sums[SUM_COUNT] __attribute__((used));
uint32_t sumIDs[SUM_COUNT] __attribute__((used));
uint16_t core_flat_id __attribute__((used));
}
#endif

Expand Down
1 change: 0 additions & 1 deletion tt_metal/hw/firmware/src/idle_erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ namespace kernel_profiler {
uint32_t stackSize __attribute__((used));
uint32_t sums[SUM_COUNT] __attribute__((used));
uint32_t sumIDs[SUM_COUNT] __attribute__((used));
uint16_t core_flat_id __attribute__((used));
}
#endif

Expand Down
1 change: 0 additions & 1 deletion tt_metal/hw/firmware/src/ncrisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ namespace kernel_profiler {
uint32_t stackSize __attribute__((used));
uint32_t sums[SUM_COUNT] __attribute__((used));
uint32_t sumIDs[SUM_COUNT] __attribute__((used));
uint16_t core_flat_id __attribute__((used));
}
#endif

Expand Down
27 changes: 0 additions & 27 deletions tt_metal/jit_build/genfiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,6 @@ std::string generate_bank_to_noc_coord_descriptor_string(
ss << "#if defined(PROFILE_KERNEL) && (defined(COMPILE_FOR_BRISC) || defined(COMPILE_FOR_NCRISC) || "
"defined(COMPILE_FOR_ERISC))"
<< endl;
ss << "extern uint8_t noc_xy_to_profiler_flat_id[noc_size_x][noc_size_y];" << endl;
ss << "extern uint16_t profiler_core_count_per_dram;" << endl;
ss << "#endif" << endl;
#endif
Expand Down Expand Up @@ -613,38 +612,12 @@ std::string generate_bank_to_noc_coord_descriptor_string(
ss << endl;

#if defined(TRACY_ENABLE)
/*
* This part adds the 2D array that shares the flat IDs the soc descriptor has assigned to every NOC coordinate,
* along with the ceiled number of cores per DRAM bank.
*
* The logic of flat ID assignment can be optimized to lower NOC traffic. With this design the heuristic can be
* implemented on the host, and the device just does a lookup in the table.
*
* For DRAM banks in particular, integer division of flat_id/core_count_per_dram gives the DRAM bank id and the
* modulo is the offset.
* */
ss << "#if defined(PROFILE_KERNEL) && (defined(COMPILE_FOR_BRISC) || defined(COMPILE_FOR_NCRISC) || "
"defined(COMPILE_FOR_ERISC))"
<< endl;
ss << "uint16_t profiler_core_count_per_dram __attribute__((used)) = ";
ss << core_count_per_dram << ";" << endl;
ss << endl;

ss << "uint8_t noc_xy_to_profiler_flat_id[noc_size_x][noc_size_y] __attribute__((used)) = {" << endl;
for (unsigned int x = 0; x < grid_size.x; x++) {
ss << " {" << endl;
for (unsigned int y = 0; y < grid_size.y; y++) {
CoreCoord core = {x, y};
if (profiler_flat_id_map.find(core) == profiler_flat_id_map.end()) {
ss << " " << 255 << "," << endl;
} else {
ss << " " << profiler_flat_id_map.at(core) << "," << endl;
}
}
ss << " }," << endl;
}
ss << "};" << endl;
ss << endl;
ss << "#endif" << endl;

#endif
Expand Down
20 changes: 8 additions & 12 deletions tt_metal/tools/profiler/kernel_profiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,10 @@ namespace kernel_profiler{

#if defined(COMPILE_FOR_BRISC)
constexpr uint32_t myRiscID = 0;
extern uint16_t core_flat_id;
#elif defined(COMPILE_FOR_ERISC)
constexpr uint32_t myRiscID = 0;
extern uint16_t core_flat_id;
#elif defined(COMPILE_FOR_NCRISC)
constexpr uint32_t myRiscID = 1;
extern uint16_t core_flat_id;
#elif COMPILE_FOR_TRISC == 0
constexpr uint32_t myRiscID = 2;
#elif COMPILE_FOR_TRISC == 1
Expand Down Expand Up @@ -92,21 +89,16 @@ namespace kernel_profiler{

if (runCounter == 0)
{
core_flat_id = noc_xy_to_profiler_flat_id[my_x[0]][my_y[0]];

for (uint32_t riscID = 0; riscID < PROFILER_RISC_COUNT; riscID ++)
{
for (uint32_t i = ID_HH; i < GUARANTEED_MARKER_1_H; i ++)
{
profiler_data_buffer[riscID][i] = 0;
}

profiler_data_buffer[riscID][ID_LH] = ((core_flat_id & 0xFF) << 3) | riscID;
}

profiler_control_buffer[NOC_X] = my_x[0];
profiler_control_buffer[NOC_Y] = my_y[0];
profiler_control_buffer[FLAT_ID] = core_flat_id;
}

for (uint32_t riscID = 0; riscID < PROFILER_RISC_COUNT; riscID ++)
Expand Down Expand Up @@ -199,10 +191,11 @@ namespace kernel_profiler{
PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC * MAX_RISCV_PER_CORE * profiler_core_count_per_dram;

while (!profiler_control_buffer[DRAM_PROFILER_ADDRESS]);
uint32_t dram_profiler_address = profiler_control_buffer[DRAM_PROFILER_ADDRESS];
uint32_t core_flat_id = profiler_control_buffer[FLAT_ID];

for (uint32_t riscID = 0; riscID < PROFILER_RISC_COUNT; riscID ++)
{
profiler_data_buffer[riscID][ID_LH] = ((core_flat_id & 0xFF) << 3) | riscID;
int hostIndex = riscID;
int deviceIndex = kernel_profiler::DEVICE_BUFFER_END_INDEX_BR_ER + riscID;
if (profiler_control_buffer[deviceIndex])
Expand Down Expand Up @@ -244,7 +237,7 @@ namespace kernel_profiler{

if (do_noc){
const InterleavedAddrGen<true> s = {
.bank_base_address = dram_profiler_address,
.bank_base_address = profiler_control_buffer[DRAM_PROFILER_ADDRESS],
.page_size = pageSize
};

Expand All @@ -271,14 +264,17 @@ namespace kernel_profiler{
SrcLocNameToHash("PROFILER-NOC-QUICK-SEND");
mark_time_at_index_inlined(wIndex, hash);
wIndex += PROFILER_L1_MARKER_UINT32_SIZE;
core_flat_id = noc_xy_to_profiler_flat_id[my_x[0]][my_y[0]];

while (!profiler_control_buffer[DRAM_PROFILER_ADDRESS]);
uint32_t core_flat_id = profiler_control_buffer[FLAT_ID];

profiler_data_buffer[myRiscID][ID_LH] = ((core_flat_id & 0xFF) << 3) | myRiscID;

uint32_t dram_offset =
(core_flat_id % profiler_core_count_per_dram) * MAX_RISCV_PER_CORE * PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC +
(HOST_BUFFER_END_INDEX_BR_ER + myRiscID) * PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC +
profiler_control_buffer[HOST_BUFFER_END_INDEX_BR_ER + myRiscID] * sizeof(uint32_t);

while (!profiler_control_buffer[DRAM_PROFILER_ADDRESS]);
const InterleavedAddrGen<true> s = {
.bank_base_address = profiler_control_buffer[DRAM_PROFILER_ADDRESS],
.page_size = PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC * MAX_RISCV_PER_CORE * profiler_core_count_per_dram
Expand Down
1 change: 1 addition & 0 deletions tt_metal/tools/profiler/tt_metal_profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ void setControlBuffer(uint32_t device_id, std::vector<uint32_t>& control_buffer)
profiler_msg = hal.get_dev_addr<profiler_msg_t *>(HalProgrammableCoreType::ACTIVE_ETH, HalMemAddrType::PROFILER);
}

control_buffer[kernel_profiler::FLAT_ID] = core.second;
tt::llrt::write_hex_vec_to_core(
device_id,
core.first,
Expand Down

0 comments on commit 03e6cff

Please sign in to comment.