From 38278f9d72f19eb07c29b4e3eb8b6a5e777c83b1 Mon Sep 17 00:00:00 2001 From: Mo Date: Thu, 29 Aug 2024 14:57:50 +0000 Subject: [PATCH] #4984: Fix addressing issues on eth cores --- tt_metal/hw/inc/blackhole/eth_l1_address_map.h | 2 +- tt_metal/hw/inc/wormhole/eth_l1_address_map.h | 2 +- tt_metal/tools/profiler/kernel_profiler.hpp | 10 +++++----- tt_metal/tools/profiler/profiler.cpp | 17 +++++++++-------- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/tt_metal/hw/inc/blackhole/eth_l1_address_map.h b/tt_metal/hw/inc/blackhole/eth_l1_address_map.h index f31cf304888..f2d7ac382d6 100644 --- a/tt_metal/hw/inc/blackhole/eth_l1_address_map.h +++ b/tt_metal/hw/inc/blackhole/eth_l1_address_map.h @@ -60,7 +60,7 @@ struct address_map { // erisc early exit functionality re-uses mailboxes_t::ncrisc_halt_msg_t::stack_save memory static constexpr std::int32_t ERISC_MEM_MAILBOX_STACK_SAVE = ERISC_MEM_MAILBOX_BASE + 4; - static constexpr std::uint32_t ERISC_MEM_MAILBOX_END = ERISC_MEM_MAILBOX_BASE + 288 + 256 + 16 + 32 * 4 + 512; + static constexpr std::uint32_t ERISC_MEM_MAILBOX_END = ERISC_MEM_MAILBOX_BASE + 288 + 256 + 16 + (32 + 512) * 4; static constexpr std::int32_t ERISC_L1_KERNEL_CONFIG_BASE = ERISC_MEM_MAILBOX_END; static constexpr std::int32_t ERISC_L1_UNRESERVED_BASE = ERISC_L1_KERNEL_CONFIG_BASE + ERISC_L1_KERNEL_CONFIG_SIZE; diff --git a/tt_metal/hw/inc/wormhole/eth_l1_address_map.h b/tt_metal/hw/inc/wormhole/eth_l1_address_map.h index 5f14468a979..cbd5b6f6f53 100644 --- a/tt_metal/hw/inc/wormhole/eth_l1_address_map.h +++ b/tt_metal/hw/inc/wormhole/eth_l1_address_map.h @@ -60,7 +60,7 @@ struct address_map { // erisc early exit functionality re-uses mailboxes_t::ncrisc_halt_msg_t::stack_save memory static constexpr std::int32_t ERISC_MEM_MAILBOX_STACK_SAVE = ERISC_MEM_MAILBOX_BASE + 4; - static constexpr std::uint32_t ERISC_MEM_MAILBOX_END = ERISC_MEM_MAILBOX_BASE + 288 + 256 + 16 + 512 + 32 * 4; + static constexpr std::uint32_t ERISC_MEM_MAILBOX_END = ERISC_MEM_MAILBOX_BASE + 288 + 256 + 16 + (512 + 32) * 4; static constexpr std::int32_t ERISC_L1_KERNEL_CONFIG_BASE = ERISC_MEM_MAILBOX_END; static constexpr std::int32_t ERISC_L1_UNRESERVED_BASE = ERISC_L1_KERNEL_CONFIG_BASE + ERISC_L1_KERNEL_CONFIG_SIZE; diff --git a/tt_metal/tools/profiler/kernel_profiler.hpp b/tt_metal/tools/profiler/kernel_profiler.hpp index de064644652..108ba6ab8f2 100644 --- a/tt_metal/tools/profiler/kernel_profiler.hpp +++ b/tt_metal/tools/profiler/kernel_profiler.hpp @@ -192,7 +192,7 @@ namespace kernel_profiler{ return; } uint32_t pageSize = - PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC * PROFILER_RISC_COUNT * profiler_core_count_per_dram; + PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC * MAX_RISCV_PER_CORE * profiler_core_count_per_dram; while (!profiler_control_buffer[DRAM_PROFILER_ADDRESS]); uint32_t dram_profiler_address = profiler_control_buffer[DRAM_PROFILER_ADDRESS]; @@ -213,7 +213,7 @@ namespace kernel_profiler{ if (currEndIndex <= PROFILER_FULL_HOST_VECTOR_SIZE_PER_RISC) { dram_offset = - (core_flat_id % profiler_core_count_per_dram) * PROFILER_RISC_COUNT * PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC + + (core_flat_id % profiler_core_count_per_dram) * MAX_RISCV_PER_CORE * PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC + hostIndex * PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC + profiler_control_buffer[hostIndex] * sizeof(uint32_t); @@ -226,7 +226,7 @@ namespace kernel_profiler{ { dram_offset = (core_flat_id % profiler_core_count_per_dram) * - PROFILER_RISC_COUNT * PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC + + MAX_RISCV_PER_CORE * PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC + hostIndex * PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC; send_size = CUSTOM_MARKERS * sizeof(uint32_t); @@ -270,14 +270,14 @@ namespace kernel_profiler{ core_flat_id = noc_xy_to_profiler_flat_id[my_x[0]][my_y[0]]; uint32_t dram_offset = - (core_flat_id % profiler_core_count_per_dram) * PROFILER_RISC_COUNT * PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC + + (core_flat_id % profiler_core_count_per_dram) * MAX_RISCV_PER_CORE * PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC + (HOST_BUFFER_END_INDEX_BR_ER + myRiscID) * PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC + profiler_control_buffer[HOST_BUFFER_END_INDEX_BR_ER + myRiscID] * sizeof(uint32_t); while (!profiler_control_buffer[DRAM_PROFILER_ADDRESS]); const InterleavedAddrGen s = { .bank_base_address = profiler_control_buffer[DRAM_PROFILER_ADDRESS], - .page_size = PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC * PROFILER_RISC_COUNT * profiler_core_count_per_dram + .page_size = PROFILER_FULL_HOST_BUFFER_SIZE_PER_RISC * MAX_RISCV_PER_CORE * profiler_core_count_per_dram }; uint64_t dram_bank_dst_noc_addr = s.get_noc_addr(core_flat_id / profiler_core_count_per_dram, dram_offset); diff --git a/tt_metal/tools/profiler/profiler.cpp b/tt_metal/tools/profiler/profiler.cpp index f8c99cce7cd..0a6b11dc9cd 100644 --- a/tt_metal/tools/profiler/profiler.cpp +++ b/tt_metal/tools/profiler/profiler.cpp @@ -30,27 +30,28 @@ void DeviceProfiler::readRiscProfilerResults( ZoneScoped; - std::pair deviceCore = {device_id,worker_core}; + HalProgrammableCoreType CoreType; + int riscCount; + profiler_msg_t *profiler_msg; const metal_SocDescriptor& soc_d = tt::Cluster::instance().get_soc_desc(device_id); - uint32_t coreFlatID = soc_d.physical_routing_to_profiler_flat_id.at(worker_core); - uint32_t startIndex = coreFlatID * PROFILER_RISC_COUNT * PROFILER_FULL_HOST_VECTOR_SIZE_PER_RISC; - auto ethCores = soc_d.get_physical_ethernet_cores() ; - - HalProgrammableCoreType CoreType; - profiler_msg_t *profiler_msg; if (std::find(ethCores.begin(), ethCores.end(), worker_core) == ethCores.end()) { profiler_msg = hal.get_dev_addr(HalProgrammableCoreType::TENSIX, HalMemAddrType::PROFILER); CoreType = HalProgrammableCoreType::TENSIX; + riscCount = 5; } else { profiler_msg = hal.get_dev_addr(HalProgrammableCoreType::ACTIVE_ETH, HalMemAddrType::PROFILER); CoreType = HalProgrammableCoreType::ACTIVE_ETH; + riscCount = 1; } + uint32_t coreFlatID = soc_d.physical_routing_to_profiler_flat_id.at(worker_core); + uint32_t startIndex = coreFlatID * MAX_RISCV_PER_CORE * PROFILER_FULL_HOST_VECTOR_SIZE_PER_RISC; + vector control_buffer = tt::llrt::read_hex_vec_from_core( device_id, worker_core, @@ -64,7 +65,7 @@ void DeviceProfiler::readRiscProfilerResults( } int riscNum = 0; - for (int riscEndIndex = 0; riscEndIndex < PROFILER_RISC_COUNT; riscEndIndex ++ ) { + for (int riscEndIndex = 0; riscEndIndex < riscCount; riscEndIndex ++ ) { uint32_t bufferEndIndex = control_buffer[riscEndIndex]; uint32_t riscType; if(CoreType == HalProgrammableCoreType::TENSIX)