From 90034518e51f6a27b5a2e9b727bf81afb8df66a8 Mon Sep 17 00:00:00 2001 From: Almeet Bhullar Date: Wed, 20 Nov 2024 16:21:17 +0000 Subject: [PATCH] Add address and value that will launch fw on a programmable core into the HalJitBuildConfig --- tt_metal/impl/debug/watcher_device_reader.cpp | 5 ++- tt_metal/impl/device/device.cpp | 10 ++--- tt_metal/llrt/blackhole/bh_hal_active_eth.cpp | 2 + tt_metal/llrt/blackhole/bh_hal_idle_eth.cpp | 11 ++++- tt_metal/llrt/blackhole/bh_hal_tensix.cpp | 20 +++++++-- tt_metal/llrt/grayskull/gs_hal.cpp | 17 +++++++- tt_metal/llrt/hal.cpp | 25 +++++++++++ tt_metal/llrt/hal.hpp | 40 ++++++------------ tt_metal/llrt/llrt.cpp | 41 +------------------ tt_metal/llrt/llrt.hpp | 5 --- tt_metal/llrt/wormhole/wh_hal_active_eth.cpp | 2 + tt_metal/llrt/wormhole/wh_hal_idle_eth.cpp | 2 + tt_metal/llrt/wormhole/wh_hal_tensix.cpp | 17 +++++++- 13 files changed, 110 insertions(+), 87 deletions(-) diff --git a/tt_metal/impl/debug/watcher_device_reader.cpp b/tt_metal/impl/debug/watcher_device_reader.cpp index 85ff63f6f12d..7a1c9f0ac240 100644 --- a/tt_metal/impl/debug/watcher_device_reader.cpp +++ b/tt_metal/impl/debug/watcher_device_reader.cpp @@ -407,7 +407,10 @@ void WatcherDeviceReader::DumpL1Status(CoreDescriptor &core, const launch_msg_t // Read L1 address 0, looking for memory corruption std::vector data; data = tt::llrt::read_hex_vec_from_core(device->id(), core.coord, MEM_L1_BASE, sizeof(uint32_t)); - if (data[0] != llrt::generate_risc_startup_addr(false)) { + TT_ASSERT(core.type == CoreType::WORKER); + uint32_t core_type_idx = hal.get_programmable_core_type_index(HalProgrammableCoreType::TENSIX); + auto fw_launch_value = hal.get_jit_build_config(core_type_idx, 0, 0).fw_launch_addr_value; + if (data[0] != fw_launch_value) { LogRunningKernels(core, launch_msg); TT_THROW("Watcher found corruption at L1[0] on core {}: read {}", core.coord.str(), data[0]); } diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp index 4881b75f9323..387712c8d76a 100644 --- a/tt_metal/impl/device/device.cpp +++ b/tt_metal/impl/device/device.cpp @@ -411,10 +411,10 @@ void Device::initialize_firmware(const HalProgrammableCoreType &core_type, CoreC uint32_t core_type_idx = hal.get_programmable_core_type_index(core_type); uint32_t processor_class_count = hal.get_processor_classes_count(core_type); + auto jit_build_config = hal.get_jit_build_config(core_type_idx, 0, 0); // Only the first risc needs to be programmed switch (core_type) { case HalProgrammableCoreType::TENSIX: { - llrt::program_risc_startup_addr(this->id(), phys_core); for (uint32_t processor_class = 0; processor_class < processor_class_count; processor_class++) { auto [build_idx, num_build_states] = this->build_processor_type_to_index(core_type_idx, processor_class); for (uint32_t riscv_id = build_idx; riscv_id < (build_idx + num_build_states); riscv_id++) { @@ -469,11 +469,6 @@ void Device::initialize_firmware(const HalProgrammableCoreType &core_type, CoreC } } } - if (is_idle_eth) { - llrt::program_risc_startup_addr(this->id(), phys_core); - } else { - llrt::launch_erisc_app_fw_on_core(this->id(), phys_core); - } // Ethernet worker core. Launch messages will be sent by FD infra if it's enabled // Idle ethernet core. Used by FD infra. Host will write launch messages during init. launch_msg->kernel_config.mode = (this->using_slow_dispatch() or is_idle_eth) ? DISPATCH_MODE_HOST : DISPATCH_MODE_DEV; @@ -483,6 +478,9 @@ void Device::initialize_firmware(const HalProgrammableCoreType &core_type, CoreC TT_THROW("Unsupported programable core type {} to initialize build states", magic_enum::enum_name(core_type)); } + tt::Cluster::instance().write_core( + &jit_build_config.fw_launch_addr_value, sizeof(uint32_t), tt_cxy_pair(this->id_, phys_core), jit_build_config.fw_launch_addr); + // Initialize each entry in the launch_msg ring buffer with the correct dispatch mode - Cores that don't get a valid // launch_message during program execution need to at least have the correct dispatch mode. // When using Fast Dispatch on Tensix: diff --git a/tt_metal/llrt/blackhole/bh_hal_active_eth.cpp b/tt_metal/llrt/blackhole/bh_hal_active_eth.cpp index 3d807a680842..18db34df58af 100644 --- a/tt_metal/llrt/blackhole/bh_hal_active_eth.cpp +++ b/tt_metal/llrt/blackhole/bh_hal_active_eth.cpp @@ -64,6 +64,8 @@ HalCoreInfoType create_active_eth_mem_map() { processor_types[0] = HalJitBuildConfig{ .fw_base_addr = eth_l1_mem::address_map::FIRMWARE_BASE, .local_init_addr = eth_l1_mem::address_map::FIRMWARE_BASE, // this will be uplifted in subsequent commits enabling active erisc + .fw_launch_addr = 0xFFB14008, + .fw_launch_addr_value = (uint32_t)eth_l1_mem::address_map::FIRMWARE_BASE, }; processor_classes[processor_class_idx] = processor_types; } diff --git a/tt_metal/llrt/blackhole/bh_hal_idle_eth.cpp b/tt_metal/llrt/blackhole/bh_hal_idle_eth.cpp index 3a892fb1b3c8..d7b0202b418d 100644 --- a/tt_metal/llrt/blackhole/bh_hal_idle_eth.cpp +++ b/tt_metal/llrt/blackhole/bh_hal_idle_eth.cpp @@ -65,16 +65,21 @@ HalCoreInfoType create_idle_eth_mem_map() { std::vector> processor_classes(NumEthDispatchClasses); std::vector processor_types(1); for (std::uint8_t processor_class_idx = 0; processor_class_idx < NumEthDispatchClasses; processor_class_idx++) { - DeviceAddr fw_base, local_init; + DeviceAddr fw_base, local_init, fw_launch; + uint32_t fw_launch_value; switch (processor_class_idx) { case 0: { fw_base = MEM_IERISC_FIRMWARE_BASE; local_init = MEM_IERISC_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = 0xFFB14000; + fw_launch_value = fw_base; } break; case 1: { fw_base = MEM_SLAVE_IERISC_FIRMWARE_BASE; local_init = MEM_SLAVE_IERISC_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = 0xFFB14008; + fw_launch_value = fw_base; } break; default: @@ -82,7 +87,9 @@ HalCoreInfoType create_idle_eth_mem_map() { } processor_types[0] = HalJitBuildConfig{ .fw_base_addr = fw_base, - .local_init_addr = local_init + .local_init_addr = local_init, + .fw_launch_addr = fw_launch, + .fw_launch_addr_value = fw_launch_value }; processor_classes[processor_class_idx] = processor_types; } diff --git a/tt_metal/llrt/blackhole/bh_hal_tensix.cpp b/tt_metal/llrt/blackhole/bh_hal_tensix.cpp index 2ca4bc88acc6..f9b5dcdeb2cc 100644 --- a/tt_metal/llrt/blackhole/bh_hal_tensix.cpp +++ b/tt_metal/llrt/blackhole/bh_hal_tensix.cpp @@ -10,6 +10,7 @@ #include "dev_mem_map.h" #include "dev_msgs.h" #include "noc/noc_parameters.h" +#include "tensix.h" #include "hal.hpp" #include "hal_asserts.hpp" @@ -31,7 +32,7 @@ HalCoreInfoType create_tensix_mem_map() { std::vector mem_map_bases; mem_map_bases.resize(static_cast(HalL1MemAddrType::COUNT)); - mem_map_bases[static_cast(HalL1MemAddrType::BASE)] = MEM_L1_BASE; + mem_map_bases[static_cast(HalL1MemAddrType::BASE)] = MEM_L1_BASE; mem_map_bases[static_cast(HalL1MemAddrType::BARRIER)] = MEM_L1_BARRIER; mem_map_bases[static_cast(HalL1MemAddrType::MAILBOX)] = MEM_MAILBOX_BASE; mem_map_bases[static_cast(HalL1MemAddrType::LAUNCH)] = GET_MAILBOX_ADDRESS_HOST(launch); @@ -65,16 +66,21 @@ HalCoreInfoType create_tensix_mem_map() { uint32_t num_processors = processor_class_idx == (NumTensixDispatchClasses - 1) ? 3 : 1; processor_types.resize(num_processors); for (uint8_t processor_type_idx = 0; processor_type_idx < processor_types.size(); processor_type_idx++) { - DeviceAddr fw_base, local_init; + DeviceAddr fw_base, local_init, fw_launch; + uint32_t fw_launch_value; switch (processor_class_idx) { case 0: { fw_base = MEM_BRISC_FIRMWARE_BASE; local_init = MEM_BRISC_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = 0x0; // BRISC is hardcoded to have reset PC of 0 + fw_launch_value = generate_risc_startup_addr(fw_base); } break; case 1: { fw_base = MEM_NCRISC_FIRMWARE_BASE; local_init = MEM_NCRISC_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = RISCV_DEBUG_REG_NCRISC_RESET_PC; + fw_launch_value = fw_base; } break; case 2: { @@ -82,16 +88,22 @@ HalCoreInfoType create_tensix_mem_map() { case 0: { fw_base = MEM_TRISC0_FIRMWARE_BASE; local_init = MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = RISCV_DEBUG_REG_TRISC0_RESET_PC; + fw_launch_value = fw_base; } break; case 1: { fw_base = MEM_TRISC1_FIRMWARE_BASE; local_init = MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = RISCV_DEBUG_REG_TRISC1_RESET_PC; + fw_launch_value = fw_base; } break; case 2: { fw_base = MEM_TRISC2_FIRMWARE_BASE; local_init = MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = RISCV_DEBUG_REG_TRISC2_RESET_PC; + fw_launch_value = fw_base; } break; } @@ -103,7 +115,9 @@ HalCoreInfoType create_tensix_mem_map() { processor_types[processor_type_idx] = HalJitBuildConfig{ .fw_base_addr = fw_base, - .local_init_addr = local_init + .local_init_addr = local_init, + .fw_launch_addr = fw_launch, + .fw_launch_addr_value = fw_launch_value }; } processor_classes[processor_class_idx] = processor_types; diff --git a/tt_metal/llrt/grayskull/gs_hal.cpp b/tt_metal/llrt/grayskull/gs_hal.cpp index 21ea11a40b80..0e9affc800cf 100644 --- a/tt_metal/llrt/grayskull/gs_hal.cpp +++ b/tt_metal/llrt/grayskull/gs_hal.cpp @@ -74,16 +74,21 @@ void Hal::initialize_gs() { uint32_t num_processors = processor_class_idx == (NumTensixDispatchClasses - 1) ? 3 : 1; processor_types.resize(num_processors); for (uint8_t processor_type_idx = 0; processor_type_idx < processor_types.size(); processor_type_idx++) { - DeviceAddr fw_base, local_init; + DeviceAddr fw_base, local_init, fw_launch; + uint32_t fw_launch_value; switch (processor_class_idx) { case 0: { fw_base = MEM_BRISC_FIRMWARE_BASE; local_init = MEM_BRISC_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = 0x0; // BRISC is hardcoded to have reset PC of 0 + fw_launch_value = generate_risc_startup_addr(fw_base); } break; case 1: { fw_base = MEM_NCRISC_FIRMWARE_BASE; local_init = MEM_NCRISC_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = 0; // fix me + fw_launch_value = fw_base; } break; case 2: { @@ -91,16 +96,22 @@ void Hal::initialize_gs() { case 0: { fw_base = MEM_TRISC0_FIRMWARE_BASE; local_init = MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = 0; // fix me + fw_launch_value = fw_base; } break; case 1: { fw_base = MEM_TRISC1_FIRMWARE_BASE; local_init = MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = 0; // fix me + fw_launch_value = fw_base; } break; case 2: { fw_base = MEM_TRISC2_FIRMWARE_BASE; local_init = MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = 0; // fix me + fw_launch_value = fw_base; } break; } @@ -112,7 +123,9 @@ void Hal::initialize_gs() { processor_types[processor_type_idx] = HalJitBuildConfig{ .fw_base_addr = fw_base, - .local_init_addr = local_init + .local_init_addr = local_init, + .fw_launch_addr = fw_launch, + .fw_launch_addr_value = fw_launch_value }; } processor_classes[processor_class_idx] = processor_types; diff --git a/tt_metal/llrt/hal.cpp b/tt_metal/llrt/hal.cpp index 510772f2dbce..a018859a8a2c 100644 --- a/tt_metal/llrt/hal.cpp +++ b/tt_metal/llrt/hal.cpp @@ -68,5 +68,30 @@ HalCoreInfoType::HalCoreInfoType(HalProgrammableCoreType programmable_core_type, supports_cbs_(supports_cbs) { } +uint32_t generate_risc_startup_addr(uint32_t firmware_base) { + // Options for handling brisc fw not starting at mem[0]: + // 1) Program the register for the start address out of reset - no reset PC register on GS/WH/BH + // 2) Encode a jump in crt0 for mem[0] + // 3) Write the jump to mem[0] here + // This does #3. #1 may be best, #2 gets messy (elf files + // drop any section before .init, crt0 needs ifdefs, etc) + constexpr uint32_t jal_opcode = 0x6f; + constexpr uint32_t jal_max_offset = 0x0007ffff; + uint32_t opcode = jal_opcode; + assert(firmware_base < jal_max_offset); + // See riscv spec for offset encoding below + uint32_t jal_offset_bit_20 = 0; + uint32_t jal_offset_bits_10_to_1 = (firmware_base & 0x7fe) << 20; + uint32_t jal_offset_bit_11 = (firmware_base & 0x800) << 9; + uint32_t jal_offset_bits_19_to_12 = (firmware_base & 0xff000) << 0; + uint32_t jal_offset = + jal_offset_bit_20 | + jal_offset_bits_10_to_1 | + jal_offset_bit_11 | + jal_offset_bits_19_to_12; + + return jal_offset | opcode; +} + } // namespace tt_metal } // namespace tt diff --git a/tt_metal/llrt/hal.hpp b/tt_metal/llrt/hal.hpp index 784790f5ba7c..c18649de1ed2 100644 --- a/tt_metal/llrt/hal.hpp +++ b/tt_metal/llrt/hal.hpp @@ -71,9 +71,14 @@ enum class HalMemType : uint8_t { using DeviceAddr = std::uint64_t; +// Note: nsidwell will be removing need for fw_base_addr and local_init_addr +// fw_launch_addr is programmed with fw_launch_addr_value on the master risc +// of a given progammable core to start FW struct HalJitBuildConfig { DeviceAddr fw_base_addr; DeviceAddr local_init_addr; + DeviceAddr fw_launch_addr; + uint32_t fw_launch_addr_value; }; class Hal; @@ -100,10 +105,7 @@ class HalCoreInfoType { uint32_t get_dev_size(HalL1MemAddrType addr_type) const; uint32_t get_processor_classes_count() const; uint32_t get_processor_types_count(uint32_t processor_class_idx) const; - template - T get_base_firmware_addr(uint32_t processor_class_idx, uint32_t processor_type_idx) const; - template - T get_binary_local_init_addr(uint32_t processor_class_idx, uint32_t processor_type_idx) const; + const HalJitBuildConfig &get_jit_build_config(uint32_t processor_class_idx, uint32_t processor_type_idx) const; }; template @@ -128,18 +130,10 @@ inline uint32_t HalCoreInfoType::get_processor_types_count(uint32_t processor_cl return this->processor_classes_[processor_class_idx].size(); } -template -inline T HalCoreInfoType::get_base_firmware_addr(uint32_t processor_class_idx, uint32_t processor_type_idx) const { - TT_ASSERT(processor_class_idx < this->processor_classes_.size()); - TT_ASSERT(processor_type_idx < this->processor_classes_[processor_class_idx].size()); - return this->processor_classes_[processor_class_idx][processor_type_idx].fw_base_addr; -} - -template -inline T HalCoreInfoType::get_binary_local_init_addr(uint32_t processor_class_idx, uint32_t processor_type_idx) const { +inline const HalJitBuildConfig &HalCoreInfoType::get_jit_build_config(uint32_t processor_class_idx, uint32_t processor_type_idx) const { TT_ASSERT(processor_class_idx < this->processor_classes_.size()); TT_ASSERT(processor_type_idx < this->processor_classes_[processor_class_idx].size()); - return this->processor_classes_[processor_class_idx][processor_type_idx].local_init_addr; + return this->processor_classes_[processor_class_idx][processor_type_idx]; } class Hal { @@ -191,10 +185,7 @@ class Hal { uint32_t get_num_risc_processors() const; - template - T get_base_firmware_addr(uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const; - template - T get_binary_local_init_addr(uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const; + const HalJitBuildConfig &get_jit_build_config(uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const; }; inline uint32_t Hal::get_programmable_core_type_count() const { @@ -284,16 +275,9 @@ inline bool Hal::get_supports_cbs(uint32_t programmable_core_type_index) const { return this->core_info_[programmable_core_type_index].supports_cbs_; } -template -inline T Hal::get_base_firmware_addr(uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const { - TT_ASSERT(programmable_core_type_index < this->core_info_.size()); - return this->core_info_[programmable_core_type_index].get_base_firmware_addr(processor_class_idx, processor_type_idx); -} - -template -inline T Hal::get_binary_local_init_addr(uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const { +inline const HalJitBuildConfig &Hal::get_jit_build_config(uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const { TT_ASSERT(programmable_core_type_index < this->core_info_.size()); - return this->core_info_[programmable_core_type_index].get_binary_local_init_addr(processor_class_idx, processor_type_idx); + return this->core_info_[programmable_core_type_index].get_jit_build_config(processor_class_idx, processor_type_idx); } class HalSingleton : public Hal { @@ -315,6 +299,8 @@ class HalSingleton : public Hal { inline auto& hal = HalSingleton::getInstance(); // inline variable requires C++17 +uint32_t generate_risc_startup_addr(uint32_t firmware_base); // used by Tensix initializers to build HalJitBuildConfig + } // namespace tt_metal } // namespace tt diff --git a/tt_metal/llrt/llrt.cpp b/tt_metal/llrt/llrt.cpp index e11d423a0c60..231618059162 100644 --- a/tt_metal/llrt/llrt.cpp +++ b/tt_metal/llrt/llrt.cpp @@ -67,7 +67,7 @@ ll_api::memory get_risc_binary(string const &path, uint64_t data_start = MEM_LOCAL_BASE; uint64_t text_start = (relo_type == ll_api::memory::Relocate::XIP) ? 0 : - tt::tt_metal::hal.get_base_firmware_addr(core_type_idx, processor_class_idx, processor_type_idx); + tt::tt_metal::hal.get_jit_build_config(core_type_idx, processor_class_idx, processor_type_idx).fw_base_addr; ptr->pack_data_into_text(text_start, data_start); } @@ -121,10 +121,6 @@ void write_launch_msg_to_core(chip_id_t chip, const CoreCoord core, launch_msg_t } } -void launch_erisc_app_fw_on_core(chip_id_t chip, CoreCoord core) { - llrt::write_hex_vec_to_core(chip, core, {0x1}, eth_l1_mem::address_map::LAUNCH_ERISC_APP_FLAG); -} - void print_worker_cores(chip_id_t chip_id) { std::cout << std::endl << "worker cores: " << std::endl; for (const CoreCoord &core : tt::Cluster::instance().get_soc_desc(chip_id).physical_workers) { @@ -143,44 +139,11 @@ ll_api::memory read_mem_from_core(chip_id_t chip, const CoreCoord &core, const l return read_mem; } - -uint32_t generate_risc_startup_addr(bool is_eth_core) { - // Options for handling brisc fw not starting at mem[0]: - // 1) Program the register for the start address out of reset - // 2) Encode a jump in crt0 for mem[0] - // 3) Write the jump to mem[0] here - // This does #3. #1 may be best, #2 gets messy (elf files - // drop any section before .init, crt0 needs ifdefs, etc) - constexpr uint32_t jal_opcode = 0x6f; - constexpr uint32_t jal_max_offset = 0x0007ffff; - uint32_t opcode = jal_opcode; - uint32_t firmware_base = is_eth_core ? MEM_IERISC_FIRMWARE_BASE : MEM_BRISC_FIRMWARE_BASE; - assert(firmware_base < jal_max_offset); - // See riscv spec for offset encoding below - uint32_t jal_offset_bit_20 = 0; - uint32_t jal_offset_bits_10_to_1 = (firmware_base & 0x7fe) << 20; - uint32_t jal_offset_bit_11 = (firmware_base & 0x800) << 9; - uint32_t jal_offset_bits_19_to_12 = (firmware_base & 0xff000) << 0; - uint32_t jal_offset = - jal_offset_bit_20 | - jal_offset_bits_10_to_1 | - jal_offset_bit_11 | - jal_offset_bits_19_to_12; - - return jal_offset | opcode; -} - -void program_risc_startup_addr(chip_id_t chip_id, const CoreCoord &core) { - std::vector jump_to_fw; - jump_to_fw.push_back(generate_risc_startup_addr(is_ethernet_core(core, chip_id))); - write_hex_vec_to_core(chip_id, core, jump_to_fw, 0); -} - bool test_load_write_read_risc_binary( ll_api::memory &mem, chip_id_t chip_id, const CoreCoord &core, uint32_t core_type_idx, uint32_t processor_class_idx, uint32_t processor_type_idx) { assert(is_worker_core(core, chip_id) or is_ethernet_core(core, chip_id)); - uint64_t local_init_addr = tt::tt_metal::hal.get_binary_local_init_addr(core_type_idx, processor_class_idx, processor_type_idx); + uint64_t local_init_addr = tt::tt_metal::hal.get_jit_build_config(core_type_idx, processor_class_idx, processor_type_idx).local_init_addr; log_debug(tt::LogLLRuntime, "hex_vec size = {}, size_in_bytes = {}", mem.size(), mem.size()*sizeof(uint32_t)); mem.process_spans([&](std::vector::const_iterator mem_ptr, uint64_t addr, uint32_t len_words) { diff --git a/tt_metal/llrt/llrt.hpp b/tt_metal/llrt/llrt.hpp index 678873f3eaf0..2ee0c034fbfc 100644 --- a/tt_metal/llrt/llrt.hpp +++ b/tt_metal/llrt/llrt.hpp @@ -75,8 +75,6 @@ CoreCoord logical_core_from_ethernet_core(chip_id_t chip_id, CoreCoord &physical void write_launch_msg_to_core(chip_id_t chip, CoreCoord core, launch_msg_t *msg, go_msg_t * go_msg, uint64_t addr, bool send_go = true); -void launch_erisc_app_fw_on_core(chip_id_t chip, CoreCoord core); - void print_worker_cores(chip_id_t chip_id = 0); inline bool is_worker_core(const CoreCoord &core, chip_id_t chip_id) { @@ -91,9 +89,6 @@ inline bool is_ethernet_core(const CoreCoord &core, chip_id_t chip_id) { soc_desc.physical_ethernet_cores.end(); } -uint32_t generate_risc_startup_addr(bool is_eth_core); -void program_risc_startup_addr(chip_id_t chip_id, const CoreCoord &core); - bool test_load_write_read_risc_binary( ll_api::memory &mem, chip_id_t chip_id, const CoreCoord &core, uint32_t core_type_idx, uint32_t processor_class_idx, uint32_t processor_type_idx); void write_binary_to_address(ll_api::memory &mem, chip_id_t chip_id, const CoreCoord &core, uint32_t address); diff --git a/tt_metal/llrt/wormhole/wh_hal_active_eth.cpp b/tt_metal/llrt/wormhole/wh_hal_active_eth.cpp index b363ab02d0fd..33f8839cbbca 100644 --- a/tt_metal/llrt/wormhole/wh_hal_active_eth.cpp +++ b/tt_metal/llrt/wormhole/wh_hal_active_eth.cpp @@ -60,6 +60,8 @@ HalCoreInfoType create_active_eth_mem_map() { processor_types[0] = HalJitBuildConfig{ .fw_base_addr = eth_l1_mem::address_map::FIRMWARE_BASE, .local_init_addr = eth_l1_mem::address_map::FIRMWARE_BASE, + .fw_launch_addr = eth_l1_mem::address_map::LAUNCH_ERISC_APP_FLAG, + .fw_launch_addr_value = 0x1, }; processor_classes[processor_class_idx] = processor_types; } diff --git a/tt_metal/llrt/wormhole/wh_hal_idle_eth.cpp b/tt_metal/llrt/wormhole/wh_hal_idle_eth.cpp index 683d3e753621..af0d56d896c4 100644 --- a/tt_metal/llrt/wormhole/wh_hal_idle_eth.cpp +++ b/tt_metal/llrt/wormhole/wh_hal_idle_eth.cpp @@ -68,6 +68,8 @@ HalCoreInfoType create_idle_eth_mem_map() { processor_types[0] = HalJitBuildConfig{ .fw_base_addr = MEM_IERISC_FIRMWARE_BASE, .local_init_addr = MEM_IERISC_INIT_LOCAL_L1_BASE_SCRATCH, + .fw_launch_addr = 0x0, + .fw_launch_addr_value = generate_risc_startup_addr(MEM_IERISC_FIRMWARE_BASE), }; processor_classes[processor_class_idx] = processor_types; } diff --git a/tt_metal/llrt/wormhole/wh_hal_tensix.cpp b/tt_metal/llrt/wormhole/wh_hal_tensix.cpp index b95a2603d628..259ebedd3d6a 100644 --- a/tt_metal/llrt/wormhole/wh_hal_tensix.cpp +++ b/tt_metal/llrt/wormhole/wh_hal_tensix.cpp @@ -65,16 +65,21 @@ HalCoreInfoType create_tensix_mem_map() { std::uint32_t num_processors = processor_class_idx == (NumTensixDispatchClasses - 1) ? 3 : 1; processor_types.resize(num_processors); for (std::uint8_t processor_type_idx = 0; processor_type_idx < processor_types.size(); processor_type_idx++) { - DeviceAddr fw_base, local_init; + DeviceAddr fw_base, local_init, fw_launch; + uint32_t fw_launch_value; switch (processor_class_idx) { case 0: { fw_base = MEM_BRISC_FIRMWARE_BASE; local_init = MEM_BRISC_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = 0x0; // BRISC is hardcoded to have reset PC of 0 + fw_launch_value = generate_risc_startup_addr(fw_base); } break; case 1: { fw_base = MEM_NCRISC_FIRMWARE_BASE; local_init = MEM_NCRISC_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = 0;//fix me; + fw_launch_value = fw_base; } break; case 2: { @@ -82,16 +87,22 @@ HalCoreInfoType create_tensix_mem_map() { case 0: { fw_base = MEM_TRISC0_FIRMWARE_BASE; local_init = MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = 0;// fix me; + fw_launch_value = fw_base; } break; case 1: { fw_base = MEM_TRISC1_FIRMWARE_BASE; local_init = MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = 0; // fix me; + fw_launch_value = fw_base; } break; case 2: { fw_base = MEM_TRISC2_FIRMWARE_BASE; local_init = MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH; + fw_launch = 0; //fix me + fw_launch_value = fw_base; } break; } @@ -103,7 +114,9 @@ HalCoreInfoType create_tensix_mem_map() { processor_types[processor_type_idx] = HalJitBuildConfig{ .fw_base_addr = fw_base, - .local_init_addr = local_init + .local_init_addr = local_init, + .fw_launch_addr = fw_launch, + .fw_launch_addr_value = fw_launch_value }; } processor_classes[processor_class_idx] = processor_types;