Skip to content

Commit

Permalink
Add address and value that will launch fw on a programmable core into…
Browse files Browse the repository at this point in the history
… the HalJitBuildConfig
  • Loading branch information
abhullar-tt committed Nov 20, 2024
1 parent 2354099 commit d9c8f61
Show file tree
Hide file tree
Showing 11 changed files with 84 additions and 60 deletions.
10 changes: 4 additions & 6 deletions tt_metal/impl/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -411,10 +411,10 @@ void Device::initialize_firmware(const HalProgrammableCoreType &core_type, CoreC

uint32_t core_type_idx = hal.get_programmable_core_type_index(core_type);
uint32_t processor_class_count = hal.get_processor_classes_count(core_type);
auto jit_build_config = hal.get_jit_build_config(core_type_idx, 0, 0); // Only the first risc needs to be programmed

switch (core_type) {
case HalProgrammableCoreType::TENSIX: {
llrt::program_risc_startup_addr(this->id(), phys_core);
for (uint32_t processor_class = 0; processor_class < processor_class_count; processor_class++) {
auto [build_idx, num_build_states] = this->build_processor_type_to_index(core_type_idx, processor_class);
for (uint32_t riscv_id = build_idx; riscv_id < (build_idx + num_build_states); riscv_id++) {
Expand Down Expand Up @@ -469,11 +469,6 @@ void Device::initialize_firmware(const HalProgrammableCoreType &core_type, CoreC
}
}
}
if (is_idle_eth) {
llrt::program_risc_startup_addr(this->id(), phys_core);
} else {
llrt::launch_erisc_app_fw_on_core(this->id(), phys_core);
}
// Ethernet worker core. Launch messages will be sent by FD infra if it's enabled
// Idle ethernet core. Used by FD infra. Host will write launch messages during init.
launch_msg->kernel_config.mode = (this->using_slow_dispatch() or is_idle_eth) ? DISPATCH_MODE_HOST : DISPATCH_MODE_DEV;
Expand All @@ -483,6 +478,9 @@ void Device::initialize_firmware(const HalProgrammableCoreType &core_type, CoreC
TT_THROW("Unsupported programable core type {} to initialize build states", magic_enum::enum_name(core_type));
}

tt::Cluster::instance().write_core(
&jit_build_config.fw_launch_addr_value, sizeof(uint32_t), tt_cxy_pair(this->id_, phys_core), jit_build_config.fw_launch_addr);

// Initialize each entry in the launch_msg ring buffer with the correct dispatch mode - Cores that don't get a valid
// launch_message during program execution need to at least have the correct dispatch mode.
// When using Fast Dispatch on Tensix:
Expand Down
2 changes: 2 additions & 0 deletions tt_metal/llrt/blackhole/bh_hal_active_eth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ HalCoreInfoType create_active_eth_mem_map() {
processor_types[0] = HalJitBuildConfig{
.fw_base_addr = eth_l1_mem::address_map::FIRMWARE_BASE,
.local_init_addr = eth_l1_mem::address_map::FIRMWARE_BASE, // this will be uplifted in subsequent commits enabling active erisc
.fw_launch_addr = 0xFFB14008,
.fw_launch_addr_value = (uint32_t)eth_l1_mem::address_map::FIRMWARE_BASE,
};
processor_classes[processor_class_idx] = processor_types;
}
Expand Down
11 changes: 9 additions & 2 deletions tt_metal/llrt/blackhole/bh_hal_idle_eth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,24 +65,31 @@ HalCoreInfoType create_idle_eth_mem_map() {
std::vector<std::vector<HalJitBuildConfig>> processor_classes(NumEthDispatchClasses);
std::vector<HalJitBuildConfig> processor_types(1);
for (std::uint8_t processor_class_idx = 0; processor_class_idx < NumEthDispatchClasses; processor_class_idx++) {
DeviceAddr fw_base, local_init;
DeviceAddr fw_base, local_init, fw_launch;
uint32_t fw_launch_value;
switch (processor_class_idx) {
case 0: {
fw_base = MEM_IERISC_FIRMWARE_BASE;
local_init = MEM_IERISC_INIT_LOCAL_L1_BASE_SCRATCH;
fw_launch = 0xFFB14000;
fw_launch_value = fw_base;
}
break;
case 1: {
fw_base = MEM_SLAVE_IERISC_FIRMWARE_BASE;
local_init = MEM_SLAVE_IERISC_INIT_LOCAL_L1_BASE_SCRATCH;
fw_launch = 0xFFB14008;
fw_launch_value = fw_base;
}
break;
default:
TT_THROW("Unexpected processor class {} for Blackhole Idle Ethernet", processor_class_idx);
}
processor_types[0] = HalJitBuildConfig{
.fw_base_addr = fw_base,
.local_init_addr = local_init
.local_init_addr = local_init,
.fw_launch_addr = fw_launch,
.fw_launch_addr_value = fw_launch_value
};
processor_classes[processor_class_idx] = processor_types;
}
Expand Down
18 changes: 16 additions & 2 deletions tt_metal/llrt/blackhole/bh_hal_tensix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "dev_mem_map.h"
#include "dev_msgs.h"
#include "noc/noc_parameters.h"
#include "llrt/llrt.hpp"

#include "hal.hpp"
#include "hal_asserts.hpp"
Expand Down Expand Up @@ -65,33 +66,44 @@ HalCoreInfoType create_tensix_mem_map() {
uint32_t num_processors = processor_class_idx == (NumTensixDispatchClasses - 1) ? 3 : 1;
processor_types.resize(num_processors);
for (uint8_t processor_type_idx = 0; processor_type_idx < processor_types.size(); processor_type_idx++) {
DeviceAddr fw_base, local_init;
DeviceAddr fw_base, local_init, fw_launch;
uint32_t fw_launch_value;
switch (processor_class_idx) {
case 0: {
fw_base = MEM_BRISC_FIRMWARE_BASE;
local_init = MEM_BRISC_INIT_LOCAL_L1_BASE_SCRATCH;
fw_launch = 0x0; // BRISC is hardcoded to have reset PC of 0
fw_launch_value = llrt::generate_risc_startup_addr(fw_base);
}
break;
case 1: {
fw_base = MEM_NCRISC_FIRMWARE_BASE;
local_init = MEM_NCRISC_INIT_LOCAL_L1_BASE_SCRATCH;
fw_launch = RISCV_DEBUG_REG_NCRISC_RESET_PC;
fw_launch_value = fw_base;
}
break;
case 2: {
switch (processor_type_idx) {
case 0: {
fw_base = MEM_TRISC0_FIRMWARE_BASE;
local_init = MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH;
fw_launch = RISCV_DEBUG_REG_TRISC0_RESET_PC;
fw_launch_value = fw_base;
}
break;
case 1: {
fw_base = MEM_TRISC1_FIRMWARE_BASE;
local_init = MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH;
fw_launch = RISCV_DEBUG_REG_TRISC1_RESET_PC;
fw_launch_value = fw_base;
}
break;
case 2: {
fw_base = MEM_TRISC2_FIRMWARE_BASE;
local_init = MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH;
fw_launch = RISCV_DEBUG_REG_TRISC2_RESET_PC;
fw_launch_value = fw_base;
}
break;
}
Expand All @@ -103,7 +115,9 @@ HalCoreInfoType create_tensix_mem_map() {

processor_types[processor_type_idx] = HalJitBuildConfig{
.fw_base_addr = fw_base,
.local_init_addr = local_init
.local_init_addr = local_init,
.fw_launch_addr = fw_launch,
.fw_launch_addr_value = fw_launch_value
};
}
processor_classes[processor_class_idx] = processor_types;
Expand Down
18 changes: 16 additions & 2 deletions tt_metal/llrt/grayskull/gs_hal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "hal.hpp"

#include "hal_asserts.hpp"
#include "llrt/llrt.hpp"

// FIXME: Eventually this file will be gone
#include "tt_metal/hostdevcommon/common_runtime_address_map.h" // L1_KERNEL_CONFIG_BASE
Expand Down Expand Up @@ -74,33 +75,44 @@ void Hal::initialize_gs() {
uint32_t num_processors = processor_class_idx == (NumTensixDispatchClasses - 1) ? 3 : 1;
processor_types.resize(num_processors);
for (uint8_t processor_type_idx = 0; processor_type_idx < processor_types.size(); processor_type_idx++) {
DeviceAddr fw_base, local_init;
DeviceAddr fw_base, local_init, fw_launch;
uint32_t fw_launch_value;
switch (processor_class_idx) {
case 0: {
fw_base = MEM_BRISC_FIRMWARE_BASE;
local_init = MEM_BRISC_INIT_LOCAL_L1_BASE_SCRATCH;
fw_launch = 0x0; // BRISC is hardcoded to have reset PC of 0
fw_launch_value = llrt::generate_risc_startup_addr(fw_base);
}
break;
case 1: {
fw_base = MEM_NCRISC_FIRMWARE_BASE;
local_init = MEM_NCRISC_INIT_LOCAL_L1_BASE_SCRATCH;
// fw_launch = 0; // fix me
fw_launch_value = fw_base;
}
break;
case 2: {
switch (processor_type_idx) {
case 0: {
fw_base = MEM_TRISC0_FIRMWARE_BASE;
local_init = MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH;
// fw_launch = 0; // fix me
fw_launch_value = fw_base;
}
break;
case 1: {
fw_base = MEM_TRISC1_FIRMWARE_BASE;
local_init = MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH;
// fw_launch = 0; // fix me
fw_launch_value = fw_base;
}
break;
case 2: {
fw_base = MEM_TRISC2_FIRMWARE_BASE;
local_init = MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH;
// fw_launch = 0; // fix me
fw_launch_value = fw_base;
}
break;
}
Expand All @@ -112,7 +124,9 @@ void Hal::initialize_gs() {

processor_types[processor_type_idx] = HalJitBuildConfig{
.fw_base_addr = fw_base,
.local_init_addr = local_init
.local_init_addr = local_init,
.fw_launch_addr = fw_launch,
.fw_launch_addr_value = fw_launch_value
};
}
processor_classes[processor_class_idx] = processor_types;
Expand Down
38 changes: 11 additions & 27 deletions tt_metal/llrt/hal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,14 @@ enum class HalMemType : uint8_t {

using DeviceAddr = std::uint64_t;

// Note: nsidwell will be removing need for fw_base_addr and local_init_addr
// fw_launch_addr is programmed with fw_launch_addr_value on the master risc
// of a given progammable core to start FW
struct HalJitBuildConfig {
DeviceAddr fw_base_addr;
DeviceAddr local_init_addr;
DeviceAddr fw_launch_addr;
uint32_t fw_launch_addr_value;
};

class Hal;
Expand All @@ -100,10 +105,7 @@ class HalCoreInfoType {
uint32_t get_dev_size(HalL1MemAddrType addr_type) const;
uint32_t get_processor_classes_count() const;
uint32_t get_processor_types_count(uint32_t processor_class_idx) const;
template <typename T = DeviceAddr>
T get_base_firmware_addr(uint32_t processor_class_idx, uint32_t processor_type_idx) const;
template <typename T = DeviceAddr>
T get_binary_local_init_addr(uint32_t processor_class_idx, uint32_t processor_type_idx) const;
const HalJitBuildConfig &get_jit_build_config(uint32_t processor_class_idx, uint32_t processor_type_idx) const;
};

template <typename T>
Expand All @@ -128,18 +130,10 @@ inline uint32_t HalCoreInfoType::get_processor_types_count(uint32_t processor_cl
return this->processor_classes_[processor_class_idx].size();
}

template <typename T>
inline T HalCoreInfoType::get_base_firmware_addr(uint32_t processor_class_idx, uint32_t processor_type_idx) const {
TT_ASSERT(processor_class_idx < this->processor_classes_.size());
TT_ASSERT(processor_type_idx < this->processor_classes_[processor_class_idx].size());
return this->processor_classes_[processor_class_idx][processor_type_idx].fw_base_addr;
}

template <typename T>
inline T HalCoreInfoType::get_binary_local_init_addr(uint32_t processor_class_idx, uint32_t processor_type_idx) const {
inline const HalJitBuildConfig &HalCoreInfoType::get_jit_build_config(uint32_t processor_class_idx, uint32_t processor_type_idx) const {
TT_ASSERT(processor_class_idx < this->processor_classes_.size());
TT_ASSERT(processor_type_idx < this->processor_classes_[processor_class_idx].size());
return this->processor_classes_[processor_class_idx][processor_type_idx].local_init_addr;
return this->processor_classes_[processor_class_idx][processor_type_idx];
}

class Hal {
Expand Down Expand Up @@ -191,10 +185,7 @@ class Hal {

uint32_t get_num_risc_processors() const;

template <typename T = DeviceAddr>
T get_base_firmware_addr(uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const;
template <typename T = DeviceAddr>
T get_binary_local_init_addr(uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const;
const HalJitBuildConfig &get_jit_build_config(uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const;
};

inline uint32_t Hal::get_programmable_core_type_count() const {
Expand Down Expand Up @@ -284,16 +275,9 @@ inline bool Hal::get_supports_cbs(uint32_t programmable_core_type_index) const {
return this->core_info_[programmable_core_type_index].supports_cbs_;
}

template <typename T>
inline T Hal::get_base_firmware_addr(uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const {
TT_ASSERT(programmable_core_type_index < this->core_info_.size());
return this->core_info_[programmable_core_type_index].get_base_firmware_addr(processor_class_idx, processor_type_idx);
}

template <typename T>
inline T Hal::get_binary_local_init_addr(uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const {
inline const HalJitBuildConfig &Hal::get_jit_build_config(uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const {
TT_ASSERT(programmable_core_type_index < this->core_info_.size());
return this->core_info_[programmable_core_type_index].get_binary_local_init_addr(processor_class_idx, processor_type_idx);
return this->core_info_[programmable_core_type_index].get_jit_build_config(processor_class_idx, processor_type_idx);
}

class HalSingleton : public Hal {
Expand Down
19 changes: 4 additions & 15 deletions tt_metal/llrt/llrt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ ll_api::memory get_risc_binary(string const &path,
uint64_t data_start = MEM_LOCAL_BASE;
uint64_t text_start = (relo_type == ll_api::memory::Relocate::XIP) ?
0 :
tt::tt_metal::hal.get_base_firmware_addr(core_type_idx, processor_class_idx, processor_type_idx);
tt::tt_metal::hal.get_jit_build_config(core_type_idx, processor_class_idx, processor_type_idx).fw_base_addr;
ptr->pack_data_into_text(text_start, data_start);
}

Expand Down Expand Up @@ -121,10 +121,6 @@ void write_launch_msg_to_core(chip_id_t chip, const CoreCoord core, launch_msg_t
}
}

void launch_erisc_app_fw_on_core(chip_id_t chip, CoreCoord core) {
llrt::write_hex_vec_to_core(chip, core, {0x1}, eth_l1_mem::address_map::LAUNCH_ERISC_APP_FLAG);
}

void print_worker_cores(chip_id_t chip_id) {
std::cout << std::endl << "worker cores: " << std::endl;
for (const CoreCoord &core : tt::Cluster::instance().get_soc_desc(chip_id).physical_workers) {
Expand All @@ -144,17 +140,16 @@ ll_api::memory read_mem_from_core(chip_id_t chip, const CoreCoord &core, const l
}


uint32_t generate_risc_startup_addr(bool is_eth_core) {
uint32_t generate_risc_startup_addr(uint32_t firmware_base) {
// Options for handling brisc fw not starting at mem[0]:
// 1) Program the register for the start address out of reset
// 1) Program the register for the start address out of reset - no reset PC register on GS/WH/BH
// 2) Encode a jump in crt0 for mem[0]
// 3) Write the jump to mem[0] here
// This does #3. #1 may be best, #2 gets messy (elf files
// drop any section before .init, crt0 needs ifdefs, etc)
constexpr uint32_t jal_opcode = 0x6f;
constexpr uint32_t jal_max_offset = 0x0007ffff;
uint32_t opcode = jal_opcode;
uint32_t firmware_base = is_eth_core ? MEM_IERISC_FIRMWARE_BASE : MEM_BRISC_FIRMWARE_BASE;
assert(firmware_base < jal_max_offset);
// See riscv spec for offset encoding below
uint32_t jal_offset_bit_20 = 0;
Expand All @@ -170,17 +165,11 @@ uint32_t generate_risc_startup_addr(bool is_eth_core) {
return jal_offset | opcode;
}

void program_risc_startup_addr(chip_id_t chip_id, const CoreCoord &core) {
std::vector<uint32_t> jump_to_fw;
jump_to_fw.push_back(generate_risc_startup_addr(is_ethernet_core(core, chip_id)));
write_hex_vec_to_core(chip_id, core, jump_to_fw, 0);
}

bool test_load_write_read_risc_binary(
ll_api::memory &mem, chip_id_t chip_id, const CoreCoord &core, uint32_t core_type_idx, uint32_t processor_class_idx, uint32_t processor_type_idx) {
assert(is_worker_core(core, chip_id) or is_ethernet_core(core, chip_id));

uint64_t local_init_addr = tt::tt_metal::hal.get_binary_local_init_addr(core_type_idx, processor_class_idx, processor_type_idx);
uint64_t local_init_addr = tt::tt_metal::hal.get_jit_build_config(core_type_idx, processor_class_idx, processor_type_idx).local_init_addr;

log_debug(tt::LogLLRuntime, "hex_vec size = {}, size_in_bytes = {}", mem.size(), mem.size()*sizeof(uint32_t));
mem.process_spans([&](std::vector<uint32_t>::const_iterator mem_ptr, uint64_t addr, uint32_t len_words) {
Expand Down
5 changes: 1 addition & 4 deletions tt_metal/llrt/llrt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,6 @@ CoreCoord logical_core_from_ethernet_core(chip_id_t chip_id, CoreCoord &physical

void write_launch_msg_to_core(chip_id_t chip, CoreCoord core, launch_msg_t *msg, go_msg_t * go_msg, uint64_t addr, bool send_go = true);

void launch_erisc_app_fw_on_core(chip_id_t chip, CoreCoord core);

void print_worker_cores(chip_id_t chip_id = 0);

inline bool is_worker_core(const CoreCoord &core, chip_id_t chip_id) {
Expand All @@ -91,8 +89,7 @@ inline bool is_ethernet_core(const CoreCoord &core, chip_id_t chip_id) {
soc_desc.physical_ethernet_cores.end();
}

uint32_t generate_risc_startup_addr(bool is_eth_core);
void program_risc_startup_addr(chip_id_t chip_id, const CoreCoord &core);
uint32_t generate_risc_startup_addr(uint32_t firmware_base);

bool test_load_write_read_risc_binary(
ll_api::memory &mem, chip_id_t chip_id, const CoreCoord &core, uint32_t core_type_idx, uint32_t processor_class_idx, uint32_t processor_type_idx);
Expand Down
2 changes: 2 additions & 0 deletions tt_metal/llrt/wormhole/wh_hal_active_eth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ HalCoreInfoType create_active_eth_mem_map() {
processor_types[0] = HalJitBuildConfig{
.fw_base_addr = eth_l1_mem::address_map::FIRMWARE_BASE,
.local_init_addr = eth_l1_mem::address_map::FIRMWARE_BASE,
.fw_launch_addr = eth_l1_mem::address_map::LAUNCH_ERISC_APP_FLAG,
.fw_launch_addr_value = 0x1,
};
processor_classes[processor_class_idx] = processor_types;
}
Expand Down
3 changes: 3 additions & 0 deletions tt_metal/llrt/wormhole/wh_hal_idle_eth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "dev_mem_map.h"
#include "dev_msgs.h"
#include "noc/noc_parameters.h"
#include "llrt/llrt.hpp"

#include "hal.hpp"
#include "hal_asserts.hpp"
Expand Down Expand Up @@ -68,6 +69,8 @@ HalCoreInfoType create_idle_eth_mem_map() {
processor_types[0] = HalJitBuildConfig{
.fw_base_addr = MEM_IERISC_FIRMWARE_BASE,
.local_init_addr = MEM_IERISC_INIT_LOCAL_L1_BASE_SCRATCH,
.fw_launch_addr = 0x0,
.fw_launch_addr_value = llrt::generate_risc_startup_addr(MEM_IERISC_FIRMWARE_BASE),
};
processor_classes[processor_class_idx] = processor_types;
}
Expand Down
Loading

0 comments on commit d9c8f61

Please sign in to comment.