Skip to content

Commit

Permalink
Add address and value that will launch fw on a programmable core into…
Browse files Browse the repository at this point in the history
… the HalJitBuildConfig
  • Loading branch information
abhullar-tt committed Dec 23, 2024
1 parent 1dd808f commit d767af2
Show file tree
Hide file tree
Showing 13 changed files with 157 additions and 112 deletions.
5 changes: 4 additions & 1 deletion tt_metal/impl/debug/watcher_device_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,10 @@ void WatcherDeviceReader::DumpL1Status(CoreDescriptor& core, const launch_msg_t*
// Read L1 address 0, looking for memory corruption
std::vector<uint32_t> data;
data = tt::llrt::read_hex_vec_from_core(device->id(), core.coord, MEM_L1_BASE, sizeof(uint32_t));
if (data[0] != llrt::generate_risc_startup_addr(false)) {
TT_ASSERT(core.type == CoreType::WORKER);
uint32_t core_type_idx = hal.get_programmable_core_type_index(HalProgrammableCoreType::TENSIX);
auto fw_launch_value = hal.get_jit_build_config(core_type_idx, 0, 0).fw_launch_addr_value;
if (data[0] != fw_launch_value) {
LogRunningKernels(core, launch_msg);
TT_THROW("Watcher found corruption at L1[0] on core {}: read {}", core.coord.str(), data[0]);
}
Expand Down
10 changes: 4 additions & 6 deletions tt_metal/impl/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,10 +430,10 @@ void Device::initialize_firmware(const HalProgrammableCoreType &core_type, CoreC
this->initialize_device_bank_to_noc_tables(core_type, virtual_core);
uint32_t core_type_idx = hal.get_programmable_core_type_index(core_type);
uint32_t processor_class_count = hal.get_processor_classes_count(core_type);
auto jit_build_config = hal.get_jit_build_config(core_type_idx, 0, 0); // Only the first risc needs to be programmed

switch (core_type) {
case HalProgrammableCoreType::TENSIX: {
llrt::program_risc_startup_addr(this->id(), virtual_core);
for (uint32_t processor_class = 0; processor_class < processor_class_count; processor_class++) {
auto [build_idx, num_build_states] = this->build_processor_type_to_index(core_type_idx, processor_class);
for (uint32_t riscv_id = build_idx; riscv_id < (build_idx + num_build_states); riscv_id++) {
Expand Down Expand Up @@ -489,11 +489,6 @@ void Device::initialize_firmware(const HalProgrammableCoreType &core_type, CoreC
}
}
}
if (is_idle_eth) {
llrt::program_risc_startup_addr(this->id(), virtual_core);
} else {
llrt::launch_erisc_app_fw_on_core(this->id(), virtual_core);
}
// Ethernet worker core. Launch messages will be sent by FD infra if it's enabled
// Idle ethernet core. Used by FD infra. Host will write launch messages during init.
launch_msg->kernel_config.mode = (this->using_slow_dispatch() or is_idle_eth) ? DISPATCH_MODE_HOST : DISPATCH_MODE_DEV;
Expand All @@ -503,6 +498,9 @@ void Device::initialize_firmware(const HalProgrammableCoreType &core_type, CoreC
TT_THROW("Unsupported programable core type {} to initialize build states", magic_enum::enum_name(core_type));
}

tt::Cluster::instance().write_core(
&jit_build_config.fw_launch_addr_value, sizeof(uint32_t), tt_cxy_pair(this->id_, virtual_core), jit_build_config.fw_launch_addr);

// Initialize each entry in the launch_msg ring buffer with the correct dispatch mode - Cores that don't get a valid
// launch_message during program execution need to at least have the correct dispatch mode.
// When using Fast Dispatch on Tensix:
Expand Down
2 changes: 2 additions & 0 deletions tt_metal/llrt/blackhole/bh_hal_active_eth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ HalCoreInfoType create_active_eth_mem_map() {
.fw_base_addr = eth_l1_mem::address_map::FIRMWARE_BASE,
.local_init_addr = eth_l1_mem::address_map::FIRMWARE_BASE, // this will be uplifted in subsequent commits
// enabling active erisc
.fw_launch_addr = 0xFFB14008,
.fw_launch_addr_value = (uint32_t)eth_l1_mem::address_map::FIRMWARE_BASE,
};
processor_classes[processor_class_idx] = processor_types;
}
Expand Down
23 changes: 18 additions & 5 deletions tt_metal/llrt/blackhole/bh_hal_idle_eth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,19 +72,32 @@ HalCoreInfoType create_idle_eth_mem_map() {
std::vector<std::vector<HalJitBuildConfig>> processor_classes(NumEthDispatchClasses);
std::vector<HalJitBuildConfig> processor_types(1);
for (std::uint8_t processor_class_idx = 0; processor_class_idx < NumEthDispatchClasses; processor_class_idx++) {
DeviceAddr fw_base, local_init;
DeviceAddr fw_base, local_init, fw_launch;
uint32_t fw_launch_value;
switch (processor_class_idx) {
case 0: {
fw_base = MEM_IERISC_FIRMWARE_BASE;
local_init = MEM_IERISC_INIT_LOCAL_L1_BASE_SCRATCH;
} break;
fw_launch = 0xFFB14000;
fw_launch_value = fw_base;
}
break;
case 1: {
fw_base = MEM_SLAVE_IERISC_FIRMWARE_BASE;
local_init = MEM_SLAVE_IERISC_INIT_LOCAL_L1_BASE_SCRATCH;
} break;
default: TT_THROW("Unexpected processor class {} for Blackhole Idle Ethernet", processor_class_idx);
fw_launch = 0xFFB14008;
fw_launch_value = fw_base;
}
break;
default:
TT_THROW("Unexpected processor class {} for Blackhole Idle Ethernet", processor_class_idx);
}
processor_types[0] = HalJitBuildConfig{.fw_base_addr = fw_base, .local_init_addr = local_init};
processor_types[0] = HalJitBuildConfig{
.fw_base_addr = fw_base,
.local_init_addr = local_init,
.fw_launch_addr = fw_launch,
.fw_launch_addr_value = fw_launch_value
};
processor_classes[processor_class_idx] = processor_types;
}

Expand Down
39 changes: 30 additions & 9 deletions tt_metal/llrt/blackhole/bh_hal_tensix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "dev_mem_map.h"
#include "dev_msgs.h"
#include "noc/noc_parameters.h"
#include "tensix.h"

#include "hal.hpp"
#include "hal_asserts.hpp"
Expand All @@ -31,7 +32,7 @@ HalCoreInfoType create_tensix_mem_map() {
std::vector<DeviceAddr> mem_map_bases;

mem_map_bases.resize(static_cast<std::size_t>(HalL1MemAddrType::COUNT));
mem_map_bases[static_cast<std::size_t>(HalL1MemAddrType::BASE)] = MEM_L1_BASE;
mem_map_bases[static_cast<std::size_t>(HalL1MemAddrType::BASE)] = MEM_L1_BASE;
mem_map_bases[static_cast<std::size_t>(HalL1MemAddrType::BARRIER)] = MEM_L1_BARRIER;
mem_map_bases[static_cast<std::size_t>(HalL1MemAddrType::MAILBOX)] = MEM_MAILBOX_BASE;
mem_map_bases[static_cast<std::size_t>(HalL1MemAddrType::LAUNCH)] = GET_MAILBOX_ADDRESS_HOST(launch);
Expand Down Expand Up @@ -71,37 +72,57 @@ HalCoreInfoType create_tensix_mem_map() {
uint32_t num_processors = processor_class_idx == (NumTensixDispatchClasses - 1) ? 3 : 1;
processor_types.resize(num_processors);
for (size_t processor_type_idx = 0; processor_type_idx < processor_types.size(); processor_type_idx++) {
DeviceAddr fw_base, local_init;
DeviceAddr fw_base, local_init, fw_launch;
uint32_t fw_launch_value;
switch (processor_class_idx) {
case 0: {
fw_base = MEM_BRISC_FIRMWARE_BASE;
local_init = MEM_BRISC_INIT_LOCAL_L1_BASE_SCRATCH;
} break;
fw_launch = 0x0; // BRISC is hardcoded to have reset PC of 0
fw_launch_value = generate_risc_startup_addr(fw_base);
}
break;
case 1: {
fw_base = MEM_NCRISC_FIRMWARE_BASE;
local_init = MEM_NCRISC_INIT_LOCAL_L1_BASE_SCRATCH;
} break;
fw_launch = RISCV_DEBUG_REG_NCRISC_RESET_PC;
fw_launch_value = fw_base;
}
break;
case 2: {
switch (processor_type_idx) {
case 0: {
fw_base = MEM_TRISC0_FIRMWARE_BASE;
local_init = MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH;
} break;
fw_launch = RISCV_DEBUG_REG_TRISC0_RESET_PC;
fw_launch_value = fw_base;
}
break;
case 1: {
fw_base = MEM_TRISC1_FIRMWARE_BASE;
local_init = MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH;
} break;
fw_launch = RISCV_DEBUG_REG_TRISC1_RESET_PC;
fw_launch_value = fw_base;
}
break;
case 2: {
fw_base = MEM_TRISC2_FIRMWARE_BASE;
local_init = MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH;
} break;
fw_launch = RISCV_DEBUG_REG_TRISC2_RESET_PC;
fw_launch_value = fw_base;
}
break;
}
} break;
default: TT_THROW("Unexpected processor class {} for Blackhole Tensix", processor_class_idx);
}

processor_types[processor_type_idx] =
HalJitBuildConfig{.fw_base_addr = fw_base, .local_init_addr = local_init};
processor_types[processor_type_idx] = HalJitBuildConfig{
.fw_base_addr = fw_base,
.local_init_addr = local_init,
.fw_launch_addr = fw_launch,
.fw_launch_addr_value = fw_launch_value
};
}
processor_classes[processor_class_idx] = processor_types;
}
Expand Down
36 changes: 28 additions & 8 deletions tt_metal/llrt/grayskull/gs_hal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,37 +86,57 @@ void Hal::initialize_gs() {
uint32_t num_processors = processor_class_idx == (NumTensixDispatchClasses - 1) ? 3 : 1;
processor_types.resize(num_processors);
for (size_t processor_type_idx = 0; processor_type_idx < processor_types.size(); processor_type_idx++) {
DeviceAddr fw_base, local_init;
DeviceAddr fw_base, local_init, fw_launch;
uint32_t fw_launch_value;
switch (processor_class_idx) {
case 0: {
fw_base = MEM_BRISC_FIRMWARE_BASE;
local_init = MEM_BRISC_INIT_LOCAL_L1_BASE_SCRATCH;
} break;
fw_launch = 0x0; // BRISC is hardcoded to have reset PC of 0
fw_launch_value = generate_risc_startup_addr(fw_base);
}
break;
case 1: {
fw_base = MEM_NCRISC_FIRMWARE_BASE;
local_init = MEM_NCRISC_INIT_LOCAL_L1_BASE_SCRATCH;
} break;
fw_launch = 0; // fix me
fw_launch_value = fw_base;
}
break;
case 2: {
switch (processor_type_idx) {
case 0: {
fw_base = MEM_TRISC0_FIRMWARE_BASE;
local_init = MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH;
} break;
fw_launch = 0; // fix me
fw_launch_value = fw_base;
}
break;
case 1: {
fw_base = MEM_TRISC1_FIRMWARE_BASE;
local_init = MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH;
} break;
fw_launch = 0; // fix me
fw_launch_value = fw_base;
}
break;
case 2: {
fw_base = MEM_TRISC2_FIRMWARE_BASE;
local_init = MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH;
} break;
fw_launch = 0; // fix me
fw_launch_value = fw_base;
}
break;
}
} break;
default: TT_THROW("Unexpected processor class {} for Blackhole Tensix", processor_class_idx);
}

processor_types[processor_type_idx] =
HalJitBuildConfig{.fw_base_addr = fw_base, .local_init_addr = local_init};
processor_types[processor_type_idx] = HalJitBuildConfig{
.fw_base_addr = fw_base,
.local_init_addr = local_init,
.fw_launch_addr = fw_launch,
.fw_launch_addr_value = fw_launch_value
};
}
processor_classes[processor_class_idx] = processor_types;
}
Expand Down
25 changes: 25 additions & 0 deletions tt_metal/llrt/hal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,30 @@ HalCoreInfoType::HalCoreInfoType(
mem_map_sizes_(mem_map_sizes),
supports_cbs_(supports_cbs) {}

// Builds the 32-bit word placed at mem[0] so that execution starting there
// jumps to `firmware_base`.
//
// Background: brisc firmware does not have to start at mem[0]. The ways to
// cope with that are
//   (1) program a start-address-out-of-reset register - not possible, since
//       GS/WH/BH have no such reset PC register,
//   (2) have crt0 encode a jump for mem[0] - messy, because elf files drop any
//       section before .init and crt0 would need ifdefs, etc.,
//   (3) have the host write the jump to mem[0].
// This helper produces the instruction word for approach (3); approach (1)
// may ultimately be the best choice.
//
// firmware_base: jump target; asserted to be below 0x0007ffff.
// Returns: a RISC-V `jal` instruction word (opcode 0x6f, destination register
//          field left as zero) whose immediate encodes `firmware_base`.
uint32_t generate_risc_startup_addr(uint32_t firmware_base) {
    constexpr uint32_t jal_opcode = 0x6f;
    constexpr uint32_t jal_max_offset = 0x0007ffff;
    assert(firmware_base < jal_max_offset);

    // The J-type immediate is scattered across the instruction word (see the
    // RISC-V spec): offset[10:1] -> inst[30:21], offset[11] -> inst[20],
    // offset[19:12] -> inst[19:12]. offset[20] (inst[31]) is always left as
    // zero here, and offset[0] is not representable.
    const uint32_t imm_10_1 = (firmware_base & 0x7fe) << 20;
    const uint32_t imm_11 = (firmware_base & 0x800) << 9;
    const uint32_t imm_19_12 = firmware_base & 0xff000;  // already in position

    return imm_10_1 | imm_11 | imm_19_12 | jal_opcode;
}

} // namespace tt_metal
} // namespace tt
44 changes: 13 additions & 31 deletions tt_metal/llrt/hal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,14 @@ enum class HalMemType : uint8_t { L1 = 0, DRAM = 1, HOST = 2, COUNT = 3 };

using DeviceAddr = std::uint64_t;

// Per-processor build/launch configuration stored in the HAL; one entry exists
// for each (processor class, processor type) pair of a programmable core type.
// Note: nsidwell will be removing need for fw_base_addr and local_init_addr
// fw_launch_addr is programmed with fw_launch_addr_value on the master risc
// of a given programmable core to start FW
struct HalJitBuildConfig {
// Firmware base address for this processor (the arch initializers fill it
// from the *_FIRMWARE_BASE memory-map constants).
DeviceAddr fw_base_addr;
// Local-init scratch address for this processor (filled from the
// *_INIT_LOCAL_L1_BASE_SCRATCH memory-map constants).
DeviceAddr local_init_addr;
// Device address that fw_launch_addr_value is written to in order to start
// firmware.
DeviceAddr fw_launch_addr;
// 32-bit value written to fw_launch_addr. The Tensix initializers use a jump
// instruction from generate_risc_startup_addr() for BRISC and the plain
// firmware base address for the other processors.
uint32_t fw_launch_addr_value;
};

class Hal;
Expand Down Expand Up @@ -94,10 +99,7 @@ class HalCoreInfoType {
uint32_t get_dev_size(HalL1MemAddrType addr_type) const;
uint32_t get_processor_classes_count() const;
uint32_t get_processor_types_count(uint32_t processor_class_idx) const;
template <typename T = DeviceAddr>
T get_base_firmware_addr(uint32_t processor_class_idx, uint32_t processor_type_idx) const;
template <typename T = DeviceAddr>
T get_binary_local_init_addr(uint32_t processor_class_idx, uint32_t processor_type_idx) const;
const HalJitBuildConfig &get_jit_build_config(uint32_t processor_class_idx, uint32_t processor_type_idx) const;
};

template <typename T>
Expand All @@ -120,18 +122,10 @@ inline uint32_t HalCoreInfoType::get_processor_types_count(uint32_t processor_cl
return this->processor_classes_[processor_class_idx].size();
}

template <typename T>
inline T HalCoreInfoType::get_base_firmware_addr(uint32_t processor_class_idx, uint32_t processor_type_idx) const {
inline const HalJitBuildConfig &HalCoreInfoType::get_jit_build_config(uint32_t processor_class_idx, uint32_t processor_type_idx) const {
TT_ASSERT(processor_class_idx < this->processor_classes_.size());
TT_ASSERT(processor_type_idx < this->processor_classes_[processor_class_idx].size());
return this->processor_classes_[processor_class_idx][processor_type_idx].fw_base_addr;
}

template <typename T>
inline T HalCoreInfoType::get_binary_local_init_addr(uint32_t processor_class_idx, uint32_t processor_type_idx) const {
TT_ASSERT(processor_class_idx < this->processor_classes_.size());
TT_ASSERT(processor_type_idx < this->processor_classes_[processor_class_idx].size());
return this->processor_classes_[processor_class_idx][processor_type_idx].local_init_addr;
return this->processor_classes_[processor_class_idx][processor_type_idx];
}

class Hal {
Expand Down Expand Up @@ -209,11 +203,7 @@ class Hal {

uint32_t get_num_risc_processors() const;

template <typename T = DeviceAddr>
T get_base_firmware_addr(
uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const;
template <typename T = DeviceAddr>
T get_binary_local_init_addr(
const HalJitBuildConfig &get_jit_build_config(
uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const;

uint64_t relocate_dev_addr(uint64_t addr, uint64_t local_init_addr = 0) {
Expand Down Expand Up @@ -307,20 +297,10 @@ inline bool Hal::get_supports_cbs(uint32_t programmable_core_type_index) const {
return this->core_info_[programmable_core_type_index].supports_cbs_;
}

template <typename T>
inline T Hal::get_base_firmware_addr(
inline const HalJitBuildConfig &Hal::get_jit_build_config(
uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const {
TT_ASSERT(programmable_core_type_index < this->core_info_.size());
return this->core_info_[programmable_core_type_index].get_base_firmware_addr(
processor_class_idx, processor_type_idx);
}

template <typename T>
inline T Hal::get_binary_local_init_addr(
uint32_t programmable_core_type_index, uint32_t processor_class_idx, uint32_t processor_type_idx) const {
TT_ASSERT(programmable_core_type_index < this->core_info_.size());
return this->core_info_[programmable_core_type_index].get_binary_local_init_addr(
processor_class_idx, processor_type_idx);
return this->core_info_[programmable_core_type_index].get_jit_build_config(processor_class_idx, processor_type_idx);
}

class HalSingleton : public Hal {
Expand All @@ -342,6 +322,8 @@ class HalSingleton : public Hal {

inline auto& hal = HalSingleton::getInstance(); // inline variable requires C++17

uint32_t generate_risc_startup_addr(uint32_t firmware_base); // used by Tensix initializers to build HalJitBuildConfig

} // namespace tt_metal
} // namespace tt

Expand Down
Loading

0 comments on commit d767af2

Please sign in to comment.