Skip to content

Commit

Permalink
Fix address types (#328)
Browse files Browse the repository at this point in the history
### Issue
#303

### Description
Using signed integer for address is wrong and can cause bugs due to sign
extension since UMD represents addresses as uint64_t internally. 32 bits
is unnecessarily restrictive (there are address spaces in the chips that
reach beyond 0x7fffffff that I might want to configure a static window
for).

### List of the changes
* Use uint64_t instead of int32_t in `configure_tlb` method
* Convert sizes/constants in some header files from int32_t to uint32_t
* Remove unnecessary `std::` prefixing

### Testing
CI

### API Changes
This PR has API changes, but it shouldn't break anything.
  • Loading branch information
joelsmithTT authored Nov 26, 2024
1 parent db8a0d5 commit 766cc54
Show file tree
Hide file tree
Showing 6 changed files with 295 additions and 250 deletions.
25 changes: 11 additions & 14 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,14 @@ struct tt_4_byte_aligned_buffer {
namespace tt::umd {

bool Cluster::address_in_tlb_space(
uint32_t address, uint32_t size_in_bytes, int32_t tlb_index, uint64_t tlb_size, std::uint32_t chip) {
return (
(tlb_config_map.at(chip).find(tlb_index) != tlb_config_map.at(chip).end()) &&
address >= tlb_config_map.at(chip).at(tlb_index) &&
(address + size_in_bytes <= tlb_config_map.at(chip).at(tlb_index) + tlb_size));
uint64_t address, uint32_t size_in_bytes, int32_t tlb_index, uint64_t tlb_size, std::uint32_t chip) {
const auto& tlb_map = tlb_config_map.at(chip);
const auto it = tlb_map.find(tlb_index);
if (it != tlb_map.end()) {
auto mapped_address = it->second;
return address >= mapped_address && (address + size_in_bytes <= mapped_address + tlb_size);
}
return false;
}

std::unordered_map<chip_id_t, tt_SocDescriptor>& Cluster::get_virtual_soc_descriptors() {
Expand Down Expand Up @@ -1113,7 +1116,7 @@ void Cluster::write_device_memory(
const void* mem_ptr,
uint32_t size_in_bytes,
tt_cxy_pair target,
std::uint32_t address,
uint64_t address,
const std::string& fallback_tlb) {
PCIDevice* dev = get_pci_device(target.chip);
const uint8_t* buffer_addr = static_cast<const uint8_t*>(mem_ptr);
Expand Down Expand Up @@ -1164,13 +1167,7 @@ void Cluster::write_device_memory(
}

void Cluster::read_device_memory(
void* mem_ptr,
tt_cxy_pair target,
std::uint32_t address,
std::uint32_t size_in_bytes,
const std::string& fallback_tlb) {
// Assume that mem_ptr has been allocated adequate memory on host when this function is called. Otherwise, this
// function will cause a segfault.
void* mem_ptr, tt_cxy_pair target, uint64_t address, uint32_t size_in_bytes, const std::string& fallback_tlb) {
log_debug(
LogSiliconDriver,
"Cluster::read_device_memory to chip:{} {}-{} at 0x{:x} size_in_bytes: {}",
Expand Down Expand Up @@ -1379,7 +1376,7 @@ std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(
}

void Cluster::configure_tlb(
chip_id_t logical_device_id, tt_xy_pair core, std::int32_t tlb_index, std::int32_t address, uint64_t ordering) {
chip_id_t logical_device_id, tt_xy_pair core, int32_t tlb_index, uint64_t address, uint64_t ordering) {
log_assert(
ordering == TLB_DATA::Strict || ordering == TLB_DATA::Posted || ordering == TLB_DATA::Relaxed,
"Invalid ordering specified in Cluster::configure_tlb");
Expand Down
20 changes: 8 additions & 12 deletions device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,8 @@ class tt_device {
virtual void configure_tlb(
chip_id_t logical_device_id,
tt_xy_pair core,
std::int32_t tlb_index,
std::int32_t address,
int32_t tlb_index,
uint64_t address,
uint64_t ordering = TLB_DATA::Relaxed) {
throw std::runtime_error("---- tt_device::configure_tlb is not implemented\n");
}
Expand Down Expand Up @@ -721,8 +721,8 @@ class Cluster : public tt_device {
virtual void configure_tlb(
chip_id_t logical_device_id,
tt_xy_pair core,
std::int32_t tlb_index,
std::int32_t address,
int32_t tlb_index,
uint64_t address,
uint64_t ordering = TLB_DATA::Posted);
virtual void set_fallback_tlb_ordering_mode(const std::string& fallback_tlb, uint64_t ordering = TLB_DATA::Posted);
virtual void setup_core_to_tlb_map(
Expand Down Expand Up @@ -877,7 +877,7 @@ class Cluster : public tt_device {
const void* mem_ptr,
uint32_t size_in_bytes,
tt_cxy_pair target,
std::uint32_t address,
uint64_t address,
const std::string& fallback_tlb);
void write_to_non_mmio_device(
const void* mem_ptr,
Expand All @@ -887,11 +887,7 @@ class Cluster : public tt_device {
bool broadcast = false,
std::vector<int> broadcast_header = {});
void read_device_memory(
void* mem_ptr,
tt_cxy_pair target,
std::uint32_t address,
std::uint32_t size_in_bytes,
const std::string& fallback_tlb);
void* mem_ptr, tt_cxy_pair target, uint64_t address, uint32_t size_in_bytes, const std::string& fallback_tlb);
void read_from_non_mmio_device(void* mem_ptr, tt_cxy_pair core, uint64_t address, uint32_t size_in_bytes);
void read_mmio_device_register(
void* mem_ptr, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& fallback_tlb);
Expand Down Expand Up @@ -948,7 +944,7 @@ class Cluster : public tt_device {
uint32_t* return_3 = nullptr,
uint32_t* return_4 = nullptr);
bool address_in_tlb_space(
uint32_t address, uint32_t size_in_bytes, int32_t tlb_index, uint64_t tlb_size, uint32_t chip);
uint64_t address, uint32_t size_in_bytes, int32_t tlb_index, uint64_t tlb_size, uint32_t chip);
std::shared_ptr<boost::interprocess::named_mutex> get_mutex(const std::string& tlb_name, int pci_interface_id);
virtual uint32_t get_harvested_noc_rows_for_chip(
int logical_device_id); // Returns one-hot encoded harvesting mask for PCIe mapped chips
Expand Down Expand Up @@ -1012,7 +1008,7 @@ class Cluster : public tt_device {
std::unordered_map<chip_id_t, std::unordered_set<tt_xy_pair>> workers_per_chip = {};
std::unordered_set<tt_xy_pair> eth_cores = {};
std::unordered_set<tt_xy_pair> dram_cores = {};
std::map<chip_id_t, std::unordered_map<std::int32_t, std::int32_t>> tlb_config_map = {};
std::map<chip_id_t, std::unordered_map<int32_t, uint64_t>> tlb_config_map = {};
std::set<chip_id_t> all_target_mmio_devices;

// Note that these maps holds only entries for local PCIe chips.
Expand Down
66 changes: 33 additions & 33 deletions src/firmware/riscv/blackhole/eth_l1_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,35 +9,35 @@ namespace eth_l1_mem {


struct address_map {

// Sizes
static constexpr std::int32_t FIRMWARE_SIZE = 32 * 1024;
static constexpr std::int32_t ERISC_BARRIER_SIZE = 0x20; // 32 bytes reserved for Barrier
static constexpr std::int32_t COMMAND_Q_SIZE = 4 * 1024;
static constexpr std::int32_t DATA_BUFFER_SIZE_HOST = 4 * 1024;
static constexpr std::int32_t DATA_BUFFER_SIZE_ETH = 4 * 1024;
static constexpr std::int32_t DATA_BUFFER_SIZE_NOC = 16 * 1024;
static constexpr std::int32_t DATA_BUFFER_SIZE = 24 * 1024;
static constexpr std::int32_t EPOCH_RUNTIME_CONFIG_SIZE = 128; //
static constexpr std::int32_t OVERLAY_BLOB_SIZE = (32 * 1024) - EPOCH_RUNTIME_CONFIG_SIZE;
static constexpr std::int32_t OVERLAY_MAX_EXTRA_BLOB_SIZE = (0 * 1024);
static constexpr std::int32_t TILE_HEADER_BUF_SIZE = 32 * 1024; //
static constexpr std::int32_t NCRISC_L1_EPOCH_Q_SIZE = 32;
static constexpr std::int32_t FW_L1_BLOCK_SIZE = OVERLAY_BLOB_SIZE + EPOCH_RUNTIME_CONFIG_SIZE + TILE_HEADER_BUF_SIZE;
static constexpr std::int32_t FW_DRAM_BLOCK_SIZE = OVERLAY_BLOB_SIZE + OVERLAY_MAX_EXTRA_BLOB_SIZE + EPOCH_RUNTIME_CONFIG_SIZE + TILE_HEADER_BUF_SIZE;
static constexpr uint32_t FIRMWARE_SIZE = 32 * 1024;
static constexpr uint32_t ERISC_BARRIER_SIZE = 0x20; // 32 bytes reserved for Barrier
static constexpr uint32_t COMMAND_Q_SIZE = 4 * 1024;
static constexpr uint32_t DATA_BUFFER_SIZE_HOST = 4 * 1024;
static constexpr uint32_t DATA_BUFFER_SIZE_ETH = 4 * 1024;
static constexpr uint32_t DATA_BUFFER_SIZE_NOC = 16 * 1024;
static constexpr uint32_t DATA_BUFFER_SIZE = 24 * 1024;
static constexpr uint32_t EPOCH_RUNTIME_CONFIG_SIZE = 128; //
static constexpr uint32_t OVERLAY_BLOB_SIZE = (32 * 1024) - EPOCH_RUNTIME_CONFIG_SIZE;
static constexpr uint32_t OVERLAY_MAX_EXTRA_BLOB_SIZE = (0 * 1024);
static constexpr uint32_t TILE_HEADER_BUF_SIZE = 32 * 1024; //
static constexpr uint32_t NCRISC_L1_EPOCH_Q_SIZE = 32;
static constexpr uint32_t FW_L1_BLOCK_SIZE = OVERLAY_BLOB_SIZE + EPOCH_RUNTIME_CONFIG_SIZE + TILE_HEADER_BUF_SIZE;
static constexpr uint32_t FW_DRAM_BLOCK_SIZE = OVERLAY_BLOB_SIZE + OVERLAY_MAX_EXTRA_BLOB_SIZE + EPOCH_RUNTIME_CONFIG_SIZE + TILE_HEADER_BUF_SIZE;
// Base addresses
static constexpr std::int32_t FIRMWARE_BASE = 0x9040;
static constexpr std::int32_t ERISC_BARRIER_BASE = 0x11FE0;
static constexpr std::int32_t L1_EPOCH_Q_BASE = 0x9000; // Epoch Q start in L1.
static constexpr std::int32_t L1_DRAM_POLLING_CTRL_BASE = 0x9020;
static constexpr std::int32_t COMMAND_Q_BASE = L1_EPOCH_Q_BASE + FIRMWARE_SIZE;
static constexpr std::int32_t DATA_BUFFER_BASE = COMMAND_Q_BASE + COMMAND_Q_SIZE;
static constexpr std::int32_t TILE_HEADER_BUFFER_BASE = DATA_BUFFER_BASE + DATA_BUFFER_SIZE;
static constexpr std::int32_t EPOCH_RUNTIME_CONFIG_BASE = TILE_HEADER_BUFFER_BASE + TILE_HEADER_BUF_SIZE;
static constexpr std::int32_t OVERLAY_BLOB_BASE = EPOCH_RUNTIME_CONFIG_BASE + EPOCH_RUNTIME_CONFIG_SIZE;
static constexpr std::int32_t DATA_BUFFER_SPACE_BASE = OVERLAY_BLOB_BASE + OVERLAY_BLOB_SIZE;

static std::int32_t OVERLAY_FULL_BLOB_SIZE() {
static constexpr uint32_t FIRMWARE_BASE = 0x9040;
static constexpr uint32_t ERISC_BARRIER_BASE = 0x11FE0;
static constexpr uint32_t L1_EPOCH_Q_BASE = 0x9000; // Epoch Q start in L1.
static constexpr uint32_t L1_DRAM_POLLING_CTRL_BASE = 0x9020;
static constexpr uint32_t COMMAND_Q_BASE = L1_EPOCH_Q_BASE + FIRMWARE_SIZE;
static constexpr uint32_t DATA_BUFFER_BASE = COMMAND_Q_BASE + COMMAND_Q_SIZE;
static constexpr uint32_t TILE_HEADER_BUFFER_BASE = DATA_BUFFER_BASE + DATA_BUFFER_SIZE;
static constexpr uint32_t EPOCH_RUNTIME_CONFIG_BASE = TILE_HEADER_BUFFER_BASE + TILE_HEADER_BUF_SIZE;
static constexpr uint32_t OVERLAY_BLOB_BASE = EPOCH_RUNTIME_CONFIG_BASE + EPOCH_RUNTIME_CONFIG_SIZE;
static constexpr uint32_t DATA_BUFFER_SPACE_BASE = OVERLAY_BLOB_BASE + OVERLAY_BLOB_SIZE;

static uint32_t OVERLAY_FULL_BLOB_SIZE() {
return OVERLAY_BLOB_SIZE + OVERLAY_MAX_EXTRA_BLOB_SIZE;
};

Expand All @@ -48,15 +48,15 @@ template<std::size_t A, std::size_t B> struct TAssertEquality {

static constexpr bool _DATA_BUFFER_SPACE_BASE_CORRECT = TAssertEquality<DATA_BUFFER_SPACE_BASE, 0x28000>::_cResult;

static constexpr std::int32_t MAX_SIZE = 256*1024;
static constexpr std::int32_t MAX_L1_LOADING_SIZE = 1 * 256 * 1024;
static constexpr std::int32_t RISC_LOCAL_MEM_BASE = 0xffb00000; // Actaul local memory address as seen from risc firmware
static constexpr uint32_t MAX_SIZE = 256*1024;
static constexpr uint32_t MAX_L1_LOADING_SIZE = 1 * 256 * 1024;

static constexpr uint32_t RISC_LOCAL_MEM_BASE = 0xffb00000; // Actaul local memory address as seen from risc firmware
// As part of the init risc firmware will copy local memory data from
// l1 locations listed above into internal local memory that starts
// l1 locations listed above into internal local memory that starts
// at RISC_LOCAL_MEM_BASE address

static constexpr std::uint32_t FW_VERSION_ADDR = 0x210;
static constexpr uint32_t FW_VERSION_ADDR = 0x210;
};
} // namespace llk

Loading

0 comments on commit 766cc54

Please sign in to comment.