Skip to content

Commit

Permalink
Sysmem/device read and write API cleanup (#69)
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT authored Sep 30, 2024
1 parent 5add3dd commit 46c78d4
Show file tree
Hide file tree
Showing 13 changed files with 192 additions and 217 deletions.
46 changes: 1 addition & 45 deletions device/tt_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,22 +366,11 @@ class tt_device
// Only implement this for Silicon Backend
throw std::runtime_error("---- tt_device::write_to_device is not implemented\n");
}

/**
 * Broadcast-write entry point; this base-class version is only a stub.
 *
 * The body does nothing except throw, so a backend that supports broadcast
 * writes must supply its own version (tt_SiliconDevice declares one).
 * Parameter meanings are inferred from their names only -- TODO confirm
 * against the implementing backend.
 *
 * @param mem_ptr Source buffer.
 * @param size_in_bytes Size of the source buffer in bytes.
 * @param address Address to write to.
 * @param chips_to_exclude Chips to leave out (presumably; verify).
 * @param rows_to_exclude Rows to leave out (presumably; verify).
 * @param columns_to_exclude Columns to leave out (presumably; verify).
 * @param fallback_tlb Specifies fallback/dynamic TLB to use.
 * @throws std::runtime_error Always.
 */
virtual void broadcast_write_to_cluster(
    const void *mem_ptr,
    uint32_t size_in_bytes,
    uint64_t address,
    const std::set<chip_id_t>& chips_to_exclude,
    std::set<uint32_t>& rows_to_exclude,
    std::set<uint32_t>& columns_to_exclude,
    const std::string& fallback_tlb) {
    const char* const message = "---- tt_device::broadcast_write_to_cluster is not implemented\n";
    throw std::runtime_error(message);
}

/**
 * Vector flavour of the device write (defined for Silicon).
 *
 * This base-class version is a stub: it ignores its arguments and throws.
 *
 * @param vec Data to write.
 * @param core Chip and core being targeted.
 * @param addr Address to write to.
 * @param tlb_to_use Specifies fallback/dynamic TLB to use.
 * @throws std::runtime_error Always.
 */
virtual void write_to_device(
    std::vector<uint32_t> &vec,
    tt_cxy_pair core,
    uint64_t addr,
    const std::string& tlb_to_use) {
    const char* const message = "---- tt_device::write_to_device is not implemented\n";
    throw std::runtime_error(message);
}

/**
* Read uint32_t data from a specified device, core and address to host memory (defined for Silicon).
*
Expand All @@ -396,19 +385,6 @@ class tt_device
throw std::runtime_error("---- tt_device::read_from_device is not implemented\n");
}

/**
 * Vector flavour of the device read (defined for Silicon).
 *
 * This base-class version is a stub: it ignores its arguments and throws.
 *
 * @param vec Vector to fill with data.
 * @param core Chip and core to target.
 * @param addr Address to read from.
 * @param size Number of bytes to read.
 * @param tlb_to_use Specifies fallback/dynamic TLB to use.
 * @throws std::runtime_error Always.
 */
virtual void read_from_device(
    std::vector<uint32_t> &vec,
    tt_cxy_pair core,
    uint64_t addr,
    uint32_t size,
    const std::string& tlb_to_use) {
    const char* const message = "---- tt_device::read_from_device is not implemented\n";
    throw std::runtime_error(message);
}

/**
* Write uint32_t vector to specified address and channel on host (defined for Silicon).
*
Expand All @@ -417,25 +393,9 @@ class tt_device
* @param channel Host channel to target.
* @param src_device_id Chip to target.
*/
virtual void write_to_sysmem(
    std::vector<uint32_t>& vec,
    uint64_t addr,
    uint16_t channel,
    chip_id_t src_device_id) {
    // Base-class stub: no argument is used; the call always fails.
    const char* const message = "---- tt_device::write_to_sysmem is not implemented\n";
    throw std::runtime_error(message);
}

/**
 * Raw-buffer flavour of the sysmem write.
 *
 * This base-class version is a stub: it ignores its arguments and throws.
 *
 * @param mem_ptr Source buffer.
 * @param size Size of the source buffer (presumably in bytes; the Silicon
 *             vector overload passes a byte count to the same helper).
 * @param addr Address to write to.
 * @param channel Host channel to target.
 * @param src_device_id Chip to target.
 * @throws std::runtime_error Always.
 */
virtual void write_to_sysmem(
    const void* mem_ptr,
    std::uint32_t size,
    uint64_t addr,
    uint16_t channel,
    chip_id_t src_device_id) {
    const char* const message = "---- tt_device::write_to_sysmem is not implemented\n";
    throw std::runtime_error(message);
}
/**
 * Vector flavour of the sysmem read (defined for Silicon).
 *
 * This base-class version is a stub: it ignores its arguments and throws.
 *
 * @param vec Vector to fill with data.
 * @param addr Address to read from.
 * @param channel Host channel to read data from.
 * @param size Number of bytes to read.
 * @param src_device_id Chip being targeted.
 * @throws std::runtime_error Always.
 */
virtual void read_from_sysmem(
    std::vector<uint32_t> &vec,
    uint64_t addr,
    uint16_t channel,
    uint32_t size,
    chip_id_t src_device_id) {
    const char* const message = "---- tt_device::read_from_sysmem is not implemented\n";
    throw std::runtime_error(message);
}
/**
 * Raw-buffer flavour of the sysmem read.
 *
 * This base-class version is a stub: it ignores its arguments and throws.
 *
 * @param mem_ptr Destination buffer.
 * @param addr Address to read from.
 * @param channel Host channel to read data from.
 * @param size Number of bytes to read.
 * @param src_device_id Chip being targeted.
 * @throws std::runtime_error Always.
 */
virtual void read_from_sysmem(
    void* mem_ptr,
    uint64_t addr,
    uint16_t channel,
    uint32_t size,
    chip_id_t src_device_id) {
    const char* const message = "---- tt_device::read_from_sysmem is not implemented\n";
    throw std::runtime_error(message);
}
Expand Down Expand Up @@ -670,14 +630,10 @@ class tt_SiliconDevice: public tt_device

// Runtime Functions
// Raw-buffer device write; the std::vector overload below forwards to this one.
virtual void write_to_device(const void *mem_ptr, uint32_t size_in_bytes, tt_cxy_pair core, uint64_t addr, const std::string& tlb_to_use);
// Convenience overload: writes vec.size() * sizeof(uint32_t) bytes starting at vec.data().
virtual void write_to_device(std::vector<uint32_t> &vec, tt_cxy_pair core, uint64_t addr, const std::string& tlb_to_use);
// NOTE(review): not spelled `virtual` here, but tt_device declares a virtual with this exact signature, so this overrides it.
void broadcast_write_to_cluster(const void *mem_ptr, uint32_t size_in_bytes, uint64_t address, const std::set<chip_id_t>& chips_to_exclude, std::set<uint32_t>& rows_to_exclude, std::set<uint32_t>& columns_to_exclude, const std::string& fallback_tlb);

// Raw-buffer device read; the std::vector overload below forwards to this one.
virtual void read_from_device(void* mem_ptr, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& fallback_tlb);
// Convenience overload: passes vec through size_buffer_to_capacity(vec, size), then reads into vec.data().
virtual void read_from_device(std::vector<uint32_t> &vec, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& tlb_to_use);
// Sysmem writes: both overloads hand the data to write_buffer(); the vector one passes vec.size() * sizeof(uint32_t) as the size.
virtual void write_to_sysmem(std::vector<uint32_t>& vec, uint64_t addr, uint16_t channel, chip_id_t src_device_id);
virtual void write_to_sysmem(const void* mem_ptr, std::uint32_t size, uint64_t addr, uint16_t channel, chip_id_t src_device_id);
// Sysmem reads: both overloads go through read_buffer(); the vector one first calls size_buffer_to_capacity(vec, size).
virtual void read_from_sysmem(std::vector<uint32_t> &vec, uint64_t addr, uint16_t channel, uint32_t size, chip_id_t src_device_id);
virtual void read_from_sysmem(void* mem_ptr, uint64_t addr, uint16_t channel, uint32_t size, chip_id_t src_device_id);
virtual void wait_for_non_mmio_flush();
// `cores` defaults to an empty set; NOTE(review): what an empty set means is decided by the definition, which is not visible here -- confirm.
void l1_membar(const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt_xy_pair>& cores = {});
Expand Down
37 changes: 10 additions & 27 deletions device/tt_silicon_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2023,7 +2023,7 @@ void tt_SiliconDevice::write_to_non_mmio_device(
write_to_sysmem(broadcast_header.data(), broadcast_header.size() * sizeof(uint32_t), host_dram_block_addr, host_dram_channel, mmio_capable_chip_logical);
}
// Write payload to sysmem
write_to_sysmem(data_block, host_dram_block_addr + BROADCAST_HEADER_SIZE * broadcast, host_dram_channel, mmio_capable_chip_logical);
write_to_sysmem(data_block.data(), data_block.size() * DATA_WORD_SIZE, host_dram_block_addr + BROADCAST_HEADER_SIZE * broadcast, host_dram_channel, mmio_capable_chip_logical);

} else {
uint32_t buf_address = eth_interface_params.eth_routing_data_buffer_addr + req_wr_ptr * max_block_size;
Expand Down Expand Up @@ -2243,7 +2243,8 @@ void tt_SiliconDevice::read_from_non_mmio_device(void* mem_ptr, tt_cxy_pair core
} else {
// Read 4 byte aligned block from device/sysmem
if (use_dram) {
read_from_sysmem(data_block, host_dram_block_addr, host_dram_channel, block_size, mmio_capable_chip_logical);
size_buffer_to_capacity(data_block, block_size);
read_from_sysmem(data_block.data(), host_dram_block_addr, host_dram_channel, block_size, mmio_capable_chip_logical);
} else {
uint32_t buf_address = eth_interface_params.eth_routing_data_buffer_addr + resp_rd_ptr * max_block_size;
size_buffer_to_capacity(data_block, block_size);
Expand Down Expand Up @@ -2659,32 +2660,25 @@ int tt_SiliconDevice::remote_arc_msg(int chip, uint32_t msg_code, bool wait_for_
// Raw-buffer sysmem write: a thin pass-through that hands every argument,
// unchanged and in the same order, to write_buffer().
void tt_SiliconDevice::write_to_sysmem(
    const void* mem_ptr,
    std::uint32_t size,
    uint64_t addr,
    uint16_t channel,
    chip_id_t src_device_id) {
    write_buffer(
        mem_ptr,
        size,
        addr,
        channel,
        src_device_id);
}
// Vector sysmem write: sends the whole vector (all vec.size() words, i.e.
// vec.size() * sizeof(uint32_t) bytes) to write_buffer().
void tt_SiliconDevice::write_to_sysmem(
    std::vector<uint32_t>& vec,
    uint64_t addr,
    uint16_t channel,
    chip_id_t src_device_id) {
    uint32_t* const payload = vec.data();
    const auto payload_bytes = vec.size() * sizeof(uint32_t);
    write_buffer(payload, payload_bytes, addr, channel, src_device_id);
}

// Raw-buffer sysmem read: a thin pass-through that hands every argument,
// unchanged and in the same order, to read_buffer().
void tt_SiliconDevice::read_from_sysmem(
    void* mem_ptr,
    uint64_t addr,
    uint16_t channel,
    uint32_t size,
    chip_id_t src_device_id) {
    read_buffer(
        mem_ptr,
        addr,
        channel,
        size,
        src_device_id);
}
// Vector sysmem read: prepares vec with size_buffer_to_capacity() and then
// reads `size` bytes into its storage via read_buffer().
void tt_SiliconDevice::read_from_sysmem(
    std::vector<uint32_t> &vec,
    uint64_t addr,
    uint16_t channel,
    uint32_t size,
    chip_id_t src_device_id) {
    // Presumably grows vec so it can hold `size` bytes -- helper is defined elsewhere; verify.
    size_buffer_to_capacity(vec, size);

    // Take the pointer only after sizing, since sizing may reallocate the storage.
    uint32_t* const destination = vec.data();
    read_buffer(destination, addr, channel, size, src_device_id);
}

void tt_SiliconDevice::set_membar_flag(const chip_id_t chip, const std::unordered_set<tt_xy_pair>& cores, const uint32_t barrier_value, const uint32_t barrier_addr, const std::string& fallback_tlb) {
tt_driver_atomics::sfence(); // Ensure that writes before this do not get reordered
std::unordered_set<tt_xy_pair> cores_synced = {};
std::vector<uint32_t> barrier_val_vec = {barrier_value};
for (const auto& core : cores) {
write_to_device(barrier_val_vec, tt_cxy_pair(chip, core), barrier_addr, fallback_tlb);
write_to_device(barrier_val_vec.data(), barrier_val_vec.size() * sizeof(uint32_t), tt_cxy_pair(chip, core), barrier_addr, fallback_tlb);
}
tt_driver_atomics::sfence(); // Ensure that all writes in the Host WC buffer are flushed
while (cores_synced.size() != cores.size()) {
for(const auto& core : cores) {
if (cores_synced.find(core) == cores_synced.end()) {
std::vector<uint32_t> readback_vec = {};
read_from_device(readback_vec, tt_cxy_pair(chip, core), barrier_addr, sizeof(std::uint32_t), fallback_tlb);
if (readback_vec.at(0) == barrier_value) {
uint32_t readback_val;
read_from_device(&readback_val, tt_cxy_pair(chip, core), barrier_addr, sizeof(std::uint32_t), fallback_tlb);
if (readback_val == barrier_value) {
cores_synced.insert(core);
}
else {
Expand Down Expand Up @@ -2797,11 +2791,6 @@ void tt_SiliconDevice::write_to_device(const void *mem_ptr, uint32_t size, tt_cx
}
}

void tt_SiliconDevice::write_to_device(std::vector<uint32_t> &vec, tt_cxy_pair core, uint64_t addr, const std::string& fallback_tlb) {
// Overloaded device writer that accepts a vector
write_to_device(vec.data(), vec.size() * sizeof(uint32_t), core, addr, fallback_tlb);
}

void tt_SiliconDevice::read_mmio_device_register(void* mem_ptr, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) {
PCIDevice *pci_device = get_pci_device(core.chip);

Expand Down Expand Up @@ -2854,12 +2843,6 @@ void tt_SiliconDevice::read_from_device(void* mem_ptr, tt_cxy_pair core, uint64_
}
}

void tt_SiliconDevice::read_from_device(std::vector<uint32_t> &vec, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) {
size_buffer_to_capacity(vec, size);
read_from_device(vec.data(), core, addr, size, fallback_tlb);
}


int tt_SiliconDevice::arc_msg(int logical_device_id, uint32_t msg_code, bool wait_for_done, uint32_t arg0, uint32_t arg1, int timeout, uint32_t *return_3, uint32_t *return_4) {
log_assert(arch_name != tt::ARCH::BLACKHOLE, "ARC messages not supported in Blackhole");
if(ndesc -> is_chip_mmio_capable(logical_device_id)) {
Expand Down Expand Up @@ -3003,11 +2986,11 @@ void tt_SiliconDevice::deassert_resets_and_set_power_state() {

void tt_SiliconDevice::verify_eth_fw() {
for(const auto& chip : target_devices_in_cluster) {
std::vector<uint32_t> mem_vector;
uint32_t fw_version;
std::vector<uint32_t> fw_versions;
for (tt_xy_pair &eth_core : get_soc_descriptor(chip).ethernet_cores) {
read_from_device(mem_vector, tt_cxy_pair(chip, eth_core), l1_address_params.fw_version_addr, sizeof(uint32_t), "LARGE_READ_TLB");
fw_versions.push_back(mem_vector.at(0));
read_from_device(&fw_version, tt_cxy_pair(chip, eth_core), l1_address_params.fw_version_addr, sizeof(uint32_t), "LARGE_READ_TLB");
fw_versions.push_back(fw_version);
}
verify_sw_fw_versions(chip, SW_VERSION, fw_versions);
eth_fw_version = tt_version(fw_versions.at(0));
Expand Down
Loading

0 comments on commit 46c78d4

Please sign in to comment.