From 32103af59c56a8f4972692cc3af9ea9768d3b94e Mon Sep 17 00:00:00 2001 From: pjanevski Date: Mon, 23 Sep 2024 09:24:47 +0000 Subject: [PATCH 1/3] Remove rw device/sysmem API for vector --- device/tt_device.h | 46 +---------- device/tt_silicon_driver.cpp | 36 +++------ tests/blackhole/test_silicon_driver_bh.cpp | 79 ++++++++++--------- tests/emulation/test_emulation_device.cpp | 8 +- tests/galaxy/test_galaxy_common.cpp | 16 ++-- tests/galaxy/test_umd_concurrent_threads.cpp | 36 +++++---- tests/galaxy/test_umd_remote_api.cpp | 15 ++-- .../galaxy/test_umd_remote_api_stability.cpp | 1 - tests/grayskull/test_silicon_driver.cpp | 53 +++++++------ tests/microbenchmark/test_rw_tensix.cpp | 13 +-- tests/simulation/test_simulation_device.cpp | 9 ++- tests/test_utils/device_test_utils.hpp | 25 ++++++ tests/wormhole/test_silicon_driver_wh.cpp | 72 ++++++++--------- 13 files changed, 192 insertions(+), 217 deletions(-) create mode 100644 tests/test_utils/device_test_utils.hpp diff --git a/device/tt_device.h b/device/tt_device.h index b700adf1..f8098d4d 100644 --- a/device/tt_device.h +++ b/device/tt_device.h @@ -366,22 +366,11 @@ class tt_device // Only implement this for Silicon Backend throw std::runtime_error("---- tt_device::write_to_device is not implemented\n"); } + virtual void broadcast_write_to_cluster(const void *mem_ptr, uint32_t size_in_bytes, uint64_t address, const std::set& chips_to_exclude, std::set& rows_to_exclude, std::set& columns_to_exclude, const std::string& fallback_tlb) { throw std::runtime_error("---- tt_device::broadcast_write_to_cluster is not implemented\n"); } - /** - * Write uint32_t vector to specified device, core and address (defined for Silicon). - * - * @param vec Data to write. - * @param core Chip and core being targeted. - * @param addr Address to write to. - * @param tlb_to_use Specifies fallback/dynamic TLB to use. 
- */ - virtual void write_to_device(std::vector &vec, tt_cxy_pair core, uint64_t addr, const std::string& tlb_to_use) { - throw std::runtime_error("---- tt_device::write_to_device is not implemented\n"); - } - /** * Read uint32_t data from a specified device, core and address to host memory (defined for Silicon). * @@ -396,19 +385,6 @@ class tt_device throw std::runtime_error("---- tt_device::read_from_device is not implemented\n"); } - /** - * Read a uint32_t vector from a specified device, core and address to host memory (defined for Silicon). - * - * @param vec Vector to fill with data. - * @param core Chip and core to target. - * @param addr Address to read from. - * @param size Number of bytes to read. - * @param fallback_tlb Specifies fallback/dynamic TLB to use. - */ - virtual void read_from_device(std::vector &vec, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& tlb_to_use) { - throw std::runtime_error("---- tt_device::read_from_device is not implemented\n"); - } - /** * Write uint32_t vector to specified address and channel on host (defined for Silicon). * @@ -417,25 +393,9 @@ class tt_device * @param channel Host channel to target. * @param src_device_id Chip to target. */ - virtual void write_to_sysmem(std::vector& vec, uint64_t addr, uint16_t channel, chip_id_t src_device_id) { - throw std::runtime_error("---- tt_device::write_to_sysmem is not implemented\n"); - } - virtual void write_to_sysmem(const void* mem_ptr, std::uint32_t size, uint64_t addr, uint16_t channel, chip_id_t src_device_id) { throw std::runtime_error("---- tt_device::write_to_sysmem is not implemented\n"); } - /** - * Read uint32_t vector from specified address and channel on host (defined for Silicon). - * - * @param vec Vector to fill with data. - * @param addr Address to read from. - * @param channel Host channel to read data from. - * @param size Number of bytes to read. - * @param src_device_id Chip being targeted. 
- */ - virtual void read_from_sysmem(std::vector &vec, uint64_t addr, uint16_t channel, uint32_t size, chip_id_t src_device_id) { - throw std::runtime_error("---- tt_device::read_from_sysmem is not implemented\n"); - } virtual void read_from_sysmem(void* mem_ptr, uint64_t addr, uint16_t channel, uint32_t size, chip_id_t src_device_id) { throw std::runtime_error("---- tt_device::read_from_sysmem is not implemented\n"); } @@ -670,14 +630,10 @@ class tt_SiliconDevice: public tt_device // Runtime Functions virtual void write_to_device(const void *mem_ptr, uint32_t size_in_bytes, tt_cxy_pair core, uint64_t addr, const std::string& tlb_to_use); - virtual void write_to_device(std::vector &vec, tt_cxy_pair core, uint64_t addr, const std::string& tlb_to_use); void broadcast_write_to_cluster(const void *mem_ptr, uint32_t size_in_bytes, uint64_t address, const std::set& chips_to_exclude, std::set& rows_to_exclude, std::set& columns_to_exclude, const std::string& fallback_tlb); virtual void read_from_device(void* mem_ptr, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& fallback_tlb); - virtual void read_from_device(std::vector &vec, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& tlb_to_use); - virtual void write_to_sysmem(std::vector& vec, uint64_t addr, uint16_t channel, chip_id_t src_device_id); virtual void write_to_sysmem(const void* mem_ptr, std::uint32_t size, uint64_t addr, uint16_t channel, chip_id_t src_device_id); - virtual void read_from_sysmem(std::vector &vec, uint64_t addr, uint16_t channel, uint32_t size, chip_id_t src_device_id); virtual void read_from_sysmem(void* mem_ptr, uint64_t addr, uint16_t channel, uint32_t size, chip_id_t src_device_id); virtual void wait_for_non_mmio_flush(); void l1_membar(const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set& cores = {}); diff --git a/device/tt_silicon_driver.cpp b/device/tt_silicon_driver.cpp index 36558dab..851a2d9c 100644 --- 
a/device/tt_silicon_driver.cpp +++ b/device/tt_silicon_driver.cpp @@ -2023,7 +2023,7 @@ void tt_SiliconDevice::write_to_non_mmio_device( write_to_sysmem(broadcast_header.data(), broadcast_header.size() * sizeof(uint32_t), host_dram_block_addr, host_dram_channel, mmio_capable_chip_logical); } // Write payload to sysmem - write_to_sysmem(data_block, host_dram_block_addr + BROADCAST_HEADER_SIZE * broadcast, host_dram_channel, mmio_capable_chip_logical); + write_to_sysmem(data_block.data(), data_block.size() * DATA_WORD_SIZE, host_dram_block_addr + BROADCAST_HEADER_SIZE * broadcast, host_dram_channel, mmio_capable_chip_logical); } else { uint32_t buf_address = eth_interface_params.eth_routing_data_buffer_addr + req_wr_ptr * max_block_size; @@ -2243,7 +2243,7 @@ void tt_SiliconDevice::read_from_non_mmio_device(void* mem_ptr, tt_cxy_pair core } else { // Read 4 byte aligned block from device/sysmem if (use_dram) { - read_from_sysmem(data_block, host_dram_block_addr, host_dram_channel, block_size, mmio_capable_chip_logical); + read_from_sysmem(data_block.data(), host_dram_block_addr, host_dram_channel, block_size, mmio_capable_chip_logical); } else { uint32_t buf_address = eth_interface_params.eth_routing_data_buffer_addr + resp_rd_ptr * max_block_size; size_buffer_to_capacity(data_block, block_size); @@ -2659,32 +2659,25 @@ int tt_SiliconDevice::remote_arc_msg(int chip, uint32_t msg_code, bool wait_for_ void tt_SiliconDevice::write_to_sysmem(const void* mem_ptr, std::uint32_t size, uint64_t addr, uint16_t channel, chip_id_t src_device_id) { write_buffer(mem_ptr, size, addr, channel, src_device_id); } -void tt_SiliconDevice::write_to_sysmem(std::vector& vec, uint64_t addr, uint16_t channel, chip_id_t src_device_id) { - write_buffer(vec.data(), vec.size() * sizeof(uint32_t), addr, channel, src_device_id); -} void tt_SiliconDevice::read_from_sysmem(void* mem_ptr, uint64_t addr, uint16_t channel, uint32_t size, chip_id_t src_device_id) { read_buffer(mem_ptr, addr, channel, 
size, src_device_id); } -void tt_SiliconDevice::read_from_sysmem(std::vector &vec, uint64_t addr, uint16_t channel, uint32_t size, chip_id_t src_device_id) { - size_buffer_to_capacity(vec, size); - read_buffer(vec.data(), addr, channel, size, src_device_id); -} void tt_SiliconDevice::set_membar_flag(const chip_id_t chip, const std::unordered_set& cores, const uint32_t barrier_value, const uint32_t barrier_addr, const std::string& fallback_tlb) { tt_driver_atomics::sfence(); // Ensure that writes before this do not get reordered std::unordered_set cores_synced = {}; std::vector barrier_val_vec = {barrier_value}; for (const auto& core : cores) { - write_to_device(barrier_val_vec, tt_cxy_pair(chip, core), barrier_addr, fallback_tlb); + write_to_device(barrier_val_vec.data(), barrier_val_vec.size() * sizeof(uint32_t), tt_cxy_pair(chip, core), barrier_addr, fallback_tlb); } tt_driver_atomics::sfence(); // Ensure that all writes in the Host WC buffer are flushed while (cores_synced.size() != cores.size()) { for(const auto& core : cores) { if (cores_synced.find(core) == cores_synced.end()) { - std::vector readback_vec = {}; - read_from_device(readback_vec, tt_cxy_pair(chip, core), barrier_addr, sizeof(std::uint32_t), fallback_tlb); - if (readback_vec.at(0) == barrier_value) { + uint32_t readback_val; + read_from_device(&readback_val, tt_cxy_pair(chip, core), barrier_addr, sizeof(std::uint32_t), fallback_tlb); + if (readback_val == barrier_value) { cores_synced.insert(core); } else { @@ -2797,11 +2790,6 @@ void tt_SiliconDevice::write_to_device(const void *mem_ptr, uint32_t size, tt_cx } } -void tt_SiliconDevice::write_to_device(std::vector &vec, tt_cxy_pair core, uint64_t addr, const std::string& fallback_tlb) { - // Overloaded device writer that accepts a vector - write_to_device(vec.data(), vec.size() * sizeof(uint32_t), core, addr, fallback_tlb); -} - void tt_SiliconDevice::read_mmio_device_register(void* mem_ptr, tt_cxy_pair core, uint64_t addr, uint32_t size, const 
std::string& fallback_tlb) { PCIDevice *pci_device = get_pci_device(core.chip); @@ -2854,12 +2842,6 @@ void tt_SiliconDevice::read_from_device(void* mem_ptr, tt_cxy_pair core, uint64_ } } -void tt_SiliconDevice::read_from_device(std::vector &vec, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) { - size_buffer_to_capacity(vec, size); - read_from_device(vec.data(), core, addr, size, fallback_tlb); -} - - int tt_SiliconDevice::arc_msg(int logical_device_id, uint32_t msg_code, bool wait_for_done, uint32_t arg0, uint32_t arg1, int timeout, uint32_t *return_3, uint32_t *return_4) { log_assert(arch_name != tt::ARCH::BLACKHOLE, "ARC messages not supported in Blackhole"); if(ndesc -> is_chip_mmio_capable(logical_device_id)) { @@ -3003,11 +2985,11 @@ void tt_SiliconDevice::deassert_resets_and_set_power_state() { void tt_SiliconDevice::verify_eth_fw() { for(const auto& chip : target_devices_in_cluster) { - std::vector mem_vector; + uint32_t fw_version; std::vector fw_versions; for (tt_xy_pair &eth_core : get_soc_descriptor(chip).ethernet_cores) { - read_from_device(mem_vector, tt_cxy_pair(chip, eth_core), l1_address_params.fw_version_addr, sizeof(uint32_t), "LARGE_READ_TLB"); - fw_versions.push_back(mem_vector.at(0)); + read_from_device(&fw_version, tt_cxy_pair(chip, eth_core), l1_address_params.fw_version_addr, sizeof(uint32_t), "LARGE_READ_TLB"); + fw_versions.push_back(fw_version); } verify_sw_fw_versions(chip, SW_VERSION, fw_versions); eth_fw_version = tt_version(fw_versions.at(0)); diff --git a/tests/blackhole/test_silicon_driver_bh.cpp b/tests/blackhole/test_silicon_driver_bh.cpp index 23816841..daf9c526 100644 --- a/tests/blackhole/test_silicon_driver_bh.cpp +++ b/tests/blackhole/test_silicon_driver_bh.cpp @@ -15,6 +15,7 @@ #include "device/blackhole/blackhole_implementation.h" #include "device/tt_cluster_descriptor.h" #include "tests/test_utils/generate_cluster_desc.hpp" +#include "tests/test_utils/device_test_utils.hpp" void 
set_params_for_remote_txn(tt_SiliconDevice& device) { // Populate address map and NOC parameters that the driver needs for remote transactions @@ -188,18 +189,18 @@ TEST(SiliconDriverBH, CreateDestroy) { // std::uint32_t dynamic_write_address = 0x40000000; // for(int loop = 0; loop < 100; loop++){ // Write to each core a 100 times at different statically mapped addresses // for(auto& core : device.get_virtual_soc_descriptors().at(i).workers) { -// device.write_to_device(vector_to_write, tt_cxy_pair(i, core), address, ""); -// device.write_to_device(vector_to_write, tt_cxy_pair(i, core), dynamic_write_address, "SMALL_READ_WRITE_TLB"); +// device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, ""); +// device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), dynamic_write_address, "SMALL_READ_WRITE_TLB"); // device.wait_for_non_mmio_flush(); // Barrier to ensure that all writes over ethernet were commited -// device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, 40, ""); -// device.read_from_device(dynamic_readback_vec, tt_cxy_pair(i, core), dynamic_write_address, 40, "SMALL_READ_WRITE_TLB"); +// test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, 40, ""); +// test_utils::read_data_from_device(device, dynamic_readback_vec, tt_cxy_pair(i, core), dynamic_write_address, 40, "SMALL_READ_WRITE_TLB"); // ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; // ASSERT_EQ(vector_to_write, dynamic_readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; // device.wait_for_non_mmio_flush(); -// device.write_to_device(zeros, tt_cxy_pair(i, core), dynamic_write_address, "SMALL_READ_WRITE_TLB"); // Clear any written data -// device.write_to_device(zeros, 
tt_cxy_pair(i, core), address, ""); // Clear any written data +// device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), dynamic_write_address, "SMALL_READ_WRITE_TLB"); // Clear any written data +// device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, ""); // Clear any written data // device.wait_for_non_mmio_flush(); // readback_vec = {}; // dynamic_readback_vec = {}; @@ -256,7 +257,7 @@ TEST(SiliconDriverBH, UnalignedStaticTLB_RW) { for(auto& core : device.get_virtual_soc_descriptors().at(i).workers) { device.write_to_device(write_vec.data(), size, tt_cxy_pair(i, core), address, ""); device.wait_for_non_mmio_flush(); - device.read_from_device(readback_vec.data(), tt_cxy_pair(i, core), address, size, ""); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, size, ""); ASSERT_EQ(readback_vec, write_vec); readback_vec = std::vector(size, 0); device.write_to_sysmem(write_vec.data(), size, 0, 0, 0); @@ -315,12 +316,12 @@ TEST(SiliconDriverBH, StaticTLB_RW) { std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE; for(int loop = 0; loop < 1; loop++){ // Write to each core a 100 times at different statically mapped addresses for(auto& core : device.get_virtual_soc_descriptors().at(i).workers) { - device.write_to_device(vector_to_write, tt_cxy_pair(i, core), address, ""); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, ""); device.wait_for_non_mmio_flush(); // Barrier to ensure that all writes over ethernet were commited - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, 40, ""); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, 40, ""); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; 
device.wait_for_non_mmio_flush(); - device.write_to_device(zeros, tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); // Clear any written data device.wait_for_non_mmio_flush(); readback_vec = {}; } @@ -354,12 +355,12 @@ TEST(SiliconDriverBH, DynamicTLB_RW) { std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE; for(int loop = 0; loop < 100; loop++){ // Write to each core a 100 times at different statically mapped addresses for(auto& core : device.get_virtual_soc_descriptors().at(i).workers) { - device.write_to_device(vector_to_write, tt_cxy_pair(i, core), address, "DYNAMIC_TLB_BASE_INDEX"); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "DYNAMIC_TLB_BASE_INDEX"); device.wait_for_non_mmio_flush(); // Barrier to ensure that all writes over ethernet were commited - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, 40, "DYNAMIC_TLB_BASE_INDEX"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, 40, "DYNAMIC_TLB_BASE_INDEX"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; device.wait_for_non_mmio_flush(); - device.write_to_device(zeros, tt_cxy_pair(i, core), address, "DYNAMIC_TLB_BASE_INDEX"); + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "DYNAMIC_TLB_BASE_INDEX"); device.wait_for_non_mmio_flush(); readback_vec = {}; } @@ -377,12 +378,12 @@ TEST(SiliconDriverBH, DynamicTLB_RW) { for (int ch=0; ch chan = device.get_virtual_soc_descriptors().at(i).dram_cores.at(ch); tt_xy_pair subchan = chan.at(0); - device.write_to_device(vector_to_write, tt_cxy_pair(i, subchan), address, "DYNAMIC_TLB_BASE_INDEX"); + 
device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(i, subchan), address, "DYNAMIC_TLB_BASE_INDEX"); device.wait_for_non_mmio_flush(); // Barrier to ensure that all writes over ethernet were commited - device.read_from_device(readback_vec, tt_cxy_pair(i, subchan), address, 40, "DYNAMIC_TLB_BASE_INDEX"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, subchan), address, 40, "DYNAMIC_TLB_BASE_INDEX"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << subchan.x << "-" << subchan.y << "does not match what was written"; device.wait_for_non_mmio_flush(); - device.write_to_device(zeros, tt_cxy_pair(i, subchan), address, "DYNAMIC_TLB_BASE_INDEX"); + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, subchan), address, "DYNAMIC_TLB_BASE_INDEX"); device.wait_for_non_mmio_flush(); readback_vec = {}; address += 0x20; // Increment by uint32_t size for each write @@ -417,8 +418,8 @@ TEST(SiliconDriverBH, MultiThreadedDevice) { std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE; for(int loop = 0; loop < 100; loop++) { for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { - device.write_to_device(vector_to_write, tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); - device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; readback_vec = {}; } @@ -433,8 +434,8 @@ TEST(SiliconDriverBH, MultiThreadedDevice) { for(auto& core_ls : 
device.get_virtual_soc_descriptors().at(0).dram_cores) { for(int loop = 0; loop < 100; loop++) { for(auto& core : core_ls) { - device.write_to_device(vector_to_write, tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); - device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; readback_vec = {}; } @@ -482,20 +483,20 @@ TEST(SiliconDriverBH, MultiThreadedMemBar) { std::vector readback_membar_vec = {}; for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { - device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_membar_vec, tt_cxy_pair(0, core), l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all workers readback_membar_vec = {}; } for(int chan = 0; chan < device.get_virtual_soc_descriptors().at(0).get_num_dram_channels(); chan++) { auto core = device.get_virtual_soc_descriptors().at(0).get_core_for_dram_channel(chan, 0); - device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), 0, 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_membar_vec, tt_cxy_pair(0, core), 0, 4, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all DRAM readback_membar_vec = {}; } for(auto& core : device.get_virtual_soc_descriptors().at(0).ethernet_cores) { - 
device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), eth_l1_mem::address_map::ERISC_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_membar_vec, tt_cxy_pair(0, core), eth_l1_mem::address_map::ERISC_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all ethernet cores readback_membar_vec = {}; } @@ -517,11 +518,11 @@ TEST(SiliconDriverBH, MultiThreadedMemBar) { for(int loop = 0; loop < 50; loop++) { for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { std::vector readback_vec = {}; - device.write_to_device(vec1, tt_cxy_pair(0, core), address, ""); + device.write_to_device(vec1.data(), vec1.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, ""); device.l1_membar(0, "SMALL_READ_WRITE_TLB", {core}); - device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 4*vec1.size(), ""); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(0, core), address, 4*vec1.size(), ""); ASSERT_EQ(readback_vec, vec1); - device.write_to_device(zeros, tt_cxy_pair(0, core), address, ""); + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, ""); readback_vec = {}; } @@ -533,11 +534,11 @@ TEST(SiliconDriverBH, MultiThreadedMemBar) { for(int loop = 0; loop < 50; loop++) { for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { std::vector readback_vec = {}; - device.write_to_device(vec2, tt_cxy_pair(0, core), address, ""); + device.write_to_device(vec2.data(), vec2.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, ""); device.l1_membar(0, "SMALL_READ_WRITE_TLB", {core}); - device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 4*vec2.size(), ""); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(0, core), address, 4*vec2.size(), ""); ASSERT_EQ(readback_vec, vec2); - 
device.write_to_device(zeros, tt_cxy_pair(0, core), address, "") ; + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "") ; readback_vec = {}; } } @@ -547,13 +548,13 @@ TEST(SiliconDriverBH, MultiThreadedMemBar) { th2.join(); for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { - device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_membar_vec, tt_cxy_pair(0, core), l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers end up in the correct sate for workers readback_membar_vec = {}; } for(auto& core : device.get_virtual_soc_descriptors().at(0).ethernet_cores) { - device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), eth_l1_mem::address_map::ERISC_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_membar_vec, tt_cxy_pair(0, core), eth_l1_mem::address_map::ERISC_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers end up in the correct sate for ethernet cores readback_membar_vec = {}; } @@ -599,16 +600,16 @@ TEST(SiliconDriverBH, DISABLED_BroadcastWrite) { // Cannot broadcast to tensix/e for(const auto i : target_devices) { for(const auto& core : device.get_virtual_soc_descriptors().at(i).workers) { if(rows_to_exclude.find(core.y) != rows_to_exclude.end()) continue; - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was broadcasted"; - 
device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data readback_vec = {}; } for(int chan = 0; chan < device.get_virtual_soc_descriptors().at(i).get_num_dram_channels(); chan++) { const auto& core = device.get_virtual_soc_descriptors().at(i).get_core_for_dram_channel(chan, 0); - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from DRAM core " << i << " " << core.x << "-" << core.y << " does not match what was broadcasted " << size; - device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data readback_vec = {}; } } @@ -663,16 +664,16 @@ TEST(SiliconDriverBH, DISABLED_VirtualCoordinateBroadcast) { // same problem as for(const auto i : target_devices) { for(const auto& core : device.get_virtual_soc_descriptors().at(i).workers) { if(rows_to_exclude.find(core.y) != rows_to_exclude.end()) continue; - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was broadcasted"; - device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data + 
device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data readback_vec = {}; } for(int chan = 0; chan < device.get_virtual_soc_descriptors().at(i).get_num_dram_channels(); chan++) { const auto& core = device.get_virtual_soc_descriptors().at(i).get_core_for_dram_channel(chan, 0); - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from DRAM core " << i << " " << core.x << "-" << core.y << " does not match what was broadcasted " << size; - device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data readback_vec = {}; } } diff --git a/tests/emulation/test_emulation_device.cpp b/tests/emulation/test_emulation_device.cpp index aef96112..c78af81b 100644 --- a/tests/emulation/test_emulation_device.cpp +++ b/tests/emulation/test_emulation_device.cpp @@ -24,14 +24,14 @@ TEST(EmulationDeviceGS, BasicEmuTest) { for (auto &byte : wdata) { byte = rand(); } - device.write_to_device(wdata, tt_cxy_pair(0, core), l1_addr, "l1"); - device.read_from_device(rdata, tt_cxy_pair(0, core), l1_addr, size, "l1"); + device.write_to_device(wdata.data(), wdata.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), l1_addr, "l1"); + test_utils::read_data_from_device(device, rdata, tt_cxy_pair(0, core), l1_addr, size, "l1"); ASSERT_EQ(wdata, rdata) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; device.deassert_risc_reset(); - device.write_to_device(wdata, tt_cxy_pair(0, tt_xy_pair(phys_x, phys_y)), 
l1_addr, "l1"); + device.write_to_device(wdata.data(), wdata.size() * sizeof(std::uint32_t), tt_cxy_pair(0, tt_xy_pair(phys_x, phys_y)), l1_addr, "l1"); device.assert_risc_reset(); - device.write_to_device(wdata, tt_cxy_pair(0, tt_xy_pair(phys_x, phys_y)), l1_addr, "l1"); + device.write_to_device(wdata.data(), wdata.size() * sizeof(std::uint32_t), tt_cxy_pair(0, tt_xy_pair(phys_x, phys_y)), l1_addr, "l1"); } catch (const std::exception &e) { diff --git a/tests/galaxy/test_galaxy_common.cpp b/tests/galaxy/test_galaxy_common.cpp index 7df56d81..b9d33586 100644 --- a/tests/galaxy/test_galaxy_common.cpp +++ b/tests/galaxy/test_galaxy_common.cpp @@ -3,13 +3,16 @@ // SPDX-License-Identifier: Apache-2.0 #include "test_galaxy_common.h" + +#include "tests/test_utils/device_test_utils.hpp" + void move_data( tt_SiliconDevice& device, tt_multichip_core_addr sender_core, tt_multichip_core_addr receiver_core, uint32_t size) { std::vector readback_vec = {}; - device.read_from_device( - readback_vec, tt_cxy_pair(sender_core.chip, sender_core.core), sender_core.addr, size, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device( + device, readback_vec, tt_cxy_pair(sender_core.chip, sender_core.core), sender_core.addr, size, "SMALL_READ_WRITE_TLB"); device.write_to_device( - readback_vec, tt_cxy_pair(receiver_core.chip, receiver_core.core), receiver_core.addr, "SMALL_READ_WRITE_TLB"); + readback_vec.data(), readback_vec.size() * sizeof(std::uint32_t), tt_cxy_pair(receiver_core.chip, receiver_core.core), receiver_core.addr, "SMALL_READ_WRITE_TLB"); device.wait_for_non_mmio_flush(); // Barrier to ensure that all writes over ethernet were commited return; @@ -21,11 +24,12 @@ void broadcast_data( std::vector receiver_cores, uint32_t size) { std::vector readback_vec = {}; - device.read_from_device( - readback_vec, tt_cxy_pair(sender_core.chip, sender_core.core), sender_core.addr, size, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device( + device, readback_vec, 
tt_cxy_pair(sender_core.chip, sender_core.core), sender_core.addr, size, "SMALL_READ_WRITE_TLB"); for (const auto& receiver_core : receiver_cores) { device.write_to_device( - readback_vec, + readback_vec.data(), + readback_vec.size() * sizeof(std::uint32_t), tt_cxy_pair(receiver_core.chip, receiver_core.core), receiver_core.addr, "SMALL_READ_WRITE_TLB"); diff --git a/tests/galaxy/test_umd_concurrent_threads.cpp b/tests/galaxy/test_umd_concurrent_threads.cpp index c948a4c1..7c193e48 100644 --- a/tests/galaxy/test_umd_concurrent_threads.cpp +++ b/tests/galaxy/test_umd_concurrent_threads.cpp @@ -17,6 +17,7 @@ #include "test_galaxy_common.h" #include "tests/wormhole/test_wh_common.h" #include "tests/test_utils/generate_cluster_desc.hpp" +#include "tests/test_utils/device_test_utils.hpp" static const std::string SOC_DESC_PATH = "tests/soc_descs/wormhole_b0_8x10.yaml"; @@ -71,14 +72,14 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsL1) { std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE; for (const auto& chip : target_devices_th1) { for (auto& core : sdesc_per_chip.at(chip).workers) { - device.write_to_device(vector_to_write_th1, tt_cxy_pair(chip, core), address, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write_th1.data(), vector_to_write_th1.size() * sizeof(std::uint32_t), tt_cxy_pair(chip, core), address, "SMALL_READ_WRITE_TLB"); } } device.wait_for_non_mmio_flush(); for (auto& chip : target_devices_th1) { for (auto& core : sdesc_per_chip.at(chip).workers) { - device.read_from_device( - readback_vec, tt_cxy_pair(chip, core), address, write_size, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device( + device, readback_vec, tt_cxy_pair(chip, core), address, write_size, "SMALL_READ_WRITE_TLB"); EXPECT_EQ(vector_to_write_th1, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; readback_vec = {}; @@ -92,14 +93,14 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsL1) { 
std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE; for (const auto& chip : target_devices_th2) { for (auto& core : sdesc_per_chip.at(chip).workers) { - device.write_to_device(vector_to_write_th2, tt_cxy_pair(chip, core), address, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write_th2.data(), vector_to_write_th2.size() * sizeof(std::uint32_t), tt_cxy_pair(chip, core), address, "SMALL_READ_WRITE_TLB"); } } device.wait_for_non_mmio_flush(); for (const auto& chip : target_devices_th2) { for (auto& core : sdesc_per_chip.at(chip).workers) { - device.read_from_device( - readback_vec, tt_cxy_pair(chip, core), address, write_size, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device( + device, readback_vec, tt_cxy_pair(chip, core), address, write_size, "SMALL_READ_WRITE_TLB"); EXPECT_EQ(vector_to_write_th2, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; readback_vec = {}; @@ -165,14 +166,14 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsDram) { std::uint32_t address = 0x4000000; for (const auto& chip : target_devices_th1) { for (auto& core : dram_cores) { - device.write_to_device(vector_to_write, tt_cxy_pair(chip, core), address, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(chip, core), address, "SMALL_READ_WRITE_TLB"); } } device.wait_for_non_mmio_flush(); for (const auto& chip : target_devices_th1) { for (auto& core : dram_cores) { - device.read_from_device( - readback_vec, tt_cxy_pair(chip, core), address, write_size, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device( + device, readback_vec, tt_cxy_pair(chip, core), address, write_size, "SMALL_READ_WRITE_TLB"); EXPECT_EQ(vector_to_write, readback_vec) << "Vector read back from dram core " << core.x << "-" << core.y << "does not match what was written"; readback_vec = {}; @@ -185,14 +186,14 @@ 
TEST(GalaxyConcurrentThreads, WriteToAllChipsDram) { std::uint32_t address = 0x5000000; for (const auto& chip : target_devices_th2) { for (auto& core : sdesc_per_chip.at(chip).workers) { - device.write_to_device(vector_to_write, tt_cxy_pair(chip, core), address, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(chip, core), address, "SMALL_READ_WRITE_TLB"); } } device.wait_for_non_mmio_flush(); for (const auto& chip : target_devices_th2) { for (auto& core : sdesc_per_chip.at(chip).workers) { - device.read_from_device( - readback_vec, tt_cxy_pair(chip, core), address, write_size, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device( + device, readback_vec, tt_cxy_pair(chip, core), address, write_size, "SMALL_READ_WRITE_TLB"); EXPECT_EQ(vector_to_write, readback_vec) << "Vector read back from dram core " << core.x << "-" << core.y << "does not match what was written"; readback_vec = {}; @@ -244,8 +245,9 @@ TEST(GalaxyConcurrentThreads, PushInputsWhileSignalingCluster) { chip_id_t mmio_chip = cluster_desc->get_chips_with_mmio().begin()->first; std::vector readback_vec = {}; std::uint32_t address = 0x0; - device.write_to_device(large_vector, tt_cxy_pair(mmio_chip, tt_xy_pair(0, 0)), address, "SMALL_READ_WRITE_TLB"); - device.read_from_device( + device.write_to_device(large_vector.data(), large_vector.size() * sizeof(std::uint32_t), tt_cxy_pair(mmio_chip, tt_xy_pair(0, 0)), address, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device( + device, readback_vec, tt_cxy_pair(mmio_chip, tt_xy_pair(0, 0)), address, @@ -261,14 +263,14 @@ TEST(GalaxyConcurrentThreads, PushInputsWhileSignalingCluster) { std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE; for (const auto& chip : target_devices) { for (auto& core : sdesc_per_chip.at(chip).workers) { - device.write_to_device(small_vector, tt_cxy_pair(chip, core), address, "SMALL_READ_WRITE_TLB"); + 
device.write_to_device(small_vector.data(), small_vector.size() * sizeof(std::uint32_t), tt_cxy_pair(chip, core), address, "SMALL_READ_WRITE_TLB"); } } device.wait_for_non_mmio_flush(); for (const auto& chip : target_devices) { for (auto& core : sdesc_per_chip.at(chip).workers) { - device.read_from_device( - readback_vec, tt_cxy_pair(chip, core), address, small_vector.size() * 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device( + device, readback_vec, tt_cxy_pair(chip, core), address, small_vector.size() * 4, "SMALL_READ_WRITE_TLB"); EXPECT_EQ(small_vector, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; readback_vec = {}; diff --git a/tests/galaxy/test_umd_remote_api.cpp b/tests/galaxy/test_umd_remote_api.cpp index 3edd2854..c4c489d9 100644 --- a/tests/galaxy/test_umd_remote_api.cpp +++ b/tests/galaxy/test_umd_remote_api.cpp @@ -16,6 +16,7 @@ #include "test_galaxy_common.h" #include "tests/wormhole/test_wh_common.h" #include "tests/test_utils/generate_cluster_desc.hpp" +#include "tests/test_utils/device_test_utils.hpp" static const std::string SOC_DESC_PATH = "tests/soc_descs/wormhole_b0_8x10.yaml"; @@ -65,7 +66,7 @@ void run_remote_read_write_test(uint32_t vector_size, bool dram_write) { for (const auto& core : target_cores) { tt_cxy_pair target_core = tt_cxy_pair(chip, core); auto start = std::chrono::high_resolution_clock::now(); - device.write_to_device(vector_to_write, target_core, address, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), target_core, address, "SMALL_READ_WRITE_TLB"); device.wait_for_non_mmio_flush(); // Barrier to ensure that all writes over ethernet were commited auto end = std::chrono::high_resolution_clock::now(); auto duration = double(std::chrono::duration_cast(end - start).count()); @@ -73,7 +74,7 @@ void run_remote_read_write_test(uint32_t vector_size, bool dram_write) { // std::cout << 
" chip " << chip << " core " << target_core.str() << " " << duration << std::endl; start = std::chrono::high_resolution_clock::now(); - device.read_from_device(readback_vec, target_core, address, write_size, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_vec, target_core, address, write_size, "SMALL_READ_WRITE_TLB"); end = std::chrono::high_resolution_clock::now(); duration = double(std::chrono::duration_cast(end - start).count()); // std::cout << " read chip " << chip << " core " << target_core.str()<< " " << duration << std::endl; @@ -165,7 +166,7 @@ void run_data_mover_test( std::vector send_bw; // Set up data in sender core device.write_to_device( - vector_to_write, tt_cxy_pair(sender_core.chip, sender_core.core), sender_core.addr, "SMALL_READ_WRITE_TLB"); + vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(sender_core.chip, sender_core.core), sender_core.addr, "SMALL_READ_WRITE_TLB"); device.wait_for_non_mmio_flush(); // Barrier to ensure that all writes over ethernet were commited // Send data from sender core to receiver core @@ -177,7 +178,8 @@ void run_data_mover_test( // std::cout << "move data duration "<< duration << std::endl; // Verify data is correct in receiver core - device.read_from_device( + test_utils::read_data_from_device( + device, readback_vec, tt_cxy_pair(receiver_core.chip, receiver_core.core), receiver_core.addr, @@ -282,7 +284,7 @@ void run_data_broadcast_test( std::vector send_bw; // Set up data in sender core device.write_to_device( - vector_to_write, tt_cxy_pair(sender_core.chip, sender_core.core), sender_core.addr, "SMALL_READ_WRITE_TLB"); + vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(sender_core.chip, sender_core.core), sender_core.addr, "SMALL_READ_WRITE_TLB"); device.wait_for_non_mmio_flush(); // Barrier to ensure that all writes over ethernet were commited // Send data from sender core to receiver core @@ -295,7 +297,8 @@ void 
run_data_broadcast_test( // Verify data is correct in receiver core for (const auto& receiver_core : receiver_cores) { - device.read_from_device( + test_utils::read_data_from_device( + device, readback_vec, tt_cxy_pair(receiver_core.chip, receiver_core.core), receiver_core.addr, diff --git a/tests/galaxy/test_umd_remote_api_stability.cpp b/tests/galaxy/test_umd_remote_api_stability.cpp index ecf99862..9dac7ce7 100644 --- a/tests/galaxy/test_umd_remote_api_stability.cpp +++ b/tests/galaxy/test_umd_remote_api_stability.cpp @@ -23,7 +23,6 @@ #include "tests/galaxy/test_galaxy_common.h" #include "tests/wormhole/test_wh_common.h" - namespace tt::umd::test::utils { diff --git a/tests/grayskull/test_silicon_driver.cpp b/tests/grayskull/test_silicon_driver.cpp index d890d8a9..1798b0e1 100644 --- a/tests/grayskull/test_silicon_driver.cpp +++ b/tests/grayskull/test_silicon_driver.cpp @@ -10,6 +10,7 @@ #include "device/wormhole/wormhole_implementation.h" #include "l1_address_map.h" #include "tests/test_utils/generate_cluster_desc.hpp" +#include "tests/test_utils/device_test_utils.hpp" TEST(SiliconDriverGS, CreateDestroySequential) { std::set target_devices = {0}; @@ -115,20 +116,20 @@ TEST(SiliconDriverGS, HarvestingRuntime) { std::uint32_t dynamic_write_address = 0x30000000; for(int loop = 0; loop < 100; loop++){ // Write to each core a 100 times at different statically mapped addresses for(auto& core : device.get_virtual_soc_descriptors().at(i).workers) { - device.write_to_device(vector_to_write, tt_cxy_pair(i, core), address, ""); - device.write_to_device(vector_to_write, tt_cxy_pair(i, core), dynamic_write_address, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, ""); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), dynamic_write_address, "SMALL_READ_WRITE_TLB"); auto start_time = 
std::chrono::high_resolution_clock::now(); while(!(vector_to_write == readback_vec)) { float wait_duration = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count(); if(wait_duration > timeout_in_seconds) { break; } - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, 40, ""); - device.read_from_device(dynamic_readback_vec, tt_cxy_pair(i, core), dynamic_write_address, 40, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, 40, ""); + test_utils::read_data_from_device(device, dynamic_readback_vec, tt_cxy_pair(i, core), dynamic_write_address, 40, "SMALL_READ_WRITE_TLB"); } ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; - device.write_to_device(zeros, tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); // Clear any written data - device.write_to_device(zeros, tt_cxy_pair(i, core), dynamic_write_address, "SMALL_READ_WRITE_TLB"); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), dynamic_write_address, "SMALL_READ_WRITE_TLB"); // Clear any written data readback_vec = {}; dynamic_readback_vec = {}; } @@ -176,17 +177,17 @@ TEST(SiliconDriverGS, StaticTLB_RW) { std::uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; for(int loop = 0; loop < 100; loop++){ // Write to each core a 100 times at different statically mapped addresses for(auto& core : device.get_virtual_soc_descriptors().at(i).workers) { - device.write_to_device(vector_to_write, tt_cxy_pair(i, core), address, ""); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, ""); auto start_time = 
std::chrono::high_resolution_clock::now(); while(!(vector_to_write == readback_vec)) { float wait_duration = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count(); if(wait_duration > timeout_in_seconds) { break; } - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, 40, ""); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, 40, ""); } ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; - device.write_to_device(zeros, tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); // Clear any written data readback_vec = {}; } address += 0x20; // Increment by uint32_t size for each write @@ -217,18 +218,18 @@ TEST(SiliconDriverGS, DynamicTLB_RW) { std::uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; for(int loop = 0; loop < 100; loop++){ // Write to each core a 100 times at different statically mapped addresses for(auto& core : device.get_virtual_soc_descriptors().at(i).workers) { - device.write_to_device(vector_to_write, tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); auto start_time = std::chrono::high_resolution_clock::now(); while(!(vector_to_write == readback_vec)) { float wait_duration = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count(); if(wait_duration > timeout_in_seconds) { break; } - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, 40, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, 40, "SMALL_READ_WRITE_TLB"); } 
ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; - device.write_to_device(zeros, tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); // Clear any written data readback_vec = {}; } address += 0x20; // Increment by uint32_t size for each write @@ -259,14 +260,14 @@ TEST(SiliconDriverGS, MultiThreadedDevice) { std::uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; for(int loop = 0; loop < 100; loop++) { for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { - device.write_to_device(vector_to_write, tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); auto start_time = std::chrono::high_resolution_clock::now(); while(!(vector_to_write == readback_vec)) { float wait_duration = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count(); if(wait_duration > timeout_in_seconds) { break; } - device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); } ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; readback_vec = {}; @@ -283,14 +284,14 @@ TEST(SiliconDriverGS, MultiThreadedDevice) { for(auto& core_ls : device.get_virtual_soc_descriptors().at(0).dram_cores) { for(int loop = 0; loop < 100; loop++) { for(auto& core : core_ls) { - device.write_to_device(vector_to_write, tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write.data(), 
vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); auto start_time = std::chrono::high_resolution_clock::now(); while(!(vector_to_write == readback_vec)) { float wait_duration = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count(); if(wait_duration > timeout_in_seconds) { break; } - device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); } ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; readback_vec = {}; @@ -344,20 +345,20 @@ TEST(SiliconDriverGS, MultiThreadedMemBar) { // this tests takes ~5 mins to run device.deassert_risc_reset(); std::vector readback_membar_vec = {}; for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { - device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), 0, 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_membar_vec, tt_cxy_pair(0, core), 0, 4, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all workers readback_membar_vec = {}; } for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { - device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), 0, 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_membar_vec, tt_cxy_pair(0, core), 0, 4, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all workers readback_membar_vec = {}; } for(int chan = 0; chan < device.get_virtual_soc_descriptors().at(0).get_num_dram_channels(); chan++) { auto core = device.get_virtual_soc_descriptors().at(0).get_core_for_dram_channel(chan, 0); - 
device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), 0, 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_membar_vec, tt_cxy_pair(0, core), 0, 4, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all DRAM readback_membar_vec = {}; } @@ -379,11 +380,11 @@ TEST(SiliconDriverGS, MultiThreadedMemBar) { // this tests takes ~5 mins to run for(int loop = 0; loop < 100; loop++) { for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { std::vector readback_vec = {}; - device.write_to_device(vec1, tt_cxy_pair(0, core), address, ""); + device.write_to_device(vec1.data(), vec1.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, ""); device.l1_membar(0, "", {core}); - device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 4*vec1.size(), ""); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(0, core), address, 4*vec1.size(), ""); ASSERT_EQ(readback_vec, vec1); - device.write_to_device(zeros, tt_cxy_pair(0, core), address, ""); + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, ""); readback_vec = {}; } } @@ -394,11 +395,11 @@ TEST(SiliconDriverGS, MultiThreadedMemBar) { // this tests takes ~5 mins to run for(int loop = 0; loop < 100; loop++) { for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { std::vector readback_vec = {}; - device.write_to_device(vec2, tt_cxy_pair(0, core), address, ""); + device.write_to_device(vec2.data(), vec2.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, ""); device.l1_membar(0, "", {core}); - device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 4*vec2.size(), ""); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(0, core), address, 4*vec2.size(), ""); ASSERT_EQ(readback_vec, vec2); - device.write_to_device(zeros, tt_cxy_pair(0, core), address, "") 
; + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "") ; readback_vec = {}; } } @@ -408,7 +409,7 @@ TEST(SiliconDriverGS, MultiThreadedMemBar) { // this tests takes ~5 mins to run th2.join(); for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { - device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), 0, 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_membar_vec, tt_cxy_pair(0, core), 0, 4, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers end up in correct sate workers readback_membar_vec = {}; } diff --git a/tests/microbenchmark/test_rw_tensix.cpp b/tests/microbenchmark/test_rw_tensix.cpp index f58d72ab..07d40383 100644 --- a/tests/microbenchmark/test_rw_tensix.cpp +++ b/tests/microbenchmark/test_rw_tensix.cpp @@ -3,6 +3,7 @@ #include "nanobench.h" #include "device_fixture.hpp" +#include "tests/test_utils/device_test_utils.hpp" std::uint32_t generate_random_address(std::uint32_t max, std::uint32_t min=0) { ankerl::nanobench::Rng gen(80085); @@ -22,11 +23,11 @@ TEST_F(uBenchmarkFixture, WriteAllCores32Bytes) { wname << "Write to device core (" << core.x << ", " << core.y << ")"; // Write 32 bytes through static tlbs bench_static.title("Write 32 bytes").unit("writes").minEpochIterations(50).output(nullptr).run(wname.str(), [&] { - device->write_to_device(vector_to_write, tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); + device->write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); }); // Write through "fallback/dynamic" tlb bench_dynamic.title("Write 32 bytes fallback").unit("writes").minEpochIterations(50).output(nullptr).run(wname.str(), [&] { - device->write_to_device(vector_to_write, tt_cxy_pair(0, core), bad_address, "SMALL_READ_WRITE_TLB"); + device->write_to_device(vector_to_write.data(), 
vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), bad_address, "SMALL_READ_WRITE_TLB"); }); wname.clear(); } @@ -47,11 +48,11 @@ TEST_F(uBenchmarkFixture, ReadAllCores32Bytes){ // Read through static tlbs rname << "Read from device core (" << core.x << ", " << core.y << ")"; bench_static.title("Read 32 bytes").unit("reads").minEpochIterations(50).output(nullptr).run(rname.str(), [&] { - device->read_from_device(readback_vec, tt_cxy_pair(0, core), address, 0x20, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(*device, readback_vec, tt_cxy_pair(0, core), address, 0x20, "SMALL_READ_WRITE_TLB"); }); // Read through "fallback/dynamic" tlb bench_dynamic.title("Read 32 bytes fallback").unit("reads").minEpochIterations(50).output(nullptr).run(rname.str(), [&] { - device->read_from_device(readback_vec, tt_cxy_pair(0, core), bad_address, 0x20, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(*device, readback_vec, tt_cxy_pair(0, core), bad_address, 0x20, "SMALL_READ_WRITE_TLB"); }); rname.clear(); } @@ -69,7 +70,7 @@ TEST_F(uBenchmarkFixture, Write32BytesRandomAddr){ std::stringstream wname; wname << "Write to device core (" << core.x << ", " << core.y << ") @ address " << std::hex << address; bench.title("Write 32 bytes random address").unit("writes").minEpochIterations(50).output(nullptr).run(wname.str(), [&] { - device->write_to_device(vector_to_write, tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); + device->write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); }); wname.clear(); } @@ -86,7 +87,7 @@ TEST_F(uBenchmarkFixture, Read32BytesRandomAddr){ std::stringstream rname; rname << "Read from device core (" << core.x << ", " << core.y << ") @ address " << std::hex << address; bench.title("Read 32 bytes random address").unit("reads").minEpochIterations(50).output(nullptr).run(rname.str(), [&] { -
device->read_from_device(readback_vec, tt_cxy_pair(0, core), address, 0x20, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(*device, readback_vec, tt_cxy_pair(0, core), address, 0x20, "SMALL_READ_WRITE_TLB"); }); rname.clear(); } diff --git a/tests/simulation/test_simulation_device.cpp b/tests/simulation/test_simulation_device.cpp index 88b52921..2cf4ff91 100644 --- a/tests/simulation/test_simulation_device.cpp +++ b/tests/simulation/test_simulation_device.cpp @@ -4,6 +4,7 @@ #include #include "device_fixture.hpp" +#include "tests/test_utils/device_test_utils.hpp" std::vector generate_data(uint32_t size_in_bytes){ size_t size = size_in_bytes/sizeof(uint32_t); @@ -37,7 +38,7 @@ TEST_P(LoopbackAllCoresParam, LoopbackSingleTensix){ tt_cxy_pair core = {0, GetParam()}; device->write_to_device(wdata.data(), wdata.size()*sizeof(uint32_t), core, 0x100, ""); - device->read_from_device(rdata.data(), core, 0x100, wdata.size()*sizeof(uint32_t), ""); + test_utils::read_data_from_device(*device, rdata, core, 0x100, wdata.size()*sizeof(uint32_t), ""); ASSERT_EQ(wdata, rdata); } @@ -49,7 +50,7 @@ bool loopback_stress_size(std::unique_ptr &device, tt_xy_pa std::vector rdata(wdata.size(), 0); device->write_to_device(wdata.data(), wdata.size()*sizeof(uint32_t), tt_cxy_pair{0, core}, addr, ""); - device->read_from_device(rdata.data(), tt_cxy_pair{0, core}, addr, wdata.size()*sizeof(uint32_t), ""); + test_utils::read_data_from_device(*device, rdata, tt_cxy_pair{0, core}, addr, wdata.size()*sizeof(uint32_t), ""); return wdata == rdata; } @@ -79,8 +80,8 @@ TEST_F(SimulationDeviceFixture, LoopbackTwoTensix){ device->write_to_device(wdata1.data(), wdata1.size()*sizeof(uint32_t), core1, 0x100, ""); device->write_to_device(wdata2.data(), wdata2.size()*sizeof(uint32_t), core2, 0x100, ""); - device->read_from_device(rdata1.data(), core1, 0x100, wdata1.size()*sizeof(uint32_t), ""); - device->read_from_device(rdata2.data(), core2, 0x100, wdata2.size()*sizeof(uint32_t), ""); + 
test_utils::read_data_from_device(*device, rdata1, core1, 0x100, wdata1.size()*sizeof(uint32_t), ""); + test_utils::read_data_from_device(*device, rdata2, core2, 0x100, wdata2.size()*sizeof(uint32_t), ""); ASSERT_EQ(wdata1, rdata1); ASSERT_EQ(wdata2, rdata2); diff --git a/tests/test_utils/device_test_utils.hpp b/tests/test_utils/device_test_utils.hpp new file mode 100644 index 00000000..1c5cf528 --- /dev/null +++ b/tests/test_utils/device_test_utils.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include +#include +#include + +#include "tt_device.h" + +namespace test_utils { + +template +static void size_buffer_to_capacity(std::vector &data_buf, std::size_t size_in_bytes) { + std::size_t target_size = 0; + if (size_in_bytes > 0) { + target_size = ((size_in_bytes - 1) / sizeof(T)) + 1; + } + data_buf.resize(target_size); +} + +static void read_data_from_device(tt_device& device, std::vector &vec, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& tlb_to_use) { + size_buffer_to_capacity(vec, size); + device.read_from_device(vec.data(), core, addr, size, tlb_to_use); +} + +} diff --git a/tests/wormhole/test_silicon_driver_wh.cpp b/tests/wormhole/test_silicon_driver_wh.cpp index 6551b3cc..5459f813 100644 --- a/tests/wormhole/test_silicon_driver_wh.cpp +++ b/tests/wormhole/test_silicon_driver_wh.cpp @@ -15,6 +15,7 @@ #include "device/tt_cluster_descriptor.h" #include "device/wormhole/wormhole_implementation.h" #include "tests/test_utils/generate_cluster_desc.hpp" +#include "tests/test_utils/device_test_utils.hpp" void set_params_for_remote_txn(tt_SiliconDevice& device) { // Populate address map and NOC parameters that the driver needs for remote transactions @@ -174,18 +175,18 @@ TEST(SiliconDriverWH, HarvestingRuntime) { std::uint32_t dynamic_write_address = 0x40000000; for(int loop = 0; loop < 100; loop++){ // Write to each core a 100 times at different statically mapped addresses for(auto& core : device.get_virtual_soc_descriptors().at(i).workers) { - 
device.write_to_device(vector_to_write, tt_cxy_pair(i, core), address, ""); - device.write_to_device(vector_to_write, tt_cxy_pair(i, core), dynamic_write_address, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, ""); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), dynamic_write_address, "SMALL_READ_WRITE_TLB"); device.wait_for_non_mmio_flush(); // Barrier to ensure that all writes over ethernet were commited - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, 40, ""); - device.read_from_device(dynamic_readback_vec, tt_cxy_pair(i, core), dynamic_write_address, 40, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, 40, ""); + test_utils::read_data_from_device(device, dynamic_readback_vec, tt_cxy_pair(i, core), dynamic_write_address, 40, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; ASSERT_EQ(vector_to_write, dynamic_readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; device.wait_for_non_mmio_flush(); - device.write_to_device(zeros, tt_cxy_pair(i, core), dynamic_write_address, "SMALL_READ_WRITE_TLB"); // Clear any written data - device.write_to_device(zeros, tt_cxy_pair(i, core), address, ""); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), dynamic_write_address, "SMALL_READ_WRITE_TLB"); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, ""); // Clear any written data device.wait_for_non_mmio_flush(); readback_vec = {}; dynamic_readback_vec = {}; @@ -260,7 +261,6 @@ 
TEST(SiliconDriverWH, UnalignedStaticTLB_RW) { device.close_device(); } - TEST(SiliconDriverWH, StaticTLB_RW) { auto get_static_tlb_index_callback = [] (tt_xy_pair target) { return get_static_tlb_index(target); @@ -300,12 +300,12 @@ TEST(SiliconDriverWH, StaticTLB_RW) { std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE; for(int loop = 0; loop < 100; loop++){ // Write to each core a 100 times at different statically mapped addresses for(auto& core : device.get_virtual_soc_descriptors().at(i).workers) { - device.write_to_device(vector_to_write, tt_cxy_pair(i, core), address, ""); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, ""); device.wait_for_non_mmio_flush(); // Barrier to ensure that all writes over ethernet were commited - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, 40, ""); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, 40, ""); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; device.wait_for_non_mmio_flush(); - device.write_to_device(zeros, tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); // Clear any written data device.wait_for_non_mmio_flush(); readback_vec = {}; } @@ -338,12 +338,12 @@ TEST(SiliconDriverWH, DynamicTLB_RW) { std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE; for(int loop = 0; loop < 100; loop++){ // Write to each core a 100 times at different statically mapped addresses for(auto& core : device.get_virtual_soc_descriptors().at(i).workers) { - device.write_to_device(vector_to_write, tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * 
sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); device.wait_for_non_mmio_flush(); // Barrier to ensure that all writes over ethernet were commited - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, 40, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, 40, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; device.wait_for_non_mmio_flush(); - device.write_to_device(zeros, tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "SMALL_READ_WRITE_TLB"); device.wait_for_non_mmio_flush(); readback_vec = {}; } @@ -376,8 +376,8 @@ TEST(SiliconDriverWH, MultiThreadedDevice) { std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE; for(int loop = 0; loop < 100; loop++) { for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { - device.write_to_device(vector_to_write, tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); - device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; readback_vec = {}; } @@ -392,8 +392,8 @@ TEST(SiliconDriverWH, MultiThreadedDevice) { for(auto& core_ls : device.get_virtual_soc_descriptors().at(0).dram_cores) { for(int loop = 0; loop < 100; loop++) { for(auto& core : core_ls) { - device.write_to_device(vector_to_write, tt_cxy_pair(0, core), address, 
"SMALL_READ_WRITE_TLB"); - device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); + device.write_to_device(vector_to_write.data(), vector_to_write.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; readback_vec = {}; } @@ -445,20 +445,20 @@ TEST(SiliconDriverWH, MultiThreadedMemBar) { std::vector readback_membar_vec = {}; for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { - device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_membar_vec, tt_cxy_pair(0, core), l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all workers readback_membar_vec = {}; } for(int chan = 0; chan < device.get_virtual_soc_descriptors().at(0).get_num_dram_channels(); chan++) { auto core = device.get_virtual_soc_descriptors().at(0).get_core_for_dram_channel(chan, 0); - device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), 0, 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_membar_vec, tt_cxy_pair(0, core), 0, 4, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all DRAM readback_membar_vec = {}; } for(auto& core : device.get_virtual_soc_descriptors().at(0).ethernet_cores) { - device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), eth_l1_mem::address_map::ERISC_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, 
readback_membar_vec, tt_cxy_pair(0, core), eth_l1_mem::address_map::ERISC_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all ethernet cores readback_membar_vec = {}; } @@ -480,11 +480,11 @@ TEST(SiliconDriverWH, MultiThreadedMemBar) { for(int loop = 0; loop < 50; loop++) { for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { std::vector readback_vec = {}; - device.write_to_device(vec1, tt_cxy_pair(0, core), address, ""); + device.write_to_device(vec1.data(), vec1.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, ""); device.l1_membar(0, "SMALL_READ_WRITE_TLB", {core}); - device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 4*vec1.size(), ""); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(0, core), address, 4*vec1.size(), ""); ASSERT_EQ(readback_vec, vec1); - device.write_to_device(zeros, tt_cxy_pair(0, core), address, ""); + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, ""); readback_vec = {}; } @@ -496,11 +496,11 @@ TEST(SiliconDriverWH, MultiThreadedMemBar) { for(int loop = 0; loop < 50; loop++) { for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { std::vector readback_vec = {}; - device.write_to_device(vec2, tt_cxy_pair(0, core), address, ""); + device.write_to_device(vec2.data(), vec2.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, ""); device.l1_membar(0, "SMALL_READ_WRITE_TLB", {core}); - device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 4*vec2.size(), ""); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(0, core), address, 4*vec2.size(), ""); ASSERT_EQ(readback_vec, vec2); - device.write_to_device(zeros, tt_cxy_pair(0, core), address, "") ; + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "") ; 
readback_vec = {}; } } @@ -510,13 +510,13 @@ TEST(SiliconDriverWH, MultiThreadedMemBar) { th2.join(); for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { - device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_membar_vec, tt_cxy_pair(0, core), l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers end up in the correct sate for workers readback_membar_vec = {}; } for(auto& core : device.get_virtual_soc_descriptors().at(0).ethernet_cores) { - device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), eth_l1_mem::address_map::ERISC_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); + test_utils::read_data_from_device(device, readback_membar_vec, tt_cxy_pair(0, core), eth_l1_mem::address_map::ERISC_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers end up in the correct sate for ethernet cores readback_membar_vec = {}; } @@ -562,16 +562,16 @@ TEST(SiliconDriverWH, BroadcastWrite) { for(const auto i : target_devices) { for(const auto& core : device.get_virtual_soc_descriptors().at(i).workers) { if(rows_to_exclude.find(core.y) != rows_to_exclude.end()) continue; - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was broadcasted"; - device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, 
"LARGE_WRITE_TLB"); // Clear any written data readback_vec = {}; } for(int chan = 0; chan < device.get_virtual_soc_descriptors().at(i).get_num_dram_channels(); chan++) { const auto& core = device.get_virtual_soc_descriptors().at(i).get_core_for_dram_channel(chan, 0); - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from DRAM core " << i << " " << core.x << "-" << core.y << " does not match what was broadcasted " << size; - device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data readback_vec = {}; } } @@ -626,16 +626,16 @@ TEST(SiliconDriverWH, VirtualCoordinateBroadcast) { for(const auto i : target_devices) { for(const auto& core : device.get_virtual_soc_descriptors().at(i).workers) { if(rows_to_exclude.find(core.y) != rows_to_exclude.end()) continue; - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was broadcasted"; - device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data readback_vec = {}; } for(int chan = 0; chan < 
device.get_virtual_soc_descriptors().at(i).get_num_dram_channels(); chan++) { const auto& core = device.get_virtual_soc_descriptors().at(i).get_core_for_dram_channel(chan, 0); - device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); + test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from DRAM core " << i << " " << core.x << "-" << core.y << " does not match what was broadcasted " << size; - device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data + device.write_to_device(zeros.data(), zeros.size() * sizeof(std::uint32_t), tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data readback_vec = {}; } } From 08c2f5e1f320691636a63f02617267c30d46c622 Mon Sep 17 00:00:00 2001 From: pjanevski Date: Thu, 26 Sep 2024 09:45:20 +0000 Subject: [PATCH 2/3] Fix CI --- device/tt_silicon_driver.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/device/tt_silicon_driver.cpp b/device/tt_silicon_driver.cpp index 851a2d9c..04d80a52 100644 --- a/device/tt_silicon_driver.cpp +++ b/device/tt_silicon_driver.cpp @@ -2243,6 +2243,7 @@ void tt_SiliconDevice::read_from_non_mmio_device(void* mem_ptr, tt_cxy_pair core } else { // Read 4 byte aligned block from device/sysmem if (use_dram) { + size_buffer_to_capacity(data_block, block_size); read_from_sysmem(data_block.data(), host_dram_block_addr, host_dram_channel, block_size, mmio_capable_chip_logical); } else { uint32_t buf_address = eth_interface_params.eth_routing_data_buffer_addr + resp_rd_ptr * max_block_size; From 8234631486e9492bfbddadeee03197a5a65face3 Mon Sep 17 00:00:00 2001 From: pjanevski Date: Thu, 26 Sep 2024 13:31:41 +0000 Subject: [PATCH 3/3] Fix BH CI --- tests/blackhole/test_silicon_driver_bh.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) 
diff --git a/tests/blackhole/test_silicon_driver_bh.cpp b/tests/blackhole/test_silicon_driver_bh.cpp index daf9c526..a2545324 100644 --- a/tests/blackhole/test_silicon_driver_bh.cpp +++ b/tests/blackhole/test_silicon_driver_bh.cpp @@ -257,7 +257,7 @@ TEST(SiliconDriverBH, UnalignedStaticTLB_RW) { for(auto& core : device.get_virtual_soc_descriptors().at(i).workers) { device.write_to_device(write_vec.data(), size, tt_cxy_pair(i, core), address, ""); device.wait_for_non_mmio_flush(); - test_utils::read_data_from_device(device, readback_vec, tt_cxy_pair(i, core), address, size, ""); + device.read_from_device(readback_vec.data(), tt_cxy_pair(i, core), address, size, ""); ASSERT_EQ(readback_vec, write_vec); readback_vec = std::vector(size, 0); device.write_to_sysmem(write_vec.data(), size, 0, 0, 0); @@ -274,7 +274,6 @@ TEST(SiliconDriverBH, UnalignedStaticTLB_RW) { device.close_device(); } - TEST(SiliconDriverBH, StaticTLB_RW) { auto get_static_tlb_index_callback = [] (tt_xy_pair target) { return get_static_tlb_index(target);