From d5bba47d4cdc28fd70130e6fc6f709d36956c7a8 Mon Sep 17 00:00:00 2001
From: Bojan Rosko
Date: Fri, 3 Jan 2025 15:38:55 +0000
Subject: [PATCH] init

---
 device/api/umd/device/tt_soc_descriptor.h    | 59 +++++---------------
 device/cluster.cpp                           | 53 ++++++++--------
 tests/api/test_chip.cpp                      | 14 ++---
 tests/blackhole/test_cluster_bh.cpp          | 47 ++++++++--------
 tests/galaxy/test_umd_concurrent_threads.cpp | 20 +++----
 tests/galaxy/test_umd_remote_api.cpp         |  8 +--
 tests/grayskull/test_cluster_gs.cpp          | 33 +++++------
 tests/microbenchmark/device_fixture.hpp      |  2 +-
 tests/microbenchmark/test_rw_tensix.cpp      |  2 +-
 tests/test_utils/stimulus_generators.hpp     |  2 +-
 tests/wormhole/test_cluster_wh.cpp           | 47 ++++++++--------
 11 files changed, 135 insertions(+), 152 deletions(-)

diff --git a/device/api/umd/device/tt_soc_descriptor.h b/device/api/umd/device/tt_soc_descriptor.h
index 70416e42..73fdc590 100644
--- a/device/api/umd/device/tt_soc_descriptor.h
+++ b/device/api/umd/device/tt_soc_descriptor.h
@@ -55,40 +55,6 @@ class tt_SocDescriptor {
         const size_t dram_harvesting_mask = 0,
         const size_t eth_harvesting_mask = 0);
 
-    // Copy constructor
-    tt_SocDescriptor(const tt_SocDescriptor &other) :
-        arch(other.arch),
-        grid_size(other.grid_size),
-        worker_grid_size(other.worker_grid_size),
-        cores(other.cores),
-        arc_cores(other.arc_cores),
-        workers(other.workers),
-        harvested_workers(other.harvested_workers),
-        pcie_cores(other.pcie_cores),
-        worker_log_to_routing_x(other.worker_log_to_routing_x),
-        worker_log_to_routing_y(other.worker_log_to_routing_y),
-        routing_x_to_worker_x(other.routing_x_to_worker_x),
-        routing_y_to_worker_y(other.routing_y_to_worker_y),
-        dram_cores(other.dram_cores),
-        dram_core_channel_map(other.dram_core_channel_map),
-        ethernet_cores(other.ethernet_cores),
-        ethernet_core_channel_map(other.ethernet_core_channel_map),
-        trisc_sizes(other.trisc_sizes),
-        device_descriptor_file_path(other.device_descriptor_file_path),
-        overlay_version(other.overlay_version),
-        unpacker_version(other.unpacker_version),
-        dst_size_alignment(other.dst_size_alignment),
-        packer_version(other.packer_version),
-        worker_l1_size(other.worker_l1_size),
-        eth_l1_size(other.eth_l1_size),
-        noc_translation_id_enabled(other.noc_translation_id_enabled),
-        dram_bank_size(other.dram_bank_size),
-        coordinate_manager(other.coordinate_manager),
-        cores_map(other.cores_map),
-        grid_size_map(other.grid_size_map),
-        harvested_cores_map(other.harvested_cores_map),
-        harvested_grid_size_map(other.harvested_grid_size_map) {}
-
     // CoreCoord conversions.
     tt::umd::CoreCoord translate_coord_to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const;
 
@@ -114,21 +80,11 @@ class tt_SocDescriptor {
     bool is_ethernet_core(const tt_xy_pair &core) const;
 
     tt::ARCH arch;
-    tt_xy_pair grid_size;
-    tt_xy_pair worker_grid_size;
     std::unordered_map<tt_xy_pair, CoreDescriptor> cores;
-    std::vector<tt_xy_pair> arc_cores;
-    std::vector<tt_xy_pair> workers;
-    std::vector<tt_xy_pair> harvested_workers;
-    std::vector<tt_xy_pair> pcie_cores;
     std::unordered_map<int, int> worker_log_to_routing_x;
    std::unordered_map<int, int> worker_log_to_routing_y;
    std::unordered_map<int, int> routing_x_to_worker_x;
    std::unordered_map<int, int> routing_y_to_worker_y;
-    std::vector<std::vector<tt_xy_pair>> dram_cores;  // per channel list of dram cores
-    std::unordered_map<tt_xy_pair, std::tuple<int, int>> dram_core_channel_map;  // map dram core to chan/subchan
-    std::vector<tt_xy_pair> ethernet_cores;  // ethernet cores (index == channel id)
-    std::unordered_map<tt_xy_pair, int> ethernet_core_channel_map;
     std::vector<std::size_t> trisc_sizes;  // Most of software stack assumes same trisc size for whole chip..
     std::string device_descriptor_file_path = std::string("");
 
@@ -141,6 +97,11 @@ class tt_SocDescriptor {
     bool noc_translation_id_enabled;
     uint64_t dram_bank_size;
 
+    // TODO: Move to private when remove_worker_row_from_descriptor is removed.
+    tt_xy_pair worker_grid_size;
+    std::vector<tt_xy_pair> workers;
+    std::vector<tt_xy_pair> harvested_workers;
+
 private:
     void create_coordinate_manager(
         const size_t tensix_harvesting_mask, const size_t dram_harvesting_mask, const size_t eth_harvesting_mask);
@@ -163,6 +124,16 @@ class tt_SocDescriptor {
     // has multiple NOC endpoints, so some UMD clients prefer vector of vectors returned.
     std::vector<std::vector<tt::umd::CoreCoord>> dram_cores_core_coord;
     std::vector<std::vector<tt::umd::CoreCoord>> harvested_dram_cores_core_coord;
+
+    // Internal structures filled from soc descriptor yaml
+    tt_xy_pair grid_size;
+    std::vector<tt_xy_pair> arc_cores;
+    std::vector<tt_xy_pair> pcie_cores;
+    std::vector<std::vector<tt_xy_pair>> dram_cores;  // per channel list of dram cores
+    std::unordered_map<tt_xy_pair, std::tuple<int, int>> dram_core_channel_map;  // map dram core to chan/subchan
+    std::vector<tt_xy_pair> ethernet_cores;  // ethernet cores (index == channel id)
+    std::unordered_map<tt_xy_pair, int> ethernet_core_channel_map;
+    std::vector<tt_xy_pair> router_cores;
 };
 
 // Allocates a new soc descriptor on the heap. Returns an owning pointer.
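
A minimal migration sketch for the header change above (illustrative only: it assumes get_cores(CoreType)
returns the core list by value and that its entries still expose .x/.y like the old tt_xy_pair members,
which is what the call sites later in this patch rely on):

    #include <iostream>

    #include "umd/device/tt_soc_descriptor.h"

    void print_cores(const tt_SocDescriptor& soc_desc) {
        // Old: for (const tt_xy_pair& core : soc_desc.workers) { ... }
        for (const auto& core : soc_desc.get_cores(CoreType::TENSIX)) {
            std::cout << core.x << "," << core.y << std::endl;  // same coordinate fields as before
        }
        // DRAM keeps its per-channel nesting, so it has a dedicated accessor.
        for (const auto& channel_cores : soc_desc.get_dram_cores()) {
            std::cout << channel_cores.size() << " NOC endpoints in this channel" << std::endl;
        }
    }
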
std::string device_descriptor_file_path = std::string(""); @@ -141,6 +97,11 @@ class tt_SocDescriptor { bool noc_translation_id_enabled; uint64_t dram_bank_size; + // TODO: Move to private when remove_worker_row_from_descriptor is removed. + tt_xy_pair worker_grid_size; + std::vector workers; + std::vector harvested_workers; + private: void create_coordinate_manager( const size_t tensix_harvesting_mask, const size_t dram_harvesting_mask, const size_t eth_harvesting_mask); @@ -163,6 +124,16 @@ class tt_SocDescriptor { // has multiple NOC endpoints, so some UMD clients prefer vector of vectors returned. std::vector> dram_cores_core_coord; std::vector> harvested_dram_cores_core_coord; + + // Internal structures filled from soc descriptor yaml + tt_xy_pair grid_size; + std::vector arc_cores; + std::vector pcie_cores; + std::vector> dram_cores; // per channel list of dram cores + std::unordered_map> dram_core_channel_map; // map dram core to chan/subchan + std::vector ethernet_cores; // ethernet cores (index == channel id) + std::unordered_map ethernet_core_channel_map; + std::vector router_cores; }; // Allocates a new soc descriptor on the heap. Returns an owning pointer. diff --git a/device/cluster.cpp b/device/cluster.cpp index 35487ae4..47cfed1c 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -421,7 +421,9 @@ void Cluster::construct_cluster( } remote_transfer_ethernet_cores.at(logical_mmio_chip_id) .push_back(tt_cxy_pair( - logical_mmio_chip_id, soc_desc.ethernet_cores.at(i).x, soc_desc.ethernet_cores.at(i).y)); + logical_mmio_chip_id, + soc_desc.get_cores(CoreType::ETH).at(i).x, + soc_desc.get_cores(CoreType::ETH).at(i).y)); } } } @@ -618,7 +620,7 @@ void Cluster::configure_active_ethernet_cores_for_mmio_device( get_soc_descriptor(mmio_chip).arch == tt::ARCH::WORMHOLE_B0, "{} can only be called for Wormhole arch", __FUNCTION__); - auto& eth_cores = get_soc_descriptor(mmio_chip).ethernet_cores; + auto eth_cores = get_soc_descriptor(mmio_chip).get_cores(CoreType::ETH); // Cores 0, 1, 6, 7 are only available if in the active set static std::unordered_set eth_cores_available_if_active = { eth_cores.at(0), eth_cores.at(1), eth_cores.at(6), eth_cores.at(7)}; @@ -652,9 +654,12 @@ void Cluster::populate_cores() { for (const auto& [chip_id, chip] : chips_) { auto& soc_desc = chip->get_soc_descriptor(); workers_per_chip.insert( - {chip_id, std::unordered_set(soc_desc.workers.begin(), soc_desc.workers.end())}); + {chip_id, + std::unordered_set( + soc_desc.get_cores(CoreType::TENSIX).begin(), soc_desc.get_cores(CoreType::TENSIX).end())}); if (count == 0) { - eth_cores = std::unordered_set(soc_desc.ethernet_cores.begin(), soc_desc.ethernet_cores.end()); + eth_cores = std::unordered_set( + soc_desc.get_cores(CoreType::ETH).begin(), soc_desc.get_cores(CoreType::ETH).end()); for (std::uint32_t dram_idx = 0; dram_idx < soc_desc.get_num_dram_channels(); dram_idx++) { dram_cores.insert(soc_desc.get_core_for_dram_channel(dram_idx, 0)); } @@ -689,6 +694,7 @@ std::vector Cluster::extract_rows_to_remove( return row_coordinates_to_remove; } +// TODO: This will be removed very soon. 
@@ -944,12 +951,13 @@ void Cluster::deassert_risc_reset_at_core(tt_cxy_pair core, const TensixSoftRese
     std::uint32_t target_device = core.chip;
     log_assert(
         std::find(
-            get_soc_descriptor(target_device).workers.begin(), get_soc_descriptor(target_device).workers.end(), core) !=
-            get_soc_descriptor(target_device).workers.end() ||
+            get_soc_descriptor(target_device).get_cores(CoreType::TENSIX).begin(),
+            get_soc_descriptor(target_device).get_cores(CoreType::TENSIX).end(),
+            core) != get_soc_descriptor(target_device).get_cores(CoreType::TENSIX).end() ||
             std::find(
-                get_soc_descriptor(target_device).ethernet_cores.begin(),
-                get_soc_descriptor(target_device).ethernet_cores.end(),
-                core) != get_soc_descriptor(target_device).ethernet_cores.end(),
+                get_soc_descriptor(target_device).get_cores(CoreType::ETH).begin(),
+                get_soc_descriptor(target_device).get_cores(CoreType::ETH).end(),
+                core) != get_soc_descriptor(target_device).get_cores(CoreType::ETH).end(),
         "Cannot deassert reset on a non-tensix or harvested core");
     bool target_is_mmio_capable = cluster_desc->is_chip_mmio_capable(target_device);
     if (target_is_mmio_capable) {
@@ -971,12 +979,13 @@ void Cluster::assert_risc_reset_at_core(tt_cxy_pair core, const TensixSoftResetO
     std::uint32_t target_device = core.chip;
     log_assert(
         std::find(
-            get_soc_descriptor(target_device).workers.begin(), get_soc_descriptor(target_device).workers.end(), core) !=
-            get_soc_descriptor(target_device).workers.end() ||
+            get_soc_descriptor(target_device).get_cores(CoreType::TENSIX).begin(),
+            get_soc_descriptor(target_device).get_cores(CoreType::TENSIX).end(),
+            core) != get_soc_descriptor(target_device).get_cores(CoreType::TENSIX).end() ||
             std::find(
-                get_soc_descriptor(target_device).ethernet_cores.begin(),
-                get_soc_descriptor(target_device).ethernet_cores.end(),
-                core) != get_soc_descriptor(target_device).ethernet_cores.end(),
+                get_soc_descriptor(target_device).get_cores(CoreType::ETH).begin(),
+                get_soc_descriptor(target_device).get_cores(CoreType::ETH).end(),
+                core) != get_soc_descriptor(target_device).get_cores(CoreType::ETH).end(),
         "Cannot assert reset on a non-tensix or harvested core");
     bool target_is_mmio_capable = cluster_desc->is_chip_mmio_capable(target_device);
     if (target_is_mmio_capable) {
@@ -2714,7 +2723,7 @@ int Cluster::remote_arc_msg(
     constexpr uint64_t ARC_RESET_SCRATCH_ADDR = 0x880030060;
     constexpr uint64_t ARC_RESET_MISC_CNTL_ADDR = 0x880030100;
 
-    auto core = tt_cxy_pair(chip, get_soc_descriptor(chip).arc_cores.at(0));
+    auto core = tt_cxy_pair(chip, get_soc_descriptor(chip).get_cores(CoreType::ARC).at(0));
 
     if ((msg_code & 0xff00) != 0xaa00) {
         log_error("Malformed message. msg_code is 0x{:x} but should be 0xaa..", msg_code);
@@ -2957,7 +2966,7 @@ void Cluster::write_to_device(
     } else {
         log_assert(arch_name != tt::ARCH::BLACKHOLE, "Non-MMIO targets not supported in Blackhole");
         log_assert(
-            (get_soc_descriptor(core.chip).ethernet_cores).size() > 0 && chips_.size() > 1,
+            (get_soc_descriptor(core.chip).get_cores(CoreType::ETH)).size() > 0 && chips_.size() > 1,
             "Cannot issue ethernet writes to a single chip cluster!");
         write_to_non_mmio_device(mem_ptr, size, core, addr);
     }
@@ -3027,7 +3036,7 @@ void Cluster::read_from_device(
             arch_name != tt::ARCH::BLACKHOLE,
             "Non-MMIO targets not supported in Blackhole");  // MT: Use only dynamic TLBs and never program static
         log_assert(
-            (get_soc_descriptor(core.chip).ethernet_cores).size() > 0 && chips_.size() > 1,
+            (get_soc_descriptor(core.chip).get_cores(CoreType::ETH)).size() > 0 && chips_.size() > 1,
             "Cannot issue ethernet reads from a single chip cluster!");
         read_from_non_mmio_device(mem_ptr, core, addr, size);
     }
@@ -3208,7 +3217,7 @@ void Cluster::verify_eth_fw() {
     for (const auto& chip : all_chip_ids_) {
         uint32_t fw_version;
         std::vector<uint32_t> fw_versions;
-        for (const tt_xy_pair& eth_core : get_soc_descriptor(chip).ethernet_cores) {
+        for (const tt_xy_pair& eth_core : get_soc_descriptor(chip).get_cores(CoreType::ETH)) {
             read_from_device(
                 &fw_version,
                 tt_cxy_pair(chip, eth_core),
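
The assert/deassert hunks above repeat the same std::find scan over two core lists. A helper of this shape
captures the membership test (hypothetical, not part of this patch; core_is_of_type is an invented name, and
it assumes the returned coordinates compare against tt_xy_pair):

    #include <algorithm>

    static bool core_is_of_type(const tt_SocDescriptor& soc_desc, const tt_xy_pair& core, CoreType type) {
        const auto cores = soc_desc.get_cores(type);  // local copy; see note below
        return std::find(cores.begin(), cores.end(), core) != cores.end();
    }

The switch from auto& eth_cores = ...ethernet_cores to auto eth_cores = ...get_cores(CoreType::ETH) in
configure_active_ethernet_cores_for_mmio_device suggests the accessor returns by value: a non-const reference
can no longer bind to its result, so the list is copied into a local before being indexed.
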
diff --git a/tests/api/test_chip.cpp b/tests/api/test_chip.cpp
index 105376ba..fa060b28 100644
--- a/tests/api/test_chip.cpp
+++ b/tests/api/test_chip.cpp
@@ -24,7 +24,7 @@ using namespace tt::umd;
 
 inline tt_cxy_pair get_tensix_chip_core_coord(const std::unique_ptr<Cluster>& umd_cluster) {
     chip_id_t any_mmio_chip = *umd_cluster->get_target_mmio_device_ids().begin();
     const tt_SocDescriptor& soc_desc = umd_cluster->get_soc_descriptor(any_mmio_chip);
-    tt_xy_pair core = soc_desc.workers[0];
+    tt_xy_pair core = soc_desc.get_cores(CoreType::TENSIX)[0];
     return tt_cxy_pair(any_mmio_chip, core);
 }
 
@@ -50,14 +50,14 @@ TEST(ApiChipTest, ManualTLBConfiguration) {
     if (!remote_chips.empty()) {
         chip_id_t any_remote_chip = *remote_chips.begin();
         const tt_SocDescriptor& soc_desc = umd_cluster->get_soc_descriptor(any_remote_chip);
-        tt_xy_pair core = soc_desc.workers[0];
+        tt_xy_pair core = soc_desc.get_cores(CoreType::TENSIX)[0];
         EXPECT_THROW(umd_cluster->get_static_tlb_writer(tt_cxy_pair(any_remote_chip, core)), std::runtime_error);
     }
 
     // Expect to throw for non configured mmio chip.
     chip_id_t any_mmio_chip = *umd_cluster->get_target_mmio_device_ids().begin();
     const tt_SocDescriptor& soc_desc = umd_cluster->get_soc_descriptor(any_mmio_chip);
-    tt_xy_pair core = soc_desc.workers[0];
+    tt_xy_pair core = soc_desc.get_cores(CoreType::TENSIX)[0];
     EXPECT_THROW(umd_cluster->get_static_tlb_writer(tt_cxy_pair(any_mmio_chip, core)), std::runtime_error);
 
     // TODO: This should be part of TTDevice interface, not Cluster or Chip.
@@ -77,7 +77,7 @@ TEST(ApiChipTest, ManualTLBConfiguration) {
     // Each MMIO chip has it's own set of TLBs, so needs its own configuration.
     for (chip_id_t mmio_chip : umd_cluster->get_target_mmio_device_ids()) {
         const tt_SocDescriptor& soc_desc = umd_cluster->get_soc_descriptor(mmio_chip);
-        for (tt_xy_pair core : soc_desc.workers) {
+        for (tt_xy_pair core : soc_desc.get_cores(CoreType::TENSIX)) {
             umd_cluster->configure_tlb(mmio_chip, core, get_static_tlb_index(core), c_zero_address);
         }
     }
@@ -86,10 +86,10 @@ TEST(ApiChipTest, ManualTLBConfiguration) {
     EXPECT_NO_THROW(umd_cluster->get_static_tlb_writer(tt_cxy_pair(any_mmio_chip, core)));
 
     // Expect to throw for non worker cores.
-    tt_xy_pair dram_core = soc_desc.dram_cores[0][0];
+    tt_xy_pair dram_core = soc_desc.get_dram_cores()[0][0];
     EXPECT_THROW(umd_cluster->get_static_tlb_writer(tt_cxy_pair(any_mmio_chip, dram_core)), std::runtime_error);
-    if (!soc_desc.ethernet_cores.empty()) {
-        tt_xy_pair eth_core = soc_desc.ethernet_cores[0];
+    if (!soc_desc.get_cores(CoreType::ETH).empty()) {
+        tt_xy_pair eth_core = soc_desc.get_cores(CoreType::ETH)[0];
         EXPECT_THROW(umd_cluster->get_static_tlb_writer(tt_cxy_pair(any_mmio_chip, eth_core)), std::runtime_error);
     }
 }
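
The ManualTLBConfiguration changes above encode the static-TLB lifecycle: get_static_tlb_writer throws until
configure_tlb has mapped the core, then succeeds. Condensed from the test (get_static_tlb_index is the test's
own index callback and c_zero_address its base address):

    tt_xy_pair core = soc_desc.get_cores(CoreType::TENSIX)[0];
    // Before configuration: no static TLB is mapped to this core, so the writer cannot be built.
    EXPECT_THROW(umd_cluster->get_static_tlb_writer(tt_cxy_pair(mmio_chip, core)), std::runtime_error);
    umd_cluster->configure_tlb(mmio_chip, core, get_static_tlb_index(core), c_zero_address);
    // After configuration the same call succeeds; DRAM and ETH cores still throw.
    EXPECT_NO_THROW(umd_cluster->get_static_tlb_writer(tt_cxy_pair(mmio_chip, core)));
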
diff --git a/tests/blackhole/test_cluster_bh.cpp b/tests/blackhole/test_cluster_bh.cpp
index d3af9fc4..9d18e61b 100644
--- a/tests/blackhole/test_cluster_bh.cpp
+++ b/tests/blackhole/test_cluster_bh.cpp
@@ -125,7 +125,7 @@ TEST(SiliconDriverBH, CreateDestroy) {
 //     ASSERT_EQ(cluster.using_harvested_soc_descriptors(), true) << "Expected Driver to have performed harvesting";
 
 //     for (const auto& chip : sdesc_per_chip) {
-//         ASSERT_EQ(chip.second.workers.size(), 48)
+//         ASSERT_EQ(chip.second.get_cores(CoreType::TENSIX).size(), 48)
 //             << "Expected SOC descriptor with harvesting to have 48 workers for chip" << chip.first;
 //     }
 //     ASSERT_EQ(cluster.get_harvesting_masks_for_soc_descriptors().at(0), 30)
@@ -161,7 +161,8 @@ TEST(SiliconDriverBH, CreateDestroy) {
 //     ASSERT_EQ(cluster.using_harvested_soc_descriptors(), false)
 //         << "SOC descriptors should not be modified when harvesting is disabled";
 //     for (const auto& chip : sdesc_per_chip) {
-//         ASSERT_EQ(chip.second.workers.size(), 1) << "Expected 1x1 SOC descriptor to be unmodified by driver";
+//         ASSERT_EQ(chip.second.get_cores(CoreType::TENSIX).size(), 1) << "Expected 1x1 SOC descriptor to be unmodified
+//         by driver";
 //     }
 // }
 
@@ -196,7 +197,7 @@ TEST(SiliconDriverBH, CreateDestroy) {
 //         // Iterate over MMIO devices and only setup static TLBs for worker cores
 //         if (std::find(mmio_devices.begin(), mmio_devices.end(), i) != mmio_devices.end()) {
 //             auto& sdesc = cluster.get_soc_descriptor(i);
-//             for (auto& core : sdesc.workers) {
+//             for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
 //                 // Statically mapping a 1MB TLB to this core, starting from address NCRISC_FIRMWARE_BASE.
 //                 cluster.configure_tlb(
 //                     i, core, get_static_tlb_index_callback(core), l1_mem::address_map::NCRISC_FIRMWARE_BASE);
@@ -217,7 +218,7 @@ TEST(SiliconDriverBH, CreateDestroy) {
 //         std::uint32_t dynamic_write_address = 0x40000000;
 //         for (int loop = 0; loop < 100;
 //              loop++) {  // Write to each core a 100 times at different statically mapped addresses
-//             for (auto& core : cluster.get_soc_descriptor(i).workers) {
+//             for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
 //                 cluster.write_to_device(
 //                     vector_to_write.data(),
 //                     vector_to_write.size() * sizeof(std::uint32_t),
@@ -284,7 +285,7 @@ TEST(SiliconDriverBH, UnalignedStaticTLB_RW) {
         // Iterate over MMIO devices and only setup static TLBs for worker cores
         if (std::find(mmio_devices.begin(), mmio_devices.end(), i) != mmio_devices.end()) {
             auto& sdesc = cluster.get_soc_descriptor(i);
-            for (auto& core : sdesc.workers) {
+            for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
                 // Statically mapping a 1MB TLB to this core, starting from address NCRISC_FIRMWARE_BASE.
                 cluster.configure_tlb(
                     i, core, get_static_tlb_index_callback(core), l1_mem::address_map::NCRISC_FIRMWARE_BASE);
@@ -305,7 +306,7 @@ TEST(SiliconDriverBH, UnalignedStaticTLB_RW) {
         std::vector<uint8_t> readback_vec(size, 0);
         std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE;
         for (int loop = 0; loop < 50; loop++) {
-            for (auto& core : cluster.get_soc_descriptor(i).workers) {
+            for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
                 cluster.write_to_device(write_vec.data(), size, tt_cxy_pair(i, core), address, "");
                 cluster.wait_for_non_mmio_flush();
                 cluster.read_from_device(readback_vec.data(), tt_cxy_pair(i, core), address, size, "");
@@ -339,7 +340,7 @@ TEST(SiliconDriverBH, StaticTLB_RW) {
         // Iterate over MMIO devices and only setup static TLBs for worker cores
         if (std::find(mmio_devices.begin(), mmio_devices.end(), i) != mmio_devices.end()) {
             auto& sdesc = cluster.get_soc_descriptor(i);
-            for (auto& core : sdesc.workers) {
+            for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
                 // Statically mapping a 2MB TLB to this core, starting from address NCRISC_FIRMWARE_BASE.
                 cluster.configure_tlb(
                     i, core, get_static_tlb_index_callback(core), l1_mem::address_map::NCRISC_FIRMWARE_BASE);
@@ -360,7 +361,7 @@ TEST(SiliconDriverBH, StaticTLB_RW) {
         std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE;
         for (int loop = 0; loop < 1;
              loop++) {  // Write to each core a 100 times at different statically mapped addresses
-            for (auto& core : cluster.get_soc_descriptor(i).workers) {
+            for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
                 cluster.write_to_device(
                     vector_to_write.data(),
                     vector_to_write.size() * sizeof(std::uint32_t),
@@ -408,7 +409,7 @@ TEST(SiliconDriverBH, DynamicTLB_RW) {
     std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE;
     for (int loop = 0; loop < 100;
          loop++) {  // Write to each core a 100 times at different statically mapped addresses
-        for (auto& core : cluster.get_soc_descriptor(i).workers) {
+        for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
             cluster.write_to_device(
                 vector_to_write.data(),
                 vector_to_write.size() * sizeof(std::uint32_t),
@@ -443,7 +444,7 @@ TEST(SiliconDriverBH, DynamicTLB_RW) {
     for (int loop = 0; loop < 100;
          loop++) {  // Write to each core a 100 times at different statically mapped addresses
         for (int ch = 0; ch < NUM_CHANNELS; ch++) {
-            std::vector<tt_xy_pair> chan = cluster.get_soc_descriptor(i).dram_cores.at(ch);
+            std::vector<CoreCoord> chan = cluster.get_soc_descriptor(i).get_dram_cores().at(ch);
             tt_xy_pair subchan = chan.at(0);
             cluster.write_to_device(
                 vector_to_write.data(),
@@ -493,7 +494,7 @@ TEST(SiliconDriverBH, MultiThreadedDevice) {
         std::vector<uint32_t> readback_vec = {};
         std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE;
         for (int loop = 0; loop < 100; loop++) {
-            for (auto& core : cluster.get_soc_descriptor(0).workers) {
+            for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
                 cluster.write_to_device(
                     vector_to_write.data(),
                     vector_to_write.size() * sizeof(std::uint32_t),
@@ -514,7 +515,7 @@ TEST(SiliconDriverBH, MultiThreadedDevice) {
         std::vector<uint32_t> vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
         std::vector<uint32_t> readback_vec = {};
         std::uint32_t address = 0x30000000;
-        for (auto& core_ls : cluster.get_soc_descriptor(0).dram_cores) {
+        for (auto& core_ls : cluster.get_soc_descriptor(0).get_dram_cores()) {
             for (int loop = 0; loop < 100; loop++) {
                 for (auto& core : core_ls) {
                     cluster.write_to_device(
@@ -556,7 +557,7 @@ TEST(SiliconDriverBH, MultiThreadedMemBar) {
     for (int i = 0; i < target_devices.size(); i++) {
         // Iterate over devices and only setup static TLBs for functional worker cores
         auto& sdesc = cluster.get_soc_descriptor(i);
-        for (auto& core : sdesc.workers) {
+        for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
            // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE.
            cluster.configure_tlb(i, core, get_static_tlb_index_callback(core), base_addr);
        }
@@ -566,7 +567,7 @@ TEST(SiliconDriverBH, MultiThreadedMemBar) {
     cluster.start_device(default_params);
 
     std::vector<uint32_t> readback_membar_vec = {};
-    for (auto& core : cluster.get_soc_descriptor(0).workers) {
+    for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
         test_utils::read_data_from_device(
             cluster,
             readback_membar_vec,
@@ -588,7 +589,7 @@ TEST(SiliconDriverBH, MultiThreadedMemBar) {
         readback_membar_vec = {};
     }
 
-    for (auto& core : cluster.get_soc_descriptor(0).ethernet_cores) {
+    for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::ETH)) {
         test_utils::read_data_from_device(
             cluster,
             readback_membar_vec,
@@ -617,7 +618,7 @@ TEST(SiliconDriverBH, MultiThreadedMemBar) {
     std::thread th1 = std::thread([&] {
         std::uint32_t address = base_addr;
         for (int loop = 0; loop < 50; loop++) {
-            for (auto& core : cluster.get_soc_descriptor(0).workers) {
+            for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
                 std::vector<uint32_t> readback_vec = {};
                 cluster.write_to_device(
                     vec1.data(), vec1.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "");
@@ -635,7 +636,7 @@ TEST(SiliconDriverBH, MultiThreadedMemBar) {
     std::thread th2 = std::thread([&] {
         std::uint32_t address = base_addr + vec1.size() * 4;
         for (int loop = 0; loop < 50; loop++) {
-            for (auto& core : cluster.get_soc_descriptor(0).workers) {
+            for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
                 std::vector<uint32_t> readback_vec = {};
                 cluster.write_to_device(
                     vec2.data(), vec2.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "");
@@ -653,7 +654,7 @@ TEST(SiliconDriverBH, MultiThreadedMemBar) {
     th1.join();
     th2.join();
 
-    for (auto& core : cluster.get_soc_descriptor(0).workers) {
+    for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
         test_utils::read_data_from_device(
             cluster,
             readback_membar_vec,
@@ -666,7 +667,7 @@ TEST(SiliconDriverBH, MultiThreadedMemBar) {
         readback_membar_vec = {};
     }
 
-    for (auto& core : cluster.get_soc_descriptor(0).ethernet_cores) {
+    for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::ETH)) {
         test_utils::read_data_from_device(
             cluster,
             readback_membar_vec,
@@ -732,7 +733,7 @@ TEST(SiliconDriverBH, DISABLED_BroadcastWrite) {  // Cannot broadcast to tensix/
     cluster.wait_for_non_mmio_flush();
 
     for (const auto i : target_devices) {
-        for (const auto& core : cluster.get_soc_descriptor(i).workers) {
+        for (const auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
             if (rows_to_exclude.find(core.y) != rows_to_exclude.end()) {
                 continue;
             }
@@ -827,7 +828,7 @@ TEST(SiliconDriverBH, DISABLED_VirtualCoordinateBroadcast) {  // same problem as
     cluster.wait_for_non_mmio_flush();
 
     for (const auto i : target_devices) {
-        for (const auto& core : cluster.get_soc_descriptor(i).workers) {
+        for (const auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
             if (rows_to_exclude.find(core.y) != rows_to_exclude.end()) {
                 continue;
             }
@@ -881,7 +882,7 @@ TEST(SiliconDriverBH, SysmemTestWithPcie) {
     cluster.start_device(tt_device_params{});  // no special parameters
 
     const chip_id_t mmio_chip_id = 0;
-    const auto PCIE = cluster.get_soc_descriptor(mmio_chip_id).pcie_cores.at(0);
+    const auto PCIE = cluster.get_soc_descriptor(mmio_chip_id).get_cores(CoreType::PCIE).at(0);
     const tt_cxy_pair PCIE_CORE(mmio_chip_id, PCIE.x, PCIE.y);
     const size_t test_size_bytes = 0x4000;  // Arbitrarilly chosen, but small size so the test runs quickly.
 
@@ -952,7 +953,7 @@ TEST(SiliconDriverBH, RandomSysmemTestWithPcie) {
     cluster.start_device(tt_device_params{});  // no special parameters
 
     const chip_id_t mmio_chip_id = 0;
-    const auto PCIE = cluster.get_soc_descriptor(mmio_chip_id).pcie_cores.at(0);
+    const auto PCIE = cluster.get_soc_descriptor(mmio_chip_id).get_cores(CoreType::PCIE).at(0);
     const tt_cxy_pair PCIE_CORE(mmio_chip_id, PCIE.x, PCIE.y);
     const size_t ONE_GIG = 1 << 30;
     const size_t num_tests = 0x20000;  // runs in a reasonable amount of time
diff --git a/tests/galaxy/test_umd_concurrent_threads.cpp b/tests/galaxy/test_umd_concurrent_threads.cpp
index bc119242..73838feb 100644
--- a/tests/galaxy/test_umd_concurrent_threads.cpp
+++ b/tests/galaxy/test_umd_concurrent_threads.cpp
@@ -67,7 +67,7 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsL1) {
         std::uint32_t write_size = vector_to_write_th1.size() * 4;
         std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE;
         for (const auto& chip : target_devices_th1) {
-            for (auto& core : sdesc_per_chip.at(chip).workers) {
+            for (auto& core : sdesc_per_chip.at(chip).get_cores(CoreType::TENSIX)) {
                 device.write_to_device(
                     vector_to_write_th1.data(),
                     vector_to_write_th1.size() * sizeof(std::uint32_t),
@@ -78,7 +78,7 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsL1) {
         }
         device.wait_for_non_mmio_flush();
         for (auto& chip : target_devices_th1) {
-            for (auto& core : sdesc_per_chip.at(chip).workers) {
+            for (auto& core : sdesc_per_chip.at(chip).get_cores(CoreType::TENSIX)) {
                 test_utils::read_data_from_device(
                     device, readback_vec, tt_cxy_pair(chip, core), address, write_size, "SMALL_READ_WRITE_TLB");
                 EXPECT_EQ(vector_to_write_th1, readback_vec)
@@ -93,7 +93,7 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsL1) {
         std::uint32_t write_size = vector_to_write_th2.size() * 4;
         std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE;
         for (const auto& chip : target_devices_th2) {
-            for (auto& core : sdesc_per_chip.at(chip).workers) {
+            for (auto& core : sdesc_per_chip.at(chip).get_cores(CoreType::TENSIX)) {
                 device.write_to_device(
                     vector_to_write_th2.data(),
                     vector_to_write_th2.size() * sizeof(std::uint32_t),
@@ -104,7 +104,7 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsL1) {
         }
         device.wait_for_non_mmio_flush();
         for (const auto& chip : target_devices_th2) {
-            for (auto& core : sdesc_per_chip.at(chip).workers) {
+            for (auto& core : sdesc_per_chip.at(chip).get_cores(CoreType::TENSIX)) {
                 test_utils::read_data_from_device(
                     device, readback_vec, tt_cxy_pair(chip, core), address, write_size, "SMALL_READ_WRITE_TLB");
                 EXPECT_EQ(vector_to_write_th2, readback_vec)
@@ -160,7 +160,7 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsDram) {
     std::uint32_t write_size = vector_to_write.size() * 4;
 
     std::vector<tt_xy_pair> dram_cores;
-    for (const auto& subchan_cores : sdesc_per_chip.at(0).dram_cores) {
+    for (const auto& subchan_cores : sdesc_per_chip.at(0).get_dram_cores()) {
         dram_cores.insert(dram_cores.end(), subchan_cores.begin(), subchan_cores.end());
     }
     device.deassert_risc_reset();
@@ -193,7 +193,7 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsDram) {
         std::vector<uint32_t> readback_vec = {};
         std::uint32_t address = 0x5000000;
         for (const auto& chip : target_devices_th2) {
-            for (auto& core : sdesc_per_chip.at(chip).workers) {
+            for (auto& core : sdesc_per_chip.at(chip).get_cores(CoreType::TENSIX)) {
                 device.write_to_device(
                     vector_to_write.data(),
                     vector_to_write.size() * sizeof(std::uint32_t),
@@ -204,7 +204,7 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsDram) {
         }
         device.wait_for_non_mmio_flush();
         for (const auto& chip : target_devices_th2) {
-            for (auto& core : sdesc_per_chip.at(chip).workers) {
+            for (auto& core : sdesc_per_chip.at(chip).get_cores(CoreType::TENSIX)) {
                 test_utils::read_data_from_device(
                     device, readback_vec, tt_cxy_pair(chip, core), address, write_size, "SMALL_READ_WRITE_TLB");
                 EXPECT_EQ(vector_to_write, readback_vec) << "Vector read back from dram core " << core.x << "-"
@@ -247,7 +247,7 @@ TEST(GalaxyConcurrentThreads, PushInputsWhileSignalingCluster) {
     std::vector<uint32_t> large_vector(20000, 0xbeef1234);
 
     std::vector<tt_xy_pair> dram_cores;
-    for (const auto& subchan_cores : sdesc_per_chip.at(0).dram_cores) {
+    for (const auto& subchan_cores : sdesc_per_chip.at(0).get_dram_cores()) {
         dram_cores.insert(dram_cores.end(), subchan_cores.begin(), subchan_cores.end());
     }
 
@@ -277,7 +277,7 @@ TEST(GalaxyConcurrentThreads, PushInputsWhileSignalingCluster) {
         std::vector<uint32_t> readback_vec = {};
         std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE;
         for (const auto& chip : target_devices) {
-            for (auto& core : sdesc_per_chip.at(chip).workers) {
+            for (auto& core : sdesc_per_chip.at(chip).get_cores(CoreType::TENSIX)) {
                 device.write_to_device(
                     small_vector.data(),
                     small_vector.size() * sizeof(std::uint32_t),
@@ -288,7 +288,7 @@ TEST(GalaxyConcurrentThreads, PushInputsWhileSignalingCluster) {
         }
         device.wait_for_non_mmio_flush();
        for (const auto& chip : target_devices) {
-            for (auto& core : sdesc_per_chip.at(chip).workers) {
+            for (auto& core : sdesc_per_chip.at(chip).get_cores(CoreType::TENSIX)) {
                 test_utils::read_data_from_device(
                     device,
                     readback_vec,
diff --git a/tests/galaxy/test_umd_remote_api.cpp b/tests/galaxy/test_umd_remote_api.cpp
index 41eecd79..a8f39376 100644
--- a/tests/galaxy/test_umd_remote_api.cpp
+++ b/tests/galaxy/test_umd_remote_api.cpp
@@ -51,13 +51,13 @@ void run_remote_read_write_test(uint32_t vector_size, bool dram_write) {
     std::vector<double> write_bw;
     std::vector<double> read_bw;
     for (int loop = 0; loop < 10; loop++) {
-        std::vector<tt_xy_pair> target_cores;
+        std::vector<CoreCoord> target_cores;
         if (dram_write) {
-            for (const auto& subchan_cores : sdesc_per_chip.at(chip).dram_cores) {
+            for (const auto& subchan_cores : sdesc_per_chip.at(chip).get_dram_cores()) {
                 target_cores.insert(target_cores.end(), subchan_cores.begin(), subchan_cores.end());
             }
         } else {
-            target_cores = sdesc_per_chip.at(chip).workers;
+            target_cores = sdesc_per_chip.at(chip).get_cores(CoreType::TENSIX);
         }
         for (const auto& core : target_cores) {
             tt_cxy_pair target_core = tt_cxy_pair(chip, core);
@@ -329,7 +329,7 @@ TEST(GalaxyDataMovement, BroadcastData1) {
 
     tt_multichip_core_addr sender_core(4, tt_xy_pair(1, 1), 0x5000);
     std::vector<tt_multichip_core_addr> receiver_cores;
-    for (const auto& core : sdesc.workers) {
+    for (const auto& core : sdesc.get_cores(CoreType::TENSIX)) {
         receiver_cores.push_back(tt_multichip_core_addr(5, core, 0x6000));
     }
     run_data_broadcast_test(100, sender_core, receiver_cores);
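
Both galaxy suites flatten the per-channel DRAM list before use. Since get_dram_cores() deliberately keeps
the channel/subchannel nesting, a shared helper of this shape would remove the duplicated loops (a sketch,
not part of this patch; it assumes the per-channel entries convert to tt_xy_pair, which the insert() calls
above already rely on):

    std::vector<tt_xy_pair> flatten_dram_cores(const tt_SocDescriptor& soc_desc) {
        std::vector<tt_xy_pair> flat;
        for (const auto& subchan_cores : soc_desc.get_dram_cores()) {
            flat.insert(flat.end(), subchan_cores.begin(), subchan_cores.end());
        }
        return flat;
    }
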
diff --git a/tests/grayskull/test_cluster_gs.cpp b/tests/grayskull/test_cluster_gs.cpp
index 977a3c92..2a060d7f 100644
--- a/tests/grayskull/test_cluster_gs.cpp
+++ b/tests/grayskull/test_cluster_gs.cpp
@@ -60,7 +60,7 @@ TEST(SiliconDriverGS, Harvesting) {
     ASSERT_EQ(cluster.using_harvested_soc_descriptors(), true) << "Expected Driver to have performed harvesting";
 
     for (const auto& chip : sdesc_per_chip) {
-        ASSERT_LE(chip.second.workers.size(), 96)
+        ASSERT_LE(chip.second.get_cores(CoreType::TENSIX).size(), 96)
             << "Expected SOC descriptor with harvesting to have less than or equal to 96 workers for chip "
             << chip.first;
     }
@@ -88,7 +88,8 @@ TEST(SiliconDriverGS, CustomSocDesc) {
     ASSERT_EQ(cluster.using_harvested_soc_descriptors(), false)
         << "SOC descriptors should not be modified when harvesting is disabled";
     for (const auto& chip : sdesc_per_chip) {
-        ASSERT_EQ(chip.second.workers.size(), 1) << "Expected 1x1 SOC descriptor to be unmodified by driver";
+        ASSERT_EQ(chip.second.get_cores(CoreType::TENSIX).size(), 1)
+            << "Expected 1x1 SOC descriptor to be unmodified by driver";
     }
 }
 
@@ -109,7 +110,7 @@ TEST(SiliconDriverGS, HarvestingRuntime) {
     for (int i = 0; i < target_devices.size(); i++) {
         // Iterate over devices and only setup static TLBs for functional worker cores
         auto& sdesc = cluster.get_soc_descriptor(i);
-        for (auto& core : sdesc.workers) {
+        for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
             // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE.
             cluster.configure_tlb(i, core, get_static_tlb_index(core), l1_mem::address_map::DATA_BUFFER_SPACE_BASE);
         }
@@ -130,7 +131,7 @@ TEST(SiliconDriverGS, HarvestingRuntime) {
     std::uint32_t dynamic_write_address = 0x30000000;
     for (int loop = 0; loop < 100;
          loop++) {  // Write to each core a 100 times at different statically mapped addresses
-        for (auto& core : cluster.get_soc_descriptor(i).workers) {
+        for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
             cluster.write_to_device(
                 vector_to_write.data(),
                 vector_to_write.size() * sizeof(std::uint32_t),
@@ -199,7 +200,7 @@ TEST(SiliconDriverGS, StaticTLB_RW) {
     for (int i = 0; i < target_devices.size(); i++) {
         // Iterate over devices and only setup static TLBs for worker cores
         auto& sdesc = cluster.get_soc_descriptor(i);
-        for (auto& core : sdesc.workers) {
+        for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
             // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE.
             cluster.configure_tlb(
                 i, core, get_static_tlb_index(core), l1_mem::address_map::DATA_BUFFER_SPACE_BASE, TLB_DATA::Posted);
@@ -218,7 +219,7 @@ TEST(SiliconDriverGS, StaticTLB_RW) {
     std::uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE;
     for (int loop = 0; loop < 100;
          loop++) {  // Write to each core a 100 times at different statically mapped addresses
-        for (auto& core : cluster.get_soc_descriptor(i).workers) {
+        for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
             cluster.write_to_device(
                 vector_to_write.data(),
                 vector_to_write.size() * sizeof(std::uint32_t),
@@ -272,7 +273,7 @@ TEST(SiliconDriverGS, DynamicTLB_RW) {
     std::uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE;
     for (int loop = 0; loop < 100;
          loop++) {  // Write to each core a 100 times at different statically mapped addresses
-        for (auto& core : cluster.get_soc_descriptor(i).workers) {
+        for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
            cluster.write_to_device(
                vector_to_write.data(),
                vector_to_write.size() * sizeof(std::uint32_t),
@@ -325,7 +326,7 @@ TEST(SiliconDriverGS, MultiThreadedDevice) {
         float timeout_in_seconds = 10;
         std::uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE;
         for (int loop = 0; loop < 100; loop++) {
-            for (auto& core : cluster.get_soc_descriptor(0).workers) {
+            for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
                 cluster.write_to_device(
                     vector_to_write.data(),
                     vector_to_write.size() * sizeof(std::uint32_t),
@@ -356,7 +357,7 @@ TEST(SiliconDriverGS, MultiThreadedDevice) {
         std::vector<uint32_t> readback_vec = {};
         float timeout_in_seconds = 10;
         std::uint32_t address = 0x30000000;
-        for (auto& core_ls : cluster.get_soc_descriptor(0).dram_cores) {
+        for (auto& core_ls : cluster.get_soc_descriptor(0).get_dram_cores()) {
             for (int loop = 0; loop < 100; loop++) {
                 for (auto& core : core_ls) {
                     cluster.write_to_device(
@@ -414,7 +415,7 @@ TEST(SiliconDriverGS, MultiThreadedMemBar) {  // this tests takes ~5 mins to run
     for (int i = 0; i < target_devices.size(); i++) {
         // Iterate over devices and only setup static TLBs for functional worker cores
         auto& sdesc = cluster.get_soc_descriptor(i);
-        for (auto& core : sdesc.workers) {
+        for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
            // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE.
            cluster.configure_tlb(i, core, get_static_tlb_index(core), base_addr);
        }
@@ -423,7 +424,7 @@ TEST(SiliconDriverGS, MultiThreadedMemBar) {  // this tests takes ~5 mins to run
     tt_device_params default_params;
     cluster.start_device(default_params);
     std::vector<uint32_t> readback_membar_vec = {};
-    for (auto& core : cluster.get_soc_descriptor(0).workers) {
+    for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
         test_utils::read_data_from_device(
             cluster,
             readback_membar_vec,
@@ -436,7 +437,7 @@ TEST(SiliconDriverGS, MultiThreadedMemBar) {  // this tests takes ~5 mins to run
         readback_membar_vec = {};
     }
 
-    for (auto& core : cluster.get_soc_descriptor(0).workers) {
+    for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
         test_utils::read_data_from_device(
             cluster,
             readback_membar_vec,
@@ -473,7 +474,7 @@ TEST(SiliconDriverGS, MultiThreadedMemBar) {  // this tests takes ~5 mins to run
     std::thread th1 = std::thread([&] {
         std::uint32_t address = base_addr;
         for (int loop = 0; loop < 100; loop++) {
-            for (auto& core : cluster.get_soc_descriptor(0).workers) {
+            for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
                 std::vector<uint32_t> readback_vec = {};
                 cluster.write_to_device(
                     vec1.data(), vec1.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "");
@@ -491,7 +492,7 @@ TEST(SiliconDriverGS, MultiThreadedMemBar) {  // this tests takes ~5 mins to run
     std::thread th2 = std::thread([&] {
         std::uint32_t address = base_addr + vec1.size() * 4;
         for (int loop = 0; loop < 100; loop++) {
-            for (auto& core : cluster.get_soc_descriptor(0).workers) {
+            for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
                 std::vector<uint32_t> readback_vec = {};
                 cluster.write_to_device(
                     vec2.data(), vec2.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "");
@@ -509,7 +510,7 @@ TEST(SiliconDriverGS, MultiThreadedMemBar) {  // this tests takes ~5 mins to run
     th1.join();
     th2.join();
 
-    for (auto& core : cluster.get_soc_descriptor(0).workers) {
+    for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
         test_utils::read_data_from_device(
             cluster,
             readback_membar_vec,
@@ -539,7 +540,7 @@ TEST(SiliconDriverGS, SysmemTestWithPcie) {
     cluster.start_device(tt_device_params{});  // no special parameters
 
     const chip_id_t mmio_chip_id = 0;
-    const auto PCIE = cluster.get_soc_descriptor(mmio_chip_id).pcie_cores.at(0);
+    const auto PCIE = cluster.get_soc_descriptor(mmio_chip_id).get_cores(CoreType::PCIE).at(0);
     const tt_cxy_pair PCIE_CORE(mmio_chip_id, PCIE.x, PCIE.y);
     const size_t test_size_bytes = 0x4000;  // Arbitrarilly chosen, but small size so the test runs quickly.
 
diff --git a/tests/microbenchmark/device_fixture.hpp b/tests/microbenchmark/device_fixture.hpp
index 4cea4506..c92eeccd 100644
--- a/tests/microbenchmark/device_fixture.hpp
+++ b/tests/microbenchmark/device_fixture.hpp
@@ -41,7 +41,7 @@ class uBenchmarkFixture : public ::testing::Test {
         for (int i = 0; i < target_devices.size(); i++) {
             // Iterate over devices and only setup static TLBs for functional worker cores
             auto& sdesc = device->get_soc_descriptor(i);
-            for (auto& core : sdesc.workers) {
+            for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
                 // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE.
                 device->configure_tlb(i, core, get_static_tlb_index(core), l1_mem::address_map::DATA_BUFFER_SPACE_BASE);
             }
diff --git a/tests/microbenchmark/test_rw_tensix.cpp b/tests/microbenchmark/test_rw_tensix.cpp
index f0d9a9d5..51fa0cd9 100644
--- a/tests/microbenchmark/test_rw_tensix.cpp
+++ b/tests/microbenchmark/test_rw_tensix.cpp
@@ -27,7 +27,7 @@ TEST_F(uBenchmarkFixture, WriteAllCores32Bytes) {
     ankerl::nanobench::Bench bench_static;
     ankerl::nanobench::Bench bench_dynamic;
 
-    for (auto& core : device->get_soc_descriptor(0).workers) {
+    for (auto& core : device->get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
         std::stringstream wname;
         wname << "Write to device core (" << core.x << ", " << core.y << ")";
         // Write 32 bytes through static tlbs
diff --git a/tests/test_utils/stimulus_generators.hpp b/tests/test_utils/stimulus_generators.hpp
index 99351ad7..3e2817d9 100644
--- a/tests/test_utils/stimulus_generators.hpp
+++ b/tests/test_utils/stimulus_generators.hpp
@@ -359,7 +359,7 @@ static inline std::vector<tt_cxy_pair> generate_core_index_locations(
 
     std::vector<tt_cxy_pair> core_index_to_location = {};
     for (chip_id_t chip : cluster_desc.get_all_chips()) {
-        for (auto const& dram_channel_cores : soc_desc.dram_cores) {
+        for (auto const& dram_channel_cores : soc_desc.get_dram_cores()) {
             for (tt_xy_pair const& dram_core : dram_channel_cores) {
                 core_index_to_location.push_back({static_cast<size_t>(chip), dram_core.x, dram_core.y});
             }
diff --git a/tests/wormhole/test_cluster_wh.cpp b/tests/wormhole/test_cluster_wh.cpp
index e53846c3..82d91e92 100644
--- a/tests/wormhole/test_cluster_wh.cpp
+++ b/tests/wormhole/test_cluster_wh.cpp
@@ -111,7 +111,7 @@ TEST(SiliconDriverWH, Harvesting) {
     ASSERT_EQ(cluster.using_harvested_soc_descriptors(), true) << "Expected Driver to have performed harvesting";
 
     for (const auto& chip : sdesc_per_chip) {
-        ASSERT_EQ(chip.second.workers.size(), 48)
+        ASSERT_EQ(chip.second.get_cores(CoreType::TENSIX).size(), 48)
             << "Expected SOC descriptor with harvesting to have 48 workers for chip" << chip.first;
     }
     for (int i = 0; i < num_devices; i++) {
@@ -139,7 +139,8 @@ TEST(SiliconDriverWH, CustomSocDesc) {
     ASSERT_EQ(cluster.using_harvested_soc_descriptors(), false)
         << "SOC descriptors should not be modified when harvesting is disabled";
     for (const auto& chip : sdesc_per_chip) {
-        ASSERT_EQ(chip.second.workers.size(), 1) << "Expected 1x1 SOC descriptor to be unmodified by driver";
+        ASSERT_EQ(chip.second.get_cores(CoreType::TENSIX).size(), 1)
+            << "Expected 1x1 SOC descriptor to be unmodified by driver";
     }
 }
@@ -159,7 +160,7 @@ TEST(SiliconDriverWH, HarvestingRuntime) {
         // Iterate over MMIO devices and only setup static TLBs for worker cores
         if (std::find(mmio_devices.begin(), mmio_devices.end(), i) != mmio_devices.end()) {
             auto& sdesc = cluster.get_soc_descriptor(i);
-            for (auto& core : sdesc.workers) {
+            for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
                 // Statically mapping a 1MB TLB to this core, starting from address NCRISC_FIRMWARE_BASE.
                 cluster.configure_tlb(
                     i, core, get_static_tlb_index_callback(core), l1_mem::address_map::NCRISC_FIRMWARE_BASE);
@@ -180,7 +181,7 @@ TEST(SiliconDriverWH, HarvestingRuntime) {
     std::uint32_t dynamic_write_address = 0x40000000;
     for (int loop = 0; loop < 100;
          loop++) {  // Write to each core a 100 times at different statically mapped addresses
-        for (auto& core : cluster.get_soc_descriptor(i).workers) {
+        for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
             cluster.write_to_device(
                 vector_to_write.data(),
                 vector_to_write.size() * sizeof(std::uint32_t),
@@ -247,7 +248,7 @@ TEST(SiliconDriverWH, UnalignedStaticTLB_RW) {
         // Iterate over MMIO devices and only setup static TLBs for worker cores
         if (std::find(mmio_devices.begin(), mmio_devices.end(), i) != mmio_devices.end()) {
             auto& sdesc = cluster.get_soc_descriptor(i);
-            for (auto& core : sdesc.workers) {
+            for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
                 // Statically mapping a 1MB TLB to this core, starting from address NCRISC_FIRMWARE_BASE.
                 cluster.configure_tlb(
                     i, core, get_static_tlb_index_callback(core), l1_mem::address_map::NCRISC_FIRMWARE_BASE);
@@ -268,7 +269,7 @@ TEST(SiliconDriverWH, UnalignedStaticTLB_RW) {
         std::vector<uint8_t> readback_vec(size, 0);
         std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE;
         for (int loop = 0; loop < 50; loop++) {
-            for (auto& core : cluster.get_soc_descriptor(i).workers) {
+            for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
                 cluster.write_to_device(write_vec.data(), size, tt_cxy_pair(i, core), address, "");
                 cluster.wait_for_non_mmio_flush();
                 cluster.read_from_device(readback_vec.data(), tt_cxy_pair(i, core), address, size, "");
@@ -301,7 +302,7 @@ TEST(SiliconDriverWH, StaticTLB_RW) {
         // Iterate over MMIO devices and only setup static TLBs for worker cores
         if (std::find(mmio_devices.begin(), mmio_devices.end(), i) != mmio_devices.end()) {
             auto& sdesc = cluster.get_soc_descriptor(i);
-            for (auto& core : sdesc.workers) {
+            for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
                 // Statically mapping a 1MB TLB to this core, starting from address NCRISC_FIRMWARE_BASE.
                 cluster.configure_tlb(
                     i, core, get_static_tlb_index_callback(core), l1_mem::address_map::NCRISC_FIRMWARE_BASE);
@@ -320,7 +321,7 @@ TEST(SiliconDriverWH, StaticTLB_RW) {
     std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE;
     // Write to each core a 100 times at different statically mapped addresses
     for (int loop = 0; loop < 100; loop++) {
-        for (auto& core : cluster.get_soc_descriptor(i).workers) {
+        for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
             cluster.write_to_device(
                 vector_to_write.data(),
                 vector_to_write.size() * sizeof(std::uint32_t),
@@ -369,7 +370,7 @@ TEST(SiliconDriverWH, DynamicTLB_RW) {
     std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE;
     // Write to each core a 100 times at different statically mapped addresses
     for (int loop = 0; loop < 100; loop++) {
-        for (auto& core : cluster.get_soc_descriptor(i).workers) {
+        for (auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
             cluster.write_to_device(
                 vector_to_write.data(),
                 vector_to_write.size() * sizeof(std::uint32_t),
@@ -417,7 +418,7 @@ TEST(SiliconDriverWH, MultiThreadedDevice) {
         std::vector<uint32_t> readback_vec = {};
         std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE;
         for (int loop = 0; loop < 100; loop++) {
-            for (auto& core : cluster.get_soc_descriptor(0).workers) {
+            for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
                 cluster.write_to_device(
                     vector_to_write.data(),
                     vector_to_write.size() * sizeof(std::uint32_t),
@@ -438,7 +439,7 @@ TEST(SiliconDriverWH, MultiThreadedDevice) {
         std::vector<uint32_t> vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
         std::vector<uint32_t> readback_vec = {};
         std::uint32_t address = 0x30000000;
-        for (auto& core_ls : cluster.get_soc_descriptor(0).dram_cores) {
+        for (auto& core_ls : cluster.get_soc_descriptor(0).get_dram_cores()) {
             for (int loop = 0; loop < 100; loop++) {
                 for (auto& core : core_ls) {
                     cluster.write_to_device(
@@ -483,7 +484,7 @@ TEST(SiliconDriverWH, MultiThreadedMemBar) {
         // Iterate over devices and only setup static TLBs for functional worker cores
         if (std::find(mmio_devices.begin(), mmio_devices.end(), i) != mmio_devices.end()) {
             auto& sdesc = cluster.get_soc_descriptor(i);
-            for (auto& core : sdesc.workers) {
+            for (auto& core : sdesc.get_cores(CoreType::TENSIX)) {
                // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE.
                cluster.configure_tlb(i, core, get_static_tlb_index_callback(core), base_addr);
            }
@@ -494,7 +495,7 @@ TEST(SiliconDriverWH, MultiThreadedMemBar) {
     cluster.start_device(default_params);
 
     std::vector<uint32_t> readback_membar_vec = {};
-    for (auto& core : cluster.get_soc_descriptor(0).workers) {
+    for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
         test_utils::read_data_from_device(
             cluster,
             readback_membar_vec,
@@ -516,7 +517,7 @@ TEST(SiliconDriverWH, MultiThreadedMemBar) {
         readback_membar_vec = {};
     }
 
-    for (auto& core : cluster.get_soc_descriptor(0).ethernet_cores) {
+    for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::ETH)) {
         test_utils::read_data_from_device(
             cluster,
             readback_membar_vec,
@@ -545,7 +546,7 @@ TEST(SiliconDriverWH, MultiThreadedMemBar) {
     std::thread th1 = std::thread([&] {
         std::uint32_t address = base_addr;
         for (int loop = 0; loop < 50; loop++) {
-            for (auto& core : cluster.get_soc_descriptor(0).workers) {
+            for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
                 std::vector<uint32_t> readback_vec = {};
                 cluster.write_to_device(
                     vec1.data(), vec1.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "");
@@ -563,7 +564,7 @@ TEST(SiliconDriverWH, MultiThreadedMemBar) {
     std::thread th2 = std::thread([&] {
         std::uint32_t address = base_addr + vec1.size() * 4;
         for (int loop = 0; loop < 50; loop++) {
-            for (auto& core : cluster.get_soc_descriptor(0).workers) {
+            for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
                 std::vector<uint32_t> readback_vec = {};
                 cluster.write_to_device(
                     vec2.data(), vec2.size() * sizeof(std::uint32_t), tt_cxy_pair(0, core), address, "");
@@ -581,7 +582,7 @@ TEST(SiliconDriverWH, MultiThreadedMemBar) {
     th1.join();
     th2.join();
 
-    for (auto& core : cluster.get_soc_descriptor(0).workers) {
+    for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::TENSIX)) {
         test_utils::read_data_from_device(
             cluster,
             readback_membar_vec,
@@ -594,7 +595,7 @@ TEST(SiliconDriverWH, MultiThreadedMemBar) {
         readback_membar_vec = {};
     }
 
-    for (auto& core : cluster.get_soc_descriptor(0).ethernet_cores) {
+    for (auto& core : cluster.get_soc_descriptor(0).get_cores(CoreType::ETH)) {
         test_utils::read_data_from_device(
             cluster,
             readback_membar_vec,
@@ -658,7 +659,7 @@ TEST(SiliconDriverWH, BroadcastWrite) {
     cluster.wait_for_non_mmio_flush();
 
     for (const auto i : target_devices) {
-        for (const auto& core : cluster.get_soc_descriptor(i).workers) {
+        for (const auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
             if (rows_to_exclude.find(core.y) != rows_to_exclude.end()) {
                 continue;
             }
@@ -753,7 +754,7 @@ TEST(SiliconDriverWH, VirtualCoordinateBroadcast) {
     cluster.wait_for_non_mmio_flush();
 
     for (const auto i : target_devices) {
-        for (const auto& core : cluster.get_soc_descriptor(i).workers) {
+        for (const auto& core : cluster.get_soc_descriptor(i).get_cores(CoreType::TENSIX)) {
            if (rows_to_exclude.find(core.y) != rows_to_exclude.end()) {
                continue;
            }
@@ -825,7 +826,7 @@ TEST(SiliconDriverWH, SysmemTestWithPcie) {
     cluster.start_device(tt_device_params{});  // no special parameters
 
     const chip_id_t mmio_chip_id = 0;
-    const auto PCIE = cluster.get_soc_descriptor(mmio_chip_id).pcie_cores.at(0);
+    const auto PCIE = cluster.get_soc_descriptor(mmio_chip_id).get_cores(CoreType::PCIE).at(0);
     const tt_cxy_pair PCIE_CORE(mmio_chip_id, PCIE.x, PCIE.y);
     const size_t test_size_bytes = 0x4000;  // Arbitrarilly chosen, but small size so the test runs quickly.
 
@@ -891,7 +892,7 @@ TEST(SiliconDriverWH, RandomSysmemTestWithPcie) {
     cluster.start_device(tt_device_params{});  // no special parameters
 
     const chip_id_t mmio_chip_id = 0;
-    const auto PCIE = cluster.get_soc_descriptor(mmio_chip_id).pcie_cores.at(0);
+    const auto PCIE = cluster.get_soc_descriptor(mmio_chip_id).get_cores(CoreType::PCIE).at(0);
     const tt_cxy_pair PCIE_CORE(mmio_chip_id, PCIE.x, PCIE.y);
     const size_t ONE_GIG = 1 << 30;
     const size_t num_tests = 0x20000;  // runs in a reasonable amount of time
@@ -956,7 +957,7 @@ TEST(SiliconDriverWH, LargeAddressTlb) {
         true,   // clean system resources - yes
         true);  // perform harvesting - yes
 
-    const tt_xy_pair ARC_CORE = cluster.get_soc_descriptor(0).arc_cores.at(0);
+    const tt_xy_pair ARC_CORE = cluster.get_soc_descriptor(0).get_cores(CoreType::ARC).at(0);
     const tt_cxy_pair ARC_CORE_CHIP(0, ARC_CORE.x, ARC_CORE.y);
 
     set_barrier_params(cluster);
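
Taken together, the tests exercise the accessor across every core class this patch touches. The recurring
patterns, collected in one place (illustrative; index 0 follows the tests' channel-0 convention):

    const auto tensix_cores = soc_desc.get_cores(CoreType::TENSIX);   // bulk read/write targets
    const auto eth_cores = soc_desc.get_cores(CoreType::ETH);         // may be empty; guarded in test_chip.cpp
    const auto arc_core = soc_desc.get_cores(CoreType::ARC).at(0);    // LargeAddressTlb pattern
    const auto pcie_core = soc_desc.get_cores(CoreType::PCIE).at(0);  // Sysmem tests pattern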