From fe56807d0118b2e7495ecb0c42750b4bf2b41c76 Mon Sep 17 00:00:00 2001 From: pjanevski Date: Thu, 9 Jan 2025 18:27:20 +0000 Subject: [PATCH] Add software harvesting --- device/api/umd/device/cluster.h | 15 ++++++----- device/api/umd/device/types/harvesting.h | 19 +++++++++++++ device/cluster.cpp | 34 +++++++++++++----------- tests/api/test_soc_descriptor.cpp | 2 +- tests/grayskull/test_cluster_gs.cpp | 19 ++++++------- tests/wormhole/test_cluster_wh.cpp | 28 +++++++++++-------- 6 files changed, 73 insertions(+), 44 deletions(-) create mode 100644 device/api/umd/device/types/harvesting.h diff --git a/device/api/umd/device/cluster.h b/device/api/umd/device/cluster.h index 9a36c041..5fbe95c2 100644 --- a/device/api/umd/device/cluster.h +++ b/device/api/umd/device/cluster.h @@ -24,6 +24,7 @@ #include "umd/device/types/arch.h" #include "umd/device/types/cluster_descriptor_types.h" #include "umd/device/types/cluster_types.h" +#include "umd/device/types/harvesting.h" #include "umd/device/types/tlb.h" using TLB_DATA = tt::umd::tlb_data; @@ -498,7 +499,7 @@ class Cluster : public tt_device { const bool skip_driver_allocs = false, const bool clean_system_resources = false, bool perform_harvesting = true, - std::unordered_map simulated_harvesting_masks = {}); + std::unordered_map simulated_harvesting_masks = {}); /** * Cluster constructor. @@ -518,7 +519,7 @@ class Cluster : public tt_device { const bool skip_driver_allocs = false, const bool clean_system_resources = false, bool perform_harvesting = true, - std::unordered_map simulated_harvesting_masks = {}); + std::unordered_map simulated_harvesting_masks = {}); /** * Cluster constructor. @@ -542,7 +543,7 @@ class Cluster : public tt_device { const bool skip_driver_allocs = false, const bool clean_system_resources = false, bool perform_harvesting = true, - std::unordered_map simulated_harvesting_masks = {}); + std::unordered_map simulated_harvesting_masks = {}); /** * Cluster constructor. 
@@ -564,7 +565,7 @@ class Cluster : public tt_device { const bool skip_driver_allocs = false, const bool clean_system_resources = false, bool perform_harvesting = true, - std::unordered_map simulated_harvesting_masks = {}); + std::unordered_map simulated_harvesting_masks = {}); /** * Cluster constructor which creates a cluster with Mock chips. @@ -873,19 +874,19 @@ class Cluster : public tt_device { chip_id_t logical_device_id, tt_ClusterDescriptor* cluster_desc, bool perform_harvesting, - std::unordered_map& simulated_harvesting_masks); + std::unordered_map& simulated_harvesting_masks); void add_chip(chip_id_t chip_id, std::unique_ptr chip); uint32_t get_tensix_harvesting_mask( chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, bool perform_harvesting, - std::unordered_map& simulated_harvesting_masks); + std::unordered_map& simulated_harvesting_masks); void construct_cluster( const uint32_t& num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs, const bool clean_system_resources, bool perform_harvesting, - std::unordered_map simulated_harvesting_masks); + std::unordered_map simulated_harvesting_masks); tt::umd::CoreCoord translate_chip_coord( const chip_id_t chip, const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const; diff --git a/device/api/umd/device/types/harvesting.h b/device/api/umd/device/types/harvesting.h new file mode 100644 index 00000000..7eddc975 --- /dev/null +++ b/device/api/umd/device/types/harvesting.h @@ -0,0 +1,19 @@ +/* + * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include <cstdint> + +namespace tt::umd { + +struct SoftwareHarvesting { + uint16_t tensix_harvesting_mask; + uint16_t eth_harvesting_mask; + uint8_t dram_harvesting_mask; +}; + +} // namespace tt::umd diff --git a/device/cluster.cpp b/device/cluster.cpp index 238e83eb..23fe676a 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -281,7 +281,7 @@ void Cluster::construct_cluster( const bool skip_driver_allocs, const bool clean_system_resources, bool perform_harvesting, - std::unordered_map simulated_harvesting_masks) { + std::unordered_map simulated_harvesting_masks) { if (!skip_driver_allocs) { auto available_device_ids = detect_available_device_ids(); log_info(LogSiliconDriver, "Detected PCI devices: {}", available_device_ids); @@ -374,8 +374,8 @@ void Cluster::construct_cluster( "Could not find harvesting mask for device_id {}", *device_id); if (arch_name == tt::ARCH::GRAYSKULL) { - if ((simulated_harvesting_masks.at(*device_id) & harvested_rows_per_target[*device_id]) != - harvested_rows_per_target[*device_id]) { + if ((simulated_harvesting_masks.at(*device_id).tensix_harvesting_mask & + harvested_rows_per_target[*device_id]) != harvested_rows_per_target[*device_id]) { log_warning( LogSiliconDriver, "Simulated harvesting config for device {} does not include the actual harvesting config. 
" @@ -383,23 +383,25 @@ void Cluster::construct_cluster( "{} Simulated Harvested Rows : {}", *device_id, harvested_rows_per_target[*device_id], - simulated_harvesting_masks.at(*device_id)); + simulated_harvesting_masks.at(*device_id).tensix_harvesting_mask); } - simulated_harvesting_masks.at(*device_id) |= harvested_rows_per_target[*device_id]; + simulated_harvesting_masks.at(*device_id).tensix_harvesting_mask |= + harvested_rows_per_target[*device_id]; } else if (arch_name == tt::ARCH::WORMHOLE_B0) { log_assert( - std::bitset<32>(simulated_harvesting_masks.at(*device_id)).count() >= + std::bitset<32>(simulated_harvesting_masks.at(*device_id).tensix_harvesting_mask).count() >= std::bitset<32>(harvested_rows_per_target[*device_id]).count(), "Simulated Harvesting for WH must contain at least as many rows as the actual harvesting config. " "Actual Harvested Rows : {} Simulated Harvested Rows : {}", harvested_rows_per_target[*device_id], - simulated_harvesting_masks.at(*device_id)); - num_rows_harvested.at(*device_id) = std::bitset<32>(simulated_harvesting_masks.at(*device_id)).count(); + simulated_harvesting_masks.at(*device_id).tensix_harvesting_mask); + num_rows_harvested.at(*device_id) = + std::bitset<32>(simulated_harvesting_masks.at(*device_id).tensix_harvesting_mask).count(); log_assert( performed_harvesting ? 
translation_tables_en : true, "Using a harvested WH cluster with NOC translation disabled."); } - harvested_rows_per_target[*device_id] = simulated_harvesting_masks.at(*device_id); + harvested_rows_per_target[*device_id] = simulated_harvesting_masks.at(*device_id).tensix_harvesting_mask; } } @@ -439,7 +441,7 @@ std::unique_ptr Cluster::construct_chip_from_cluster( chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, bool perform_harvesting, - std::unordered_map& simulated_harvesting_masks) { + std::unordered_map& simulated_harvesting_masks) { tt::ARCH arch = cluster_desc->get_arch(chip_id); const BoardType chip_board_type = cluster_desc->get_board_type(chip_id); std::string soc_desc_path = tt_SocDescriptor::get_soc_descriptor_path(arch, chip_board_type); @@ -467,7 +469,7 @@ uint32_t Cluster::get_tensix_harvesting_mask( chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, bool perform_harvesting, - std::unordered_map& simulated_harvesting_masks) { + std::unordered_map& simulated_harvesting_masks) { if (!perform_harvesting) { log_info(LogSiliconDriver, "Skipping harvesting for chip {}.", chip_id); return 0; @@ -476,7 +478,7 @@ uint32_t Cluster::get_tensix_harvesting_mask( uint32_t tensix_harvesting_mask = CoordinateManager::shuffle_tensix_harvesting_mask( cluster_desc->get_arch(chip_id), tensix_harvesting_mask_physical_layout); uint32_t simulated_harvesting_mask = (simulated_harvesting_masks.find(chip_id) != simulated_harvesting_masks.end()) - ? simulated_harvesting_masks.at(chip_id) + ? 
simulated_harvesting_masks.at(chip_id).tensix_harvesting_mask : 0; if (simulated_harvesting_mask != 0) { log_info( @@ -502,7 +504,7 @@ Cluster::Cluster( const bool skip_driver_allocs, const bool clean_system_resources, bool perform_harvesting, - std::unordered_map simulated_harvesting_masks) { + std::unordered_map simulated_harvesting_masks) { cluster_desc = tt_ClusterDescriptor::create(); for (auto& chip_id : cluster_desc->get_all_chips()) { @@ -528,7 +530,7 @@ Cluster::Cluster( const bool skip_driver_allocs, const bool clean_system_resources, bool perform_harvesting, - std::unordered_map simulated_harvesting_masks) { + std::unordered_map simulated_harvesting_masks) { cluster_desc = tt_ClusterDescriptor::create(); for (auto& chip_id : target_devices) { @@ -559,7 +561,7 @@ Cluster::Cluster( const bool skip_driver_allocs, const bool clean_system_resources, bool perform_harvesting, - std::unordered_map simulated_harvesting_masks) { + std::unordered_map simulated_harvesting_masks) { cluster_desc = tt_ClusterDescriptor::create(); for (auto& chip_id : target_devices) { @@ -596,7 +598,7 @@ Cluster::Cluster( const bool skip_driver_allocs, const bool clean_system_resources, bool perform_harvesting, - std::unordered_map simulated_harvesting_masks) { + const std::unordered_map simulated_harvesting_masks) { cluster_desc = tt_ClusterDescriptor::create(); for (auto& [chip_id, chip] : chips) { diff --git a/tests/api/test_soc_descriptor.cpp b/tests/api/test_soc_descriptor.cpp index 9a2c0d11..9d39dc4e 100644 --- a/tests/api/test_soc_descriptor.cpp +++ b/tests/api/test_soc_descriptor.cpp @@ -439,7 +439,7 @@ TEST(SocDescriptor, SocDescriptorWormholeNoLogicalForHarvestedCores) { } TEST(SocDescriptor, SocDescriptorBlackholeNoLogicalForHarvestedCores) { - tt_SocDescriptor soc_desc(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch.yaml"), 1); + tt_SocDescriptor soc_desc(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_local.yaml"), 1); 
EXPECT_THROW(soc_desc.get_harvested_cores(CoreType::TENSIX, CoordSystem::LOGICAL), std::runtime_error); diff --git a/tests/grayskull/test_cluster_gs.cpp b/tests/grayskull/test_cluster_gs.cpp index 7640ffc4..10b18101 100644 --- a/tests/grayskull/test_cluster_gs.cpp +++ b/tests/grayskull/test_cluster_gs.cpp @@ -53,7 +53,7 @@ TEST(SiliconDriverGS, CreateMultipleInstance) { TEST(SiliconDriverGS, Harvesting) { std::set target_devices = {0}; - std::unordered_map simulated_harvesting_masks = {{0, 6}, {1, 12}}; + std::unordered_map simulated_harvesting_masks = {{0, {6, 0, 0}}, {1, {12, 0, 0}}}; uint32_t num_host_mem_ch_per_mmio_device = 1; Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true, true, simulated_harvesting_masks); auto sdesc_per_chip = cluster.get_virtual_soc_descriptors(); @@ -66,22 +66,23 @@ TEST(SiliconDriverGS, Harvesting) { } // harvesting info stored in soc descriptor is in logical coordinates. ASSERT_EQ( - cluster.get_soc_descriptor(0).tensix_harvesting_mask & simulated_harvesting_masks[0], - simulated_harvesting_masks[0]) + cluster.get_soc_descriptor(0).tensix_harvesting_mask & simulated_harvesting_masks[0].tensix_harvesting_mask, + simulated_harvesting_masks[0].tensix_harvesting_mask) << "Expected first chip to include simulated harvesting mask of 6"; // get_harvesting_masks_for_soc_descriptors will return harvesting info in noc0 coordinates. 
- simulated_harvesting_masks[0] = CoordinateManager::shuffle_tensix_harvesting_mask_to_noc0_coords( - tt::ARCH::GRAYSKULL, simulated_harvesting_masks[0]); + simulated_harvesting_masks[0].tensix_harvesting_mask = + CoordinateManager::shuffle_tensix_harvesting_mask_to_noc0_coords( + tt::ARCH::GRAYSKULL, simulated_harvesting_masks[0].tensix_harvesting_mask); ASSERT_EQ( - cluster.get_harvesting_masks_for_soc_descriptors().at(0) & simulated_harvesting_masks[0], - simulated_harvesting_masks[0]) + cluster.get_harvesting_masks_for_soc_descriptors().at(0) & simulated_harvesting_masks[0].tensix_harvesting_mask, + simulated_harvesting_masks[0].tensix_harvesting_mask) << "Expected first chip to include simulated harvesting mask of 6"; cluster.close_device(); } TEST(SiliconDriverGS, CustomSocDesc) { std::set target_devices = {0}; - std::unordered_map simulated_harvesting_masks = {{0, 6}, {1, 12}}; + std::unordered_map simulated_harvesting_masks = {{0, {6, 0, 0}}, {1, {12, 0, 0}}}; uint32_t num_host_mem_ch_per_mmio_device = 1; // Initialize the driver with a 1x1 descriptor and explictly do not perform harvesting Cluster cluster = Cluster( @@ -110,7 +111,7 @@ TEST(SiliconDriverGS, HarvestingRuntime) { }; std::set target_devices = {0}; - std::unordered_map simulated_harvesting_masks = {{0, 6}, {1, 12}}; + std::unordered_map simulated_harvesting_masks = {{0, {6, 0, 0}}, {1, {12, 0, 0}}}; uint32_t num_host_mem_ch_per_mmio_device = 1; Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true, true, simulated_harvesting_masks); diff --git a/tests/wormhole/test_cluster_wh.cpp b/tests/wormhole/test_cluster_wh.cpp index a90186a9..517e9fc5 100644 --- a/tests/wormhole/test_cluster_wh.cpp +++ b/tests/wormhole/test_cluster_wh.cpp @@ -102,7 +102,7 @@ TEST(SiliconDriverWH, CreateDestroy) { TEST(SiliconDriverWH, Harvesting) { std::set target_devices = get_target_devices(); int num_devices = target_devices.size(); - std::unordered_map simulated_harvesting_masks = {{0, 30}, {1, 60}}; 
+ std::unordered_map simulated_harvesting_masks = {{0, {30, 0, 0}}, {1, {60, 0, 0}}}; uint32_t num_host_mem_ch_per_mmio_device = 1; Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true, true, simulated_harvesting_masks); @@ -114,7 +114,7 @@ TEST(SiliconDriverWH, Harvesting) { for (int i = 0; i < num_devices; i++) { uint32_t harvesting_mask_logical = CoordinateManager::shuffle_tensix_harvesting_mask(tt::ARCH::WORMHOLE_B0, harvesting_info.at(i)); - simulated_harvesting_masks[i] |= harvesting_mask_logical; + simulated_harvesting_masks[i].tensix_harvesting_mask |= harvesting_mask_logical; } ASSERT_EQ(cluster.using_harvested_soc_descriptors(), true) << "Expected Driver to have performed harvesting"; @@ -125,22 +125,28 @@ TEST(SiliconDriverWH, Harvesting) { } for (int i = 0; i < num_devices; i++) { // harvesting info stored in soc descriptor is in logical coordinates. - ASSERT_EQ(cluster.get_soc_descriptor(i).tensix_harvesting_mask, simulated_harvesting_masks.at(i)) - << "Expecting chip " << i << " to have harvesting mask of " << simulated_harvesting_masks.at(i); + ASSERT_EQ( + cluster.get_soc_descriptor(i).tensix_harvesting_mask, + simulated_harvesting_masks.at(i).tensix_harvesting_mask) + << "Expecting chip " << i << " to have harvesting mask of " + << simulated_harvesting_masks.at(i).tensix_harvesting_mask; // get_harvesting_masks_for_soc_descriptors will return harvesting info in noc0 coordinates. 
- simulated_harvesting_masks[i] = CoordinateManager::shuffle_tensix_harvesting_mask_to_noc0_coords( - tt::ARCH::WORMHOLE_B0, simulated_harvesting_masks[i]); + simulated_harvesting_masks[i].tensix_harvesting_mask = + CoordinateManager::shuffle_tensix_harvesting_mask_to_noc0_coords( + tt::ARCH::WORMHOLE_B0, simulated_harvesting_masks[i].tensix_harvesting_mask); ASSERT_EQ( - cluster.get_harvesting_masks_for_soc_descriptors().at(i) & simulated_harvesting_masks.at(i), - simulated_harvesting_masks.at(i)) - << "Expecting chip " << i << " to give noc0 harvesting mask of " << simulated_harvesting_masks.at(i); + cluster.get_harvesting_masks_for_soc_descriptors().at(i) & + simulated_harvesting_masks.at(i).tensix_harvesting_mask, + simulated_harvesting_masks.at(i).tensix_harvesting_mask) + << "Expecting chip " << i << " to give noc0 harvesting mask of " + << simulated_harvesting_masks.at(i).tensix_harvesting_mask; } } TEST(SiliconDriverWH, CustomSocDesc) { std::set target_devices = get_target_devices(); - std::unordered_map simulated_harvesting_masks = {{0, 30}, {1, 60}}; + std::unordered_map simulated_harvesting_masks = {{0, {30, 0, 0}}, {1, {60, 0, 0}}}; uint32_t num_host_mem_ch_per_mmio_device = 1; // Initialize the driver with a 1x1 descriptor and explictly do not perform harvesting @@ -165,7 +171,7 @@ TEST(SiliconDriverWH, HarvestingRuntime) { auto get_static_tlb_index_callback = [](tt_xy_pair target) { return get_static_tlb_index(target); }; std::set target_devices = get_target_devices(); - std::unordered_map simulated_harvesting_masks = {{0, 30}, {1, 60}}; + std::unordered_map simulated_harvesting_masks = {{0, {30, 0, 0}}, {1, {60, 0, 0}}}; uint32_t num_host_mem_ch_per_mmio_device = 1;