Skip to content

Commit

Permalink
Add software harvesting
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT committed Jan 10, 2025
1 parent d36cdd0 commit fe56807
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 44 deletions.
15 changes: 8 additions & 7 deletions device/api/umd/device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "umd/device/types/arch.h"
#include "umd/device/types/cluster_descriptor_types.h"
#include "umd/device/types/cluster_types.h"
#include "umd/device/types/harvesting.h"
#include "umd/device/types/tlb.h"

using TLB_DATA = tt::umd::tlb_data;
Expand Down Expand Up @@ -498,7 +499,7 @@ class Cluster : public tt_device {
const bool skip_driver_allocs = false,
const bool clean_system_resources = false,
bool perform_harvesting = true,
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks = {});
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks = {});

/**
* Cluster constructor.
Expand All @@ -518,7 +519,7 @@ class Cluster : public tt_device {
const bool skip_driver_allocs = false,
const bool clean_system_resources = false,
bool perform_harvesting = true,
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks = {});
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks = {});

/**
* Cluster constructor.
Expand All @@ -542,7 +543,7 @@ class Cluster : public tt_device {
const bool skip_driver_allocs = false,
const bool clean_system_resources = false,
bool perform_harvesting = true,
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks = {});
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks = {});

/**
* Cluster constructor.
Expand All @@ -564,7 +565,7 @@ class Cluster : public tt_device {
const bool skip_driver_allocs = false,
const bool clean_system_resources = false,
bool perform_harvesting = true,
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks = {});
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks = {});

/**
* Cluster constructor which creates a cluster with Mock chips.
Expand Down Expand Up @@ -873,19 +874,19 @@ class Cluster : public tt_device {
chip_id_t logical_device_id,
tt_ClusterDescriptor* cluster_desc,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t>& simulated_harvesting_masks);
std::unordered_map<chip_id_t, SoftwareHarvesting>& simulated_harvesting_masks);
void add_chip(chip_id_t chip_id, std::unique_ptr<Chip> chip);
uint32_t get_tensix_harvesting_mask(
chip_id_t chip_id,
tt_ClusterDescriptor* cluster_desc,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t>& simulated_harvesting_masks);
std::unordered_map<chip_id_t, SoftwareHarvesting>& simulated_harvesting_masks);
void construct_cluster(
const uint32_t& num_host_mem_ch_per_mmio_device,
const bool skip_driver_allocs,
const bool clean_system_resources,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks);
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks);
tt::umd::CoreCoord translate_chip_coord(
const chip_id_t chip, const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const;

Expand Down
19 changes: 19 additions & 0 deletions device/api/umd/device/types/harvesting.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
* SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include <cstdint>

namespace tt::umd {

/**
 * Bundle of software-specified (simulated) harvesting masks for one chip.
 *
 * Used as the mapped value of the per-chip `simulated_harvesting_masks` maps
 * accepted by the Cluster constructors.
 *
 * Every mask defaults to 0, so a default-constructed instance never carries
 * indeterminate bits. The type remains an aggregate, so positional
 * initialization such as `{6, 0, 0}` keeps working.
 */
struct SoftwareHarvesting {
    // Bitmask of harvested Tensix rows.
    // NOTE(review): callers combine this with 32-bit masks; confirm 16 bits is
    // wide enough for every supported architecture.
    uint16_t tensix_harvesting_mask = 0;
    // Bitmask for ETH harvesting.
    // NOTE(review): bit layout is not visible from this header - confirm with consumers.
    uint16_t eth_harvesting_mask = 0;
    // Bitmask for DRAM harvesting.
    // NOTE(review): bit layout is not visible from this header - confirm with consumers.
    uint8_t dram_harvesting_mask = 0;
};

} // namespace tt::umd
34 changes: 18 additions & 16 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ void Cluster::construct_cluster(
const bool skip_driver_allocs,
const bool clean_system_resources,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks) {
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks) {
if (!skip_driver_allocs) {
auto available_device_ids = detect_available_device_ids();
log_info(LogSiliconDriver, "Detected PCI devices: {}", available_device_ids);
Expand Down Expand Up @@ -374,32 +374,34 @@ void Cluster::construct_cluster(
"Could not find harvesting mask for device_id {}",
*device_id);
if (arch_name == tt::ARCH::GRAYSKULL) {
if ((simulated_harvesting_masks.at(*device_id) & harvested_rows_per_target[*device_id]) !=
harvested_rows_per_target[*device_id]) {
if ((simulated_harvesting_masks.at(*device_id).tensix_harvesting_mask &
harvested_rows_per_target[*device_id]) != harvested_rows_per_target[*device_id]) {
log_warning(
LogSiliconDriver,
"Simulated harvesting config for device {} does not include the actual harvesting config. "
"Simulated harvesting mask will be added to the real harvesting mask. Actual Harvested Rows : "
"{} Simulated Harvested Rows : {}",
*device_id,
harvested_rows_per_target[*device_id],
simulated_harvesting_masks.at(*device_id));
simulated_harvesting_masks.at(*device_id).tensix_harvesting_mask);
}
simulated_harvesting_masks.at(*device_id) |= harvested_rows_per_target[*device_id];
simulated_harvesting_masks.at(*device_id).tensix_harvesting_mask |=
harvested_rows_per_target[*device_id];
} else if (arch_name == tt::ARCH::WORMHOLE_B0) {
log_assert(
std::bitset<32>(simulated_harvesting_masks.at(*device_id)).count() >=
std::bitset<32>(simulated_harvesting_masks.at(*device_id).tensix_harvesting_mask).count() >=
std::bitset<32>(harvested_rows_per_target[*device_id]).count(),
"Simulated Harvesting for WH must contain at least as many rows as the actual harvesting config. "
"Actual Harvested Rows : {} Simulated Harvested Rows : {}",
harvested_rows_per_target[*device_id],
simulated_harvesting_masks.at(*device_id));
num_rows_harvested.at(*device_id) = std::bitset<32>(simulated_harvesting_masks.at(*device_id)).count();
simulated_harvesting_masks.at(*device_id).tensix_harvesting_mask);
num_rows_harvested.at(*device_id) =
std::bitset<32>(simulated_harvesting_masks.at(*device_id).tensix_harvesting_mask).count();
log_assert(
performed_harvesting ? translation_tables_en : true,
"Using a harvested WH cluster with NOC translation disabled.");
}
harvested_rows_per_target[*device_id] = simulated_harvesting_masks.at(*device_id);
harvested_rows_per_target[*device_id] = simulated_harvesting_masks.at(*device_id).tensix_harvesting_mask;
}
}

Expand Down Expand Up @@ -439,7 +441,7 @@ std::unique_ptr<Chip> Cluster::construct_chip_from_cluster(
chip_id_t chip_id,
tt_ClusterDescriptor* cluster_desc,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t>& simulated_harvesting_masks) {
std::unordered_map<chip_id_t, SoftwareHarvesting>& simulated_harvesting_masks) {
tt::ARCH arch = cluster_desc->get_arch(chip_id);
const BoardType chip_board_type = cluster_desc->get_board_type(chip_id);
std::string soc_desc_path = tt_SocDescriptor::get_soc_descriptor_path(arch, chip_board_type);
Expand Down Expand Up @@ -467,7 +469,7 @@ uint32_t Cluster::get_tensix_harvesting_mask(
chip_id_t chip_id,
tt_ClusterDescriptor* cluster_desc,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t>& simulated_harvesting_masks) {
std::unordered_map<chip_id_t, SoftwareHarvesting>& simulated_harvesting_masks) {
if (!perform_harvesting) {
log_info(LogSiliconDriver, "Skipping harvesting for chip {}.", chip_id);
return 0;
Expand All @@ -476,7 +478,7 @@ uint32_t Cluster::get_tensix_harvesting_mask(
uint32_t tensix_harvesting_mask = CoordinateManager::shuffle_tensix_harvesting_mask(
cluster_desc->get_arch(chip_id), tensix_harvesting_mask_physical_layout);
uint32_t simulated_harvesting_mask = (simulated_harvesting_masks.find(chip_id) != simulated_harvesting_masks.end())
? simulated_harvesting_masks.at(chip_id)
? simulated_harvesting_masks.at(chip_id).tensix_harvesting_mask
: 0;
if (simulated_harvesting_mask != 0) {
log_info(
Expand All @@ -502,7 +504,7 @@ Cluster::Cluster(
const bool skip_driver_allocs,
const bool clean_system_resources,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks) {
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks) {
cluster_desc = tt_ClusterDescriptor::create();

for (auto& chip_id : cluster_desc->get_all_chips()) {
Expand All @@ -528,7 +530,7 @@ Cluster::Cluster(
const bool skip_driver_allocs,
const bool clean_system_resources,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks) {
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks) {
cluster_desc = tt_ClusterDescriptor::create();

for (auto& chip_id : target_devices) {
Expand Down Expand Up @@ -559,7 +561,7 @@ Cluster::Cluster(
const bool skip_driver_allocs,
const bool clean_system_resources,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks) {
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks) {
cluster_desc = tt_ClusterDescriptor::create();

for (auto& chip_id : target_devices) {
Expand Down Expand Up @@ -596,7 +598,7 @@ Cluster::Cluster(
const bool skip_driver_allocs,
const bool clean_system_resources,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks) {
const std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks) {
cluster_desc = tt_ClusterDescriptor::create();

for (auto& [chip_id, chip] : chips) {
Expand Down
2 changes: 1 addition & 1 deletion tests/api/test_soc_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ TEST(SocDescriptor, SocDescriptorWormholeNoLogicalForHarvestedCores) {
}

TEST(SocDescriptor, SocDescriptorBlackholeNoLogicalForHarvestedCores) {
tt_SocDescriptor soc_desc(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch.yaml"), 1);
tt_SocDescriptor soc_desc(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_local.yaml"), 1);

EXPECT_THROW(soc_desc.get_harvested_cores(CoreType::TENSIX, CoordSystem::LOGICAL), std::runtime_error);

Expand Down
19 changes: 10 additions & 9 deletions tests/grayskull/test_cluster_gs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ TEST(SiliconDriverGS, CreateMultipleInstance) {

TEST(SiliconDriverGS, Harvesting) {
std::set<chip_id_t> target_devices = {0};
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks = {{0, 6}, {1, 12}};
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks = {{0, {6, 0, 0}}, {1, {12, 0, 0}}};
uint32_t num_host_mem_ch_per_mmio_device = 1;
Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true, true, simulated_harvesting_masks);
auto sdesc_per_chip = cluster.get_virtual_soc_descriptors();
Expand All @@ -66,22 +66,23 @@ TEST(SiliconDriverGS, Harvesting) {
}
// harvesting info stored in soc descriptor is in logical coordinates.
ASSERT_EQ(
cluster.get_soc_descriptor(0).tensix_harvesting_mask & simulated_harvesting_masks[0],
simulated_harvesting_masks[0])
cluster.get_soc_descriptor(0).tensix_harvesting_mask & simulated_harvesting_masks[0].tensix_harvesting_mask,
simulated_harvesting_masks[0].tensix_harvesting_mask)
<< "Expected first chip to include simulated harvesting mask of 6";
// get_harvesting_masks_for_soc_descriptors will return harvesting info in noc0 coordinates.
simulated_harvesting_masks[0] = CoordinateManager::shuffle_tensix_harvesting_mask_to_noc0_coords(
tt::ARCH::GRAYSKULL, simulated_harvesting_masks[0]);
simulated_harvesting_masks[0].tensix_harvesting_mask =
CoordinateManager::shuffle_tensix_harvesting_mask_to_noc0_coords(
tt::ARCH::GRAYSKULL, simulated_harvesting_masks[0].tensix_harvesting_mask);
ASSERT_EQ(
cluster.get_harvesting_masks_for_soc_descriptors().at(0) & simulated_harvesting_masks[0],
simulated_harvesting_masks[0])
cluster.get_harvesting_masks_for_soc_descriptors().at(0) & simulated_harvesting_masks[0].tensix_harvesting_mask,
simulated_harvesting_masks[0].tensix_harvesting_mask)
<< "Expected first chip to include simulated harvesting mask of 6";
cluster.close_device();
}

TEST(SiliconDriverGS, CustomSocDesc) {
std::set<chip_id_t> target_devices = {0};
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks = {{0, 6}, {1, 12}};
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks = {{0, {6, 0, 0}}, {1, {12, 0, 0}}};
uint32_t num_host_mem_ch_per_mmio_device = 1;
// Initialize the driver with a 1x1 descriptor and explicitly do not perform harvesting
Cluster cluster = Cluster(
Expand Down Expand Up @@ -110,7 +111,7 @@ TEST(SiliconDriverGS, HarvestingRuntime) {
};

std::set<chip_id_t> target_devices = {0};
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks = {{0, 6}, {1, 12}};
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks = {{0, {6, 0, 0}}, {1, {12, 0, 0}}};
uint32_t num_host_mem_ch_per_mmio_device = 1;
Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true, true, simulated_harvesting_masks);

Expand Down
28 changes: 17 additions & 11 deletions tests/wormhole/test_cluster_wh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ TEST(SiliconDriverWH, CreateDestroy) {
TEST(SiliconDriverWH, Harvesting) {
std::set<chip_id_t> target_devices = get_target_devices();
int num_devices = target_devices.size();
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks = {{0, 30}, {1, 60}};
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks = {{0, {30, 0, 0}}, {1, {60, 0, 0}}};

uint32_t num_host_mem_ch_per_mmio_device = 1;
Cluster cluster = Cluster(num_host_mem_ch_per_mmio_device, false, true, true, simulated_harvesting_masks);
Expand All @@ -114,7 +114,7 @@ TEST(SiliconDriverWH, Harvesting) {
for (int i = 0; i < num_devices; i++) {
uint32_t harvesting_mask_logical =
CoordinateManager::shuffle_tensix_harvesting_mask(tt::ARCH::WORMHOLE_B0, harvesting_info.at(i));
simulated_harvesting_masks[i] |= harvesting_mask_logical;
simulated_harvesting_masks[i].tensix_harvesting_mask |= harvesting_mask_logical;
}

ASSERT_EQ(cluster.using_harvested_soc_descriptors(), true) << "Expected Driver to have performed harvesting";
Expand All @@ -125,22 +125,28 @@ TEST(SiliconDriverWH, Harvesting) {
}
for (int i = 0; i < num_devices; i++) {
// harvesting info stored in soc descriptor is in logical coordinates.
ASSERT_EQ(cluster.get_soc_descriptor(i).tensix_harvesting_mask, simulated_harvesting_masks.at(i))
<< "Expecting chip " << i << " to have harvesting mask of " << simulated_harvesting_masks.at(i);
ASSERT_EQ(
cluster.get_soc_descriptor(i).tensix_harvesting_mask,
simulated_harvesting_masks.at(i).tensix_harvesting_mask)
<< "Expecting chip " << i << " to have harvesting mask of "
<< simulated_harvesting_masks.at(i).tensix_harvesting_mask;

// get_harvesting_masks_for_soc_descriptors will return harvesting info in noc0 coordinates.
simulated_harvesting_masks[i] = CoordinateManager::shuffle_tensix_harvesting_mask_to_noc0_coords(
tt::ARCH::WORMHOLE_B0, simulated_harvesting_masks[i]);
simulated_harvesting_masks[i].tensix_harvesting_mask =
CoordinateManager::shuffle_tensix_harvesting_mask_to_noc0_coords(
tt::ARCH::WORMHOLE_B0, simulated_harvesting_masks[i].tensix_harvesting_mask);
ASSERT_EQ(
cluster.get_harvesting_masks_for_soc_descriptors().at(i) & simulated_harvesting_masks.at(i),
simulated_harvesting_masks.at(i))
<< "Expecting chip " << i << " to give noc0 harvesting mask of " << simulated_harvesting_masks.at(i);
cluster.get_harvesting_masks_for_soc_descriptors().at(i) &
simulated_harvesting_masks.at(i).tensix_harvesting_mask,
simulated_harvesting_masks.at(i).tensix_harvesting_mask)
<< "Expecting chip " << i << " to give noc0 harvesting mask of "
<< simulated_harvesting_masks.at(i).tensix_harvesting_mask;
}
}

TEST(SiliconDriverWH, CustomSocDesc) {
std::set<chip_id_t> target_devices = get_target_devices();
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks = {{0, 30}, {1, 60}};
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks = {{0, {30, 0, 0}}, {1, {60, 0, 0}}};

uint32_t num_host_mem_ch_per_mmio_device = 1;
// Initialize the driver with a 1x1 descriptor and explicitly do not perform harvesting
Expand All @@ -165,7 +171,7 @@ TEST(SiliconDriverWH, HarvestingRuntime) {
auto get_static_tlb_index_callback = [](tt_xy_pair target) { return get_static_tlb_index(target); };

std::set<chip_id_t> target_devices = get_target_devices();
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks = {{0, 30}, {1, 60}};
std::unordered_map<chip_id_t, SoftwareHarvesting> simulated_harvesting_masks = {{0, {30, 0, 0}}, {1, {60, 0, 0}}};

uint32_t num_host_mem_ch_per_mmio_device = 1;

Expand Down

0 comments on commit fe56807

Please sign in to comment.