Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Harvesting] simulated_harvesting_masks passed to tt_SocDescriptor #440

Merged
merged 8 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion device/api/umd/device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -855,8 +855,17 @@ class Cluster : public tt_device {
void wait_for_connected_non_mmio_flush(chip_id_t chip_id);
std::unique_ptr<Chip> construct_chip_from_cluster(
chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, tt_SocDescriptor& soc_desc);
std::unique_ptr<Chip> construct_chip_from_cluster(chip_id_t logical_device_id, tt_ClusterDescriptor* cluster_desc);
std::unique_ptr<Chip> construct_chip_from_cluster(
chip_id_t logical_device_id,
tt_ClusterDescriptor* cluster_desc,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t>& simulated_harvesting_masks);
void add_chip(chip_id_t chip_id, std::unique_ptr<Chip> chip);
uint32_t get_tensix_harvesting_mask(
chip_id_t chip_id,
tt_ClusterDescriptor* cluster_desc,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t>& simulated_harvesting_masks);
void construct_cluster(
const uint32_t& num_host_mem_ch_per_mmio_device,
const bool skip_driver_allocs,
Expand Down
51 changes: 40 additions & 11 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -452,15 +452,16 @@ std::unique_ptr<Chip> Cluster::construct_chip_from_cluster(
}
}

std::unique_ptr<Chip> Cluster::construct_chip_from_cluster(chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc) {
std::unique_ptr<Chip> Cluster::construct_chip_from_cluster(
chip_id_t chip_id,
tt_ClusterDescriptor* cluster_desc,
bool perform_harvesting,
broskoTT marked this conversation as resolved.
Show resolved Hide resolved
std::unordered_map<chip_id_t, uint32_t>& simulated_harvesting_masks) {
tt::ARCH arch = cluster_desc->get_arch(chip_id);
std::string soc_desc_path = tt_SocDescriptor::get_soc_descriptor_path(arch);
// Note that initially soc_descriptors are not harvested, but will be harvested later if perform_harvesting is
// true.
// TODO: This should be changed, harvesting should be done in tt_socdescriptor's constructor and not as part of
// cluster class.
uint32_t tensix_harvesting_mask = cluster_desc->get_harvesting_info().at(chip_id);
tt_SocDescriptor soc_desc = tt_SocDescriptor(soc_desc_path, tensix_harvesting_mask /*, harvesting_info*/);
uint32_t tensix_harvesting_mask =
get_tensix_harvesting_mask(chip_id, cluster_desc, perform_harvesting, simulated_harvesting_masks);
tt_SocDescriptor soc_desc = tt_SocDescriptor(soc_desc_path, tensix_harvesting_mask);
return construct_chip_from_cluster(chip_id, cluster_desc, soc_desc);
}

Expand All @@ -478,6 +479,30 @@ void Cluster::add_chip(chip_id_t chip_id, std::unique_ptr<Chip> chip) {
chips_.emplace(chip_id, std::move(chip));
}

/// Computes the tensix harvesting mask used when building a chip's SoC descriptor.
///
/// @param chip_id                     Chip whose mask is requested.
/// @param cluster_desc                Cluster descriptor queried for the chip's harvesting info.
/// @param perform_harvesting          When false, harvesting is skipped and 0 is returned.
/// @param simulated_harvesting_masks  Per-chip simulated masks; a chip without an entry contributes 0.
/// @return 0 when harvesting is disabled; otherwise the bitwise OR of the mask reported by the
///         cluster descriptor and the simulated mask for this chip.
uint32_t Cluster::get_tensix_harvesting_mask(
    chip_id_t chip_id,
    tt_ClusterDescriptor* cluster_desc,
    bool perform_harvesting,
    std::unordered_map<chip_id_t, uint32_t>& simulated_harvesting_masks) {
    if (!perform_harvesting) {
        log_info(LogSiliconDriver, "Skipping harvesting for chip {}.", chip_id);
        return 0;
    }

    // NOTE(review): `.at()` presumably fails loudly if the descriptor has no entry for this chip —
    // confirm against the container type returned by get_harvesting_info().
    const uint32_t tensix_harvesting_mask = cluster_desc->get_harvesting_info().at(chip_id);

    // Look the chip up once and reuse the iterator instead of a find() followed by a second at().
    const auto simulated_entry = simulated_harvesting_masks.find(chip_id);
    const uint32_t simulated_harvesting_mask =
        (simulated_entry != simulated_harvesting_masks.end()) ? simulated_entry->second : 0;

    // Only log when the simulated mask actually adds something.
    if (simulated_harvesting_mask != 0) {
        log_info(
            LogSiliconDriver,
            "Adding simulated harvesting mask {} for chip {} which has real harvesting mask {}.",
            simulated_harvesting_mask,
            chip_id,
            tensix_harvesting_mask);
    }

    // Simulated harvesting can only add harvested rows on top of the real ones, never remove them.
    return tensix_harvesting_mask | simulated_harvesting_mask;
}

Cluster::Cluster(
broskoTT marked this conversation as resolved.
Show resolved Hide resolved
const uint32_t& num_host_mem_ch_per_mmio_device,
const bool skip_driver_allocs,
Expand All @@ -487,7 +512,9 @@ Cluster::Cluster(
cluster_desc = tt_ClusterDescriptor::create();

for (auto& chip_id : cluster_desc->get_all_chips()) {
add_chip(chip_id, construct_chip_from_cluster(chip_id, cluster_desc.get()));
add_chip(
chip_id,
construct_chip_from_cluster(chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks));
}

// TODO: work on removing this member altogether. Currently assumes all have the same arch.
Expand Down Expand Up @@ -515,7 +542,9 @@ Cluster::Cluster(
cluster_desc->get_all_chips().find(chip_id) != cluster_desc->get_all_chips().end(),
"Target device {} not present in current cluster!",
chip_id);
add_chip(chip_id, construct_chip_from_cluster(chip_id, cluster_desc.get()));
add_chip(
chip_id,
construct_chip_from_cluster(chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks));
}

// TODO: work on removing this member altogether. Currently assumes all have the same arch.
Expand Down Expand Up @@ -544,8 +573,8 @@ Cluster::Cluster(
cluster_desc->get_all_chips().find(chip_id) != cluster_desc->get_all_chips().end(),
"Target device {} not present in current cluster!",
chip_id);

size_t tensix_harvesting_mask = cluster_desc->get_harvesting_info().at(chip_id);
size_t tensix_harvesting_mask =
get_tensix_harvesting_mask(chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks);
tt_SocDescriptor soc_desc = tt_SocDescriptor(sdesc_path, tensix_harvesting_mask);
log_assert(
cluster_desc->get_arch(chip_id) == soc_desc.arch,
Expand Down
Loading