Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Cluster CoreCoord API #401

Merged
merged 5 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 153 additions & 58 deletions device/api/umd/device/cluster.h

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion device/api/umd/device/coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class CoordinateManager {

CoordinateManager(CoordinateManager& other) = default;

tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system);
tt::umd::CoreCoord translate_coord_to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system);

std::vector<tt::umd::CoreCoord> get_cores(const CoreType core_type) const;
tt_xy_pair get_grid_size(const CoreType core_type) const;
Expand Down
7 changes: 7 additions & 0 deletions device/api/umd/device/tt_core_coordinates.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,10 @@ struct CoreCoord : public tt_xy_pair {
};

} // namespace tt::umd

pjanevskiTT marked this conversation as resolved.
Show resolved Hide resolved
namespace std {
// Specialization of std::hash for tt::umd::CoreCoord, which lets CoreCoord be used as the key
// type of unordered containers (e.g. std::unordered_set<tt::umd::CoreCoord>).
// Only the call operator is declared here; it has no inline body in this struct.
template <>
struct hash<tt::umd::CoreCoord> {
    // Returns the hash value computed for `core_coord`.
    std::size_t operator()(const tt::umd::CoreCoord& core_coord) const;
};
}  // namespace std
2 changes: 1 addition & 1 deletion device/api/umd/device/tt_soc_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class tt_SocDescriptor {
harvested_grid_size_map(other.harvested_grid_size_map) {}

// CoreCoord conversions.
tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const;
tt::umd::CoreCoord translate_coord_to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const;
pjanevskiTT marked this conversation as resolved.
Show resolved Hide resolved

static std::string get_soc_descriptor_path(tt::ARCH arch);

Expand Down
3 changes: 3 additions & 0 deletions device/blackhole/blackhole_coordinate_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ BlackholeCoordinateManager::BlackholeCoordinateManager(
}

void BlackholeCoordinateManager::translate_tensix_coords() {
if (CoordinateManager::get_num_harvested(tensix_harvesting_mask) > tensix_grid_size.x) {
tensix_harvesting_mask = 0;
}
size_t num_harvested_x = CoordinateManager::get_num_harvested(tensix_harvesting_mask);
size_t grid_size_x = tensix_grid_size.x;
size_t grid_size_y = tensix_grid_size.y;
Expand Down
92 changes: 85 additions & 7 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <utility>
#include <vector>

#include "api/umd/device/tt_core_coordinates.h"
#include "logger.hpp"
#include "umd/device/architecture_implementation.h"
#include "umd/device/chip/local_chip.h"
Expand Down Expand Up @@ -503,8 +504,8 @@ std::unique_ptr<Chip> Cluster::construct_chip_from_cluster(chip_id_t chip_id, tt
// true.
// TODO: This should be changed, harvesting should be done in tt_socdescriptor's constructor and not as part of
// cluster class.
// uint32_t harvesting_info = cluster_desc->get_harvesting_info().at(chip_id);
tt_SocDescriptor soc_desc = tt_SocDescriptor(soc_desc_path /*, harvesting_info*/);
uint32_t tensix_harvesting_mask = cluster_desc->get_harvesting_info().at(chip_id);
pjanevskiTT marked this conversation as resolved.
Show resolved Hide resolved
tt_SocDescriptor soc_desc = tt_SocDescriptor(soc_desc_path, tensix_harvesting_mask /*, harvesting_info*/);
pjanevskiTT marked this conversation as resolved.
Show resolved Hide resolved
return construct_chip_from_cluster(chip_id, cluster_desc, soc_desc);
}

Expand Down Expand Up @@ -589,11 +590,8 @@ Cluster::Cluster(
"Target device {} not present in current cluster!",
chip_id);

// Note that initially soc_descriptors are not harvested, but will be harvested later if perform_harvesting is
// true.
// TODO: This should be changed, harvesting should be done in tt_socdescriptor's constructor and not as part of
// cluster class.
tt_SocDescriptor soc_desc = tt_SocDescriptor(sdesc_path);
size_t tensix_harvesting_mask = cluster_desc->get_harvesting_info().at(chip_id);
tt_SocDescriptor soc_desc = tt_SocDescriptor(sdesc_path, tensix_harvesting_mask);
log_assert(
cluster_desc->get_arch(chip_id) == soc_desc.arch,
"Passed soc descriptor has {} arch, but for chip id {} has arch {}",
Expand Down Expand Up @@ -683,6 +681,17 @@ void Cluster::configure_active_ethernet_cores_for_mmio_device(
non_mmio_transfer_cores_customized = true;
}

void Cluster::configure_active_ethernet_cores_for_mmio_device(
pjanevskiTT marked this conversation as resolved.
Show resolved Hide resolved
const std::unordered_set<CoreCoord>& active_eth_cores_per_chip, chip_id_t mmio_chip) {
std::unordered_set<tt_xy_pair> active_eth_cores_xy;
for (const auto& core : active_eth_cores_per_chip) {
CoreCoord virtual_coord = translate_chip_coord(mmio_chip, core, CoordSystem::VIRTUAL);
active_eth_cores_xy.insert(virtual_coord);
}

configure_active_ethernet_cores_for_mmio_device(mmio_chip, active_eth_cores_xy);
}

void Cluster::populate_cores() {
std::uint32_t count = 0;
for (const auto& [chip_id, chip] : chips_) {
Expand Down Expand Up @@ -996,6 +1005,12 @@ void Cluster::deassert_risc_reset_at_core(tt_cxy_pair core, const TensixSoftRese
}
}

void Cluster::deassert_risc_reset_at_core(
const chip_id_t chip, const CoreCoord core, const TensixSoftResetOptions& soft_resets) {
const CoreCoord virtual_coord = translate_chip_coord(chip, core, CoordSystem::VIRTUAL);
deassert_risc_reset_at_core({(size_t)chip, virtual_coord}, soft_resets);
}

void Cluster::assert_risc_reset_at_core(tt_cxy_pair core) {
// Get Target Device to query soc descriptor and determine location in cluster
std::uint32_t target_device = core.chip;
Expand All @@ -1016,6 +1031,11 @@ void Cluster::assert_risc_reset_at_core(tt_cxy_pair core) {
}
}

void Cluster::assert_risc_reset_at_core(const chip_id_t chip, const CoreCoord core) {
const CoreCoord virtual_coord = translate_chip_coord(chip, core, CoordSystem::VIRTUAL);
assert_risc_reset_at_core({(size_t)chip, virtual_coord});
}

// Free memory during teardown, and remove (clean/unlock) from any leftover mutexes.
void Cluster::cleanup_shared_host_state() {
for (auto& mutex : hardware_resource_mutex_map) {
Expand Down Expand Up @@ -1078,6 +1098,11 @@ tt::Writer Cluster::get_static_tlb_writer(tt_cxy_pair target) {
return tt::Writer(base + tlb_offset, tlb_size);
}

// CoreCoord-based overload: translates `target` on `chip` to the VIRTUAL coordinate system and
// returns the writer produced by the tt_cxy_pair-based overload for that location.
tt::Writer Cluster::get_static_tlb_writer(const chip_id_t chip, const CoreCoord target) {
    const tt_cxy_pair virtual_target{
        static_cast<size_t>(chip), translate_chip_coord(chip, target, CoordSystem::VIRTUAL)};
    return get_static_tlb_writer(virtual_target);
}

void Cluster::write_device_memory(
const void* mem_ptr,
uint32_t size_in_bytes,
Expand Down Expand Up @@ -1323,6 +1348,11 @@ std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(
return get_tt_device(target.chip)->get_architecture_implementation()->describe_tlb(tlb_index);
}

std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(const chip_id_t chip, CoreCoord core) {
const CoreCoord virtual_coord = translate_chip_coord(chip, core, CoordSystem::VIRTUAL);
return get_tlb_data_from_target({(size_t)chip, virtual_coord});
}

void Cluster::configure_tlb(
chip_id_t logical_device_id, tt_xy_pair core, int32_t tlb_index, uint64_t address, uint64_t ordering) {
log_assert(
Expand Down Expand Up @@ -1353,6 +1383,12 @@ void Cluster::configure_tlb(
map_core_to_tlb_per_chip.at(logical_device_id).insert({core, tlb_index});
}

// CoreCoord-based overload: translates `core` on `logical_device_id` to the VIRTUAL coordinate
// system and delegates to the tt_xy_pair-based overload with the remaining arguments unchanged.
void Cluster::configure_tlb(
    chip_id_t logical_device_id, tt::umd::CoreCoord core, int32_t tlb_index, uint64_t address, uint64_t ordering) {
    const CoreCoord translated = translate_chip_coord(logical_device_id, core, CoordSystem::VIRTUAL);
    // Build a plain tt_xy_pair explicitly: passing the CoreCoord itself would select this
    // overload again instead of the tt_xy_pair one.
    const tt_xy_pair virtual_xy(translated.x, translated.y);
    configure_tlb(logical_device_id, virtual_xy, tlb_index, address, ordering);
}

void Cluster::set_fallback_tlb_ordering_mode(const std::string& fallback_tlb, uint64_t ordering) {
log_assert(
ordering == TLB_DATA::Strict || ordering == TLB_DATA::Posted || ordering == TLB_DATA::Relaxed,
Expand Down Expand Up @@ -2932,6 +2968,16 @@ void Cluster::l1_membar(
}
}

// CoreCoord-based overload: translates every core in `cores` to the VIRTUAL coordinate system
// of `chip` and delegates to the tt_xy_pair-based overload. Note that the callee takes
// `fallback_tlb` before the core set, i.e. in the opposite order from this overload.
void Cluster::l1_membar(
    const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb) {
    std::unordered_set<tt_xy_pair> virtual_cores;
    for (const CoreCoord& requested : cores) {
        const CoreCoord translated = translate_chip_coord(chip, requested, CoordSystem::VIRTUAL);
        virtual_cores.insert(tt_xy_pair(translated.x, translated.y));
    }
    l1_membar(chip, fallback_tlb, virtual_cores);
}

void Cluster::dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt_xy_pair>& cores) {
if (cluster_desc->is_chip_mmio_capable(chip)) {
Expand All @@ -2950,6 +2996,16 @@ void Cluster::dram_membar(
}
}

// CoreCoord-based overload: translates every core in `cores` to the VIRTUAL coordinate system
// of `chip` and delegates to the tt_xy_pair-based overload. Note that the callee takes
// `fallback_tlb` before the core set, i.e. in the opposite order from this overload.
void Cluster::dram_membar(
    const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb) {
    std::unordered_set<tt_xy_pair> virtual_cores;
    for (const CoreCoord& requested : cores) {
        const CoreCoord translated = translate_chip_coord(chip, requested, CoordSystem::VIRTUAL);
        virtual_cores.insert(tt_xy_pair(translated.x, translated.y));
    }
    dram_membar(chip, fallback_tlb, virtual_cores);
}

void Cluster::dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<uint32_t>& channels) {
if (cluster_desc->is_chip_mmio_capable(chip)) {
Expand Down Expand Up @@ -2987,6 +3043,17 @@ void Cluster::write_to_device(
}
}

void Cluster::write_to_device(
const void* mem_ptr,
uint32_t size_in_bytes,
chip_id_t chip,
CoreCoord core,
uint64_t addr,
const std::string& tlb_to_use) {
CoreCoord virtual_coord = translate_chip_coord(chip, core, CoordSystem::VIRTUAL);
write_to_device(mem_ptr, size_in_bytes, {(size_t)chip, virtual_coord}, addr, tlb_to_use);
}

void Cluster::read_mmio_device_register(
void* mem_ptr, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) {
TTDevice* tt_device = get_tt_device(core.chip);
Expand Down Expand Up @@ -3046,6 +3113,12 @@ void Cluster::read_from_device(
}
}

void Cluster::read_from_device(
void* mem_ptr, chip_id_t chip, CoreCoord core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) {
CoreCoord virtual_coord = translate_chip_coord(chip, core, CoordSystem::VIRTUAL);
read_from_device(mem_ptr, {(size_t)chip, virtual_coord}, addr, size, fallback_tlb);
}

int Cluster::arc_msg(
int logical_device_id,
uint32_t msg_code,
Expand Down Expand Up @@ -3329,4 +3402,9 @@ void Cluster::set_barrier_address_params(const barrier_address_params& barrier_a
dram_address_params.DRAM_BARRIER_BASE = barrier_address_params_.dram_barrier_base;
}

// Translates `core_coord` into `coord_system` using the soc descriptor that belongs to `chip`.
tt::umd::CoreCoord Cluster::translate_chip_coord(
    const chip_id_t chip, const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const {
    const auto& chip_soc_descriptor = get_soc_descriptor(chip);
    return chip_soc_descriptor.translate_coord_to(core_coord, coord_system);
}

} // namespace tt::umd
5 changes: 4 additions & 1 deletion device/coordinate_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,14 @@ void CoordinateManager::identity_map_physical_cores() {
}
}

CoreCoord CoordinateManager::to(const CoreCoord core_coord, const CoordSystem coord_system) {
// Translates `core_coord` into `coord_system` with two table lookups: the input coordinate is
// first mapped to its physical location, and the (physical location, target system) pair is then
// mapped to the resulting coordinate.
CoreCoord CoordinateManager::translate_coord_to(const CoreCoord core_coord, const CoordSystem coord_system) {
    const auto& physical_location = to_physical_map.at(core_coord);
    return from_physical_map.at({physical_location, coord_system});
}

void CoordinateManager::translate_tensix_coords() {
if (CoordinateManager::get_num_harvested(tensix_harvesting_mask) > tensix_grid_size.y) {
tensix_harvesting_mask = 0;
}
size_t num_harvested_y = CoordinateManager::get_num_harvested(tensix_harvesting_mask);
size_t grid_size_x = tensix_grid_size.x;
size_t grid_size_y = tensix_grid_size.y;
Expand Down
16 changes: 16 additions & 0 deletions device/tt_core_coordinates.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include "umd/device/tt_core_coordinates.h"

namespace std {
std::size_t operator()(const CoreCoord& core_coord) const {
size_t seed = 0;
seed = std::hash<size_t>{}(core_coord.x) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
seed = std::hash<size_t>{}(core_coord.y) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
seed = std::hash<CoreType>{}(core_coord.core_type) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
seed = std::hash<CoordSystem>{}(core_coord.coord_system) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
return seed;
}
} // namespace std
7 changes: 4 additions & 3 deletions device/tt_soc_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,9 @@ void tt_SocDescriptor::create_coordinate_manager(
get_cores_and_grid_size_from_coordinate_manager();
}

tt::umd::CoreCoord tt_SocDescriptor::to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const {
return coordinate_manager->to(core_coord, coord_system);
// Translates `core_coord` into `coord_system` by delegating to the coordinate manager owned by
// this soc descriptor.
tt::umd::CoreCoord tt_SocDescriptor::translate_coord_to(
    const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const {
    const tt::umd::CoreCoord translated = coordinate_manager->translate_coord_to(core_coord, coord_system);
    return translated;
}

tt_SocDescriptor::tt_SocDescriptor(
Expand Down Expand Up @@ -269,7 +270,7 @@ tt_xy_pair tt_SocDescriptor::get_core_for_dram_channel(int dram_chan, int subcha

// Returns the PHYSICAL coordinate of the DRAM core identified by (`dram_chan`, `subchannel`).
// A LOGICAL DRAM CoreCoord is built with `dram_chan` and `subchannel` as its first two
// constructor arguments and is then translated through translate_coord_to(), the renamed
// successor of the former `to()` method.
CoreCoord tt_SocDescriptor::get_dram_core_for_channel(int dram_chan, int subchannel) const {
    const CoreCoord logical_dram_coord = CoreCoord(dram_chan, subchannel, CoreType::DRAM, CoordSystem::LOGICAL);
    return translate_coord_to(logical_dram_coord, CoordSystem::PHYSICAL);
}

bool tt_SocDescriptor::is_ethernet_core(const tt_xy_pair &core) const {
Expand Down
Loading
Loading