Skip to content

Commit

Permalink
Implement Cluster CoreCoord API (#401)
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT authored Dec 20, 2024
1 parent 2920dd4 commit e092e10
Show file tree
Hide file tree
Showing 14 changed files with 471 additions and 225 deletions.
211 changes: 153 additions & 58 deletions device/api/umd/device/cluster.h

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion device/api/umd/device/coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class CoordinateManager {

CoordinateManager(CoordinateManager& other) = default;

tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system);
tt::umd::CoreCoord translate_coord_to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system);

std::vector<tt::umd::CoreCoord> get_cores(const CoreType core_type) const;
tt_xy_pair get_grid_size(const CoreType core_type) const;
Expand Down
7 changes: 7 additions & 0 deletions device/api/umd/device/tt_core_coordinates.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,10 @@ struct CoreCoord : public tt_xy_pair {
};

} // namespace tt::umd

namespace std {
// Specialization of std::hash for tt::umd::CoreCoord, so that CoreCoord can serve as the key type
// of hashed standard containers such as std::unordered_set.
template <>
struct hash<tt::umd::CoreCoord> {
// Returns the hash value computed for core_coord. Declared only; the body is defined out of line.
size_t operator()(const tt::umd::CoreCoord& core_coord) const;
};
} // namespace std
2 changes: 1 addition & 1 deletion device/api/umd/device/tt_soc_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class tt_SocDescriptor {
harvested_grid_size_map(other.harvested_grid_size_map) {}

// CoreCoord conversions.
tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const;
tt::umd::CoreCoord translate_coord_to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const;

static std::string get_soc_descriptor_path(tt::ARCH arch);

Expand Down
3 changes: 3 additions & 0 deletions device/blackhole/blackhole_coordinate_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ BlackholeCoordinateManager::BlackholeCoordinateManager(
}

void BlackholeCoordinateManager::translate_tensix_coords() {
if (CoordinateManager::get_num_harvested(tensix_harvesting_mask) > tensix_grid_size.x) {
tensix_harvesting_mask = 0;
}
size_t num_harvested_x = CoordinateManager::get_num_harvested(tensix_harvesting_mask);
size_t grid_size_x = tensix_grid_size.x;
size_t grid_size_y = tensix_grid_size.y;
Expand Down
92 changes: 85 additions & 7 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <utility>
#include <vector>

#include "api/umd/device/tt_core_coordinates.h"
#include "logger.hpp"
#include "umd/device/architecture_implementation.h"
#include "umd/device/chip/local_chip.h"
Expand Down Expand Up @@ -503,8 +504,8 @@ std::unique_ptr<Chip> Cluster::construct_chip_from_cluster(chip_id_t chip_id, tt
// true.
// TODO: This should be changed, harvesting should be done in tt_socdescriptor's constructor and not as part of
// cluster class.
// uint32_t harvesting_info = cluster_desc->get_harvesting_info().at(chip_id);
tt_SocDescriptor soc_desc = tt_SocDescriptor(soc_desc_path /*, harvesting_info*/);
uint32_t tensix_harvesting_mask = cluster_desc->get_harvesting_info().at(chip_id);
tt_SocDescriptor soc_desc = tt_SocDescriptor(soc_desc_path, tensix_harvesting_mask /*, harvesting_info*/);
return construct_chip_from_cluster(chip_id, cluster_desc, soc_desc);
}

Expand Down Expand Up @@ -589,11 +590,8 @@ Cluster::Cluster(
"Target device {} not present in current cluster!",
chip_id);

// Note that initially soc_descriptors are not harvested, but will be harvested later if perform_harvesting is
// true.
// TODO: This should be changed, harvesting should be done in tt_socdescriptor's constructor and not as part of
// cluster class.
tt_SocDescriptor soc_desc = tt_SocDescriptor(sdesc_path);
size_t tensix_harvesting_mask = cluster_desc->get_harvesting_info().at(chip_id);
tt_SocDescriptor soc_desc = tt_SocDescriptor(sdesc_path, tensix_harvesting_mask);
log_assert(
cluster_desc->get_arch(chip_id) == soc_desc.arch,
"Passed soc descriptor has {} arch, but for chip id {} has arch {}",
Expand Down Expand Up @@ -683,6 +681,17 @@ void Cluster::configure_active_ethernet_cores_for_mmio_device(
non_mmio_transfer_cores_customized = true;
}

void Cluster::configure_active_ethernet_cores_for_mmio_device(
const std::unordered_set<CoreCoord>& active_eth_cores_per_chip, chip_id_t mmio_chip) {
std::unordered_set<tt_xy_pair> active_eth_cores_xy;
for (const auto& core : active_eth_cores_per_chip) {
CoreCoord virtual_coord = translate_chip_coord(mmio_chip, core, CoordSystem::VIRTUAL);
active_eth_cores_xy.insert(virtual_coord);
}

configure_active_ethernet_cores_for_mmio_device(mmio_chip, active_eth_cores_xy);
}

void Cluster::populate_cores() {
std::uint32_t count = 0;
for (const auto& [chip_id, chip] : chips_) {
Expand Down Expand Up @@ -996,6 +1005,12 @@ void Cluster::deassert_risc_reset_at_core(tt_cxy_pair core, const TensixSoftRese
}
}

void Cluster::deassert_risc_reset_at_core(
const chip_id_t chip, const CoreCoord core, const TensixSoftResetOptions& soft_resets) {
const CoreCoord virtual_coord = translate_chip_coord(chip, core, CoordSystem::VIRTUAL);
deassert_risc_reset_at_core({(size_t)chip, virtual_coord}, soft_resets);
}

void Cluster::assert_risc_reset_at_core(tt_cxy_pair core) {
// Get Target Device to query soc descriptor and determine location in cluster
std::uint32_t target_device = core.chip;
Expand All @@ -1016,6 +1031,11 @@ void Cluster::assert_risc_reset_at_core(tt_cxy_pair core) {
}
}

void Cluster::assert_risc_reset_at_core(const chip_id_t chip, const CoreCoord core) {
const CoreCoord virtual_coord = translate_chip_coord(chip, core, CoordSystem::VIRTUAL);
assert_risc_reset_at_core({(size_t)chip, virtual_coord});
}

// Free memory during teardown, and remove (clean/unlock) from any leftover mutexes.
void Cluster::cleanup_shared_host_state() {
for (auto& mutex : hardware_resource_mutex_map) {
Expand Down Expand Up @@ -1078,6 +1098,11 @@ tt::Writer Cluster::get_static_tlb_writer(tt_cxy_pair target) {
return tt::Writer(base + tlb_offset, tlb_size);
}

// CoreCoord overload: resolves `target` to the VIRTUAL coordinate system of `chip` and returns the
// writer produced by the tt_cxy_pair overload for that location.
tt::Writer Cluster::get_static_tlb_writer(const chip_id_t chip, const CoreCoord target) {
    const tt_cxy_pair virtual_target{
        static_cast<size_t>(chip), translate_chip_coord(chip, target, CoordSystem::VIRTUAL)};
    return get_static_tlb_writer(virtual_target);
}

void Cluster::write_device_memory(
const void* mem_ptr,
uint32_t size_in_bytes,
Expand Down Expand Up @@ -1323,6 +1348,11 @@ std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(
return get_tt_device(target.chip)->get_architecture_implementation()->describe_tlb(tlb_index);
}

std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(const chip_id_t chip, CoreCoord core) {
const CoreCoord virtual_coord = translate_chip_coord(chip, core, CoordSystem::VIRTUAL);
return get_tlb_data_from_target({(size_t)chip, virtual_coord});
}

void Cluster::configure_tlb(
chip_id_t logical_device_id, tt_xy_pair core, int32_t tlb_index, uint64_t address, uint64_t ordering) {
log_assert(
Expand Down Expand Up @@ -1353,6 +1383,12 @@ void Cluster::configure_tlb(
map_core_to_tlb_per_chip.at(logical_device_id).insert({core, tlb_index});
}

// CoreCoord overload: resolves `core` to the VIRTUAL coordinate system of `logical_device_id` and
// configures the TLB through the tt_xy_pair overload, using the x/y of the translated coordinate.
void Cluster::configure_tlb(
    chip_id_t logical_device_id, tt::umd::CoreCoord core, int32_t tlb_index, uint64_t address, uint64_t ordering) {
    const CoreCoord core_in_virtual = translate_chip_coord(logical_device_id, core, CoordSystem::VIRTUAL);
    const tt_xy_pair virtual_xy{core_in_virtual.x, core_in_virtual.y};
    configure_tlb(logical_device_id, virtual_xy, tlb_index, address, ordering);
}

void Cluster::set_fallback_tlb_ordering_mode(const std::string& fallback_tlb, uint64_t ordering) {
log_assert(
ordering == TLB_DATA::Strict || ordering == TLB_DATA::Posted || ordering == TLB_DATA::Relaxed,
Expand Down Expand Up @@ -2932,6 +2968,16 @@ void Cluster::l1_membar(
}
}

// CoreCoord overload: translates every entry of `cores` to the VIRTUAL coordinate system of `chip`,
// collects the x/y pairs, and issues the barrier through the tt_xy_pair-set overload.
void Cluster::l1_membar(
    const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb) {
    std::unordered_set<tt_xy_pair> virtual_cores;
    for (const tt::umd::CoreCoord& requested : cores) {
        const CoreCoord translated = translate_chip_coord(chip, requested, CoordSystem::VIRTUAL);
        virtual_cores.insert(tt_xy_pair{translated.x, translated.y});
    }
    l1_membar(chip, fallback_tlb, virtual_cores);
}

void Cluster::dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt_xy_pair>& cores) {
if (cluster_desc->is_chip_mmio_capable(chip)) {
Expand All @@ -2950,6 +2996,16 @@ void Cluster::dram_membar(
}
}

// CoreCoord overload: translates every entry of `cores` to the VIRTUAL coordinate system of `chip`,
// collects the x/y pairs, and issues the barrier through the tt_xy_pair-set overload.
void Cluster::dram_membar(
    const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb) {
    std::unordered_set<tt_xy_pair> virtual_cores;
    for (const tt::umd::CoreCoord& requested : cores) {
        const CoreCoord translated = translate_chip_coord(chip, requested, CoordSystem::VIRTUAL);
        virtual_cores.insert(tt_xy_pair{translated.x, translated.y});
    }
    dram_membar(chip, fallback_tlb, virtual_cores);
}

void Cluster::dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<uint32_t>& channels) {
if (cluster_desc->is_chip_mmio_capable(chip)) {
Expand Down Expand Up @@ -2987,6 +3043,17 @@ void Cluster::write_to_device(
}
}

void Cluster::write_to_device(
const void* mem_ptr,
uint32_t size_in_bytes,
chip_id_t chip,
CoreCoord core,
uint64_t addr,
const std::string& tlb_to_use) {
CoreCoord virtual_coord = translate_chip_coord(chip, core, CoordSystem::VIRTUAL);
write_to_device(mem_ptr, size_in_bytes, {(size_t)chip, virtual_coord}, addr, tlb_to_use);
}

void Cluster::read_mmio_device_register(
void* mem_ptr, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) {
TTDevice* tt_device = get_tt_device(core.chip);
Expand Down Expand Up @@ -3046,6 +3113,12 @@ void Cluster::read_from_device(
}
}

void Cluster::read_from_device(
void* mem_ptr, chip_id_t chip, CoreCoord core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) {
CoreCoord virtual_coord = translate_chip_coord(chip, core, CoordSystem::VIRTUAL);
read_from_device(mem_ptr, {(size_t)chip, virtual_coord}, addr, size, fallback_tlb);
}

int Cluster::arc_msg(
int logical_device_id,
uint32_t msg_code,
Expand Down Expand Up @@ -3329,4 +3402,9 @@ void Cluster::set_barrier_address_params(const barrier_address_params& barrier_a
dram_address_params.DRAM_BARRIER_BASE = barrier_address_params_.dram_barrier_base;
}

// Translates core_coord into coord_system by delegating to the soc descriptor of the given chip.
tt::umd::CoreCoord Cluster::translate_chip_coord(
    const chip_id_t chip, const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const {
    const auto& chip_soc_descriptor = get_soc_descriptor(chip);
    return chip_soc_descriptor.translate_coord_to(core_coord, coord_system);
}

} // namespace tt::umd
5 changes: 4 additions & 1 deletion device/coordinate_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,14 @@ void CoordinateManager::identity_map_physical_cores() {
}
}

CoreCoord CoordinateManager::to(const CoreCoord core_coord, const CoordSystem coord_system) {
// Two-step lookup: core_coord is first mapped through to_physical_map, and the result paired with
// coord_system is then looked up in from_physical_map. Both lookups use .at(), so a missing key
// throws instead of inserting.
CoreCoord CoordinateManager::translate_coord_to(const CoreCoord core_coord, const CoordSystem coord_system) {
    const auto& physical_entry = to_physical_map.at(core_coord);
    return from_physical_map.at({physical_entry, coord_system});
}

void CoordinateManager::translate_tensix_coords() {
if (CoordinateManager::get_num_harvested(tensix_harvesting_mask) > tensix_grid_size.y) {
tensix_harvesting_mask = 0;
}
size_t num_harvested_y = CoordinateManager::get_num_harvested(tensix_harvesting_mask);
size_t grid_size_x = tensix_grid_size.x;
size_t grid_size_y = tensix_grid_size.y;
Expand Down
16 changes: 16 additions & 0 deletions device/tt_core_coordinates.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include "umd/device/tt_core_coordinates.h"

namespace std {
std::size_t operator()(const CoreCoord& core_coord) const {
size_t seed = 0;
seed = std::hash<size_t>{}(core_coord.x) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
seed = std::hash<size_t>{}(core_coord.y) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
seed = std::hash<CoreType>{}(core_coord.core_type) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
seed = std::hash<CoordSystem>{}(core_coord.coord_system) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
return seed;
}
} // namespace std
7 changes: 4 additions & 3 deletions device/tt_soc_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,9 @@ void tt_SocDescriptor::create_coordinate_manager(
get_cores_and_grid_size_from_coordinate_manager();
}

tt::umd::CoreCoord tt_SocDescriptor::to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const {
return coordinate_manager->to(core_coord, coord_system);
// Translates core_coord into coord_system; the work is delegated to the coordinate manager held by
// this soc descriptor.
tt::umd::CoreCoord tt_SocDescriptor::translate_coord_to(
    const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const {
    auto& manager = *coordinate_manager;
    return manager.translate_coord_to(core_coord, coord_system);
}

tt_SocDescriptor::tt_SocDescriptor(
Expand Down Expand Up @@ -269,7 +270,7 @@ tt_xy_pair tt_SocDescriptor::get_core_for_dram_channel(int dram_chan, int subcha

// Returns the DRAM core for (dram_chan, subchannel): a LOGICAL DRAM CoreCoord is built from the two
// indices and then translated to the PHYSICAL coordinate system.
// NOTE(review): this span shows both the pre-rename `to(...)` return and the post-rename
// `translate_coord_to(...)` return (it looks like diff rendering); as written only the first
// return is reachable — confirm against the real file.
CoreCoord tt_SocDescriptor::get_dram_core_for_channel(int dram_chan, int subchannel) const {
const CoreCoord logical_dram_coord = CoreCoord(dram_chan, subchannel, CoreType::DRAM, CoordSystem::LOGICAL);
return to(logical_dram_coord, CoordSystem::PHYSICAL);
return translate_coord_to(logical_dram_coord, CoordSystem::PHYSICAL);
}

bool tt_SocDescriptor::is_ethernet_core(const tt_xy_pair &core) const {
Expand Down
Loading

0 comments on commit e092e10

Please sign in to comment.