Skip to content

Commit

Permalink
Implement Cluster CoreCoord API
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT committed Dec 17, 2024
1 parent 85bf5c4 commit 533c7a7
Show file tree
Hide file tree
Showing 18 changed files with 481 additions and 227 deletions.
213 changes: 152 additions & 61 deletions device/api/umd/device/cluster.h

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion device/api/umd/device/coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class CoordinateManager {

CoordinateManager(CoordinateManager& other) = default;

tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system);
tt::umd::CoreCoord translate_coord_to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system);

std::vector<tt::umd::CoreCoord> get_cores(const CoreType core_type) const;
tt_xy_pair get_grid_size(const CoreType core_type) const;
Expand Down
5 changes: 5 additions & 0 deletions device/api/umd/device/tt_core_coordinates.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,8 @@ struct CoreCoord : public tt_xy_pair {
};

} // namespace tt::umd

// Specialization of std::hash for tt::umd::CoreCoord so that CoreCoord can be used as the key type
// of unordered containers (e.g. std::unordered_set<tt::umd::CoreCoord>).
// Only the call operator is declared here; its definition is not part of this header.
template <>
struct std::hash<tt::umd::CoreCoord> {
// Returns the hash value for the given core coordinate.
// NOTE(review): the parameter is named `core_range` although it is a single CoreCoord — confirm the name is intended.
std::size_t operator()(const tt::umd::CoreCoord& core_range) const;
};
2 changes: 1 addition & 1 deletion device/api/umd/device/tt_soc_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class tt_SocDescriptor {
harvested_grid_size_map(other.harvested_grid_size_map) {}

// CoreCoord conversions.
tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const;
tt::umd::CoreCoord translate_coord_to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const;

static std::string get_soc_descriptor_path(tt::ARCH arch);

Expand Down
109 changes: 107 additions & 2 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <utility>
#include <vector>

#include "api/umd/device/tt_core_coordinates.h"
#include "logger.hpp"
#include "umd/device/architecture_implementation.h"
#include "umd/device/chip/local_chip.h"
Expand Down Expand Up @@ -498,8 +499,8 @@ std::unique_ptr<Chip> Cluster::construct_chip_from_cluster(chip_id_t chip_id, tt
// true.
// TODO: This should be changed, harvesting should be done in tt_socdescriptor's constructor and not as part of
// cluster class.
// uint32_t harvesting_info = cluster_desc->get_harvesting_info().at(chip_id);
tt_SocDescriptor soc_desc = tt_SocDescriptor(soc_desc_path /*, harvesting_info*/);
uint32_t tensix_harvesting_mask = cluster_desc->get_harvesting_info().at(chip_id);
tt_SocDescriptor soc_desc = tt_SocDescriptor(soc_desc_path, tensix_harvesting_mask /*, harvesting_info*/);
return construct_chip_from_cluster(chip_id, cluster_desc, soc_desc);
}

Expand Down Expand Up @@ -678,6 +679,17 @@ void Cluster::configure_active_ethernet_cores_for_mmio_device(
non_mmio_transfer_cores_customized = true;
}

// CoreCoord flavour of configure_active_ethernet_cores_for_mmio_device.
// Every entry of active_eth_cores_per_chip is translated to the VIRTUAL coordinate system of
// mmio_chip, and the resulting (x, y) pairs are handed to the tt_xy_pair overload.
void Cluster::configure_active_ethernet_cores_for_mmio_device(
    chip_id_t mmio_chip, const std::unordered_set<CoreCoord>& active_eth_cores_per_chip) {
    std::unordered_set<tt_xy_pair> virtual_eth_cores;
    for (const CoreCoord& eth_core : active_eth_cores_per_chip) {
        const CoreCoord translated = translate_coord_to(mmio_chip, eth_core, CoordSystem::VIRTUAL);
        virtual_eth_cores.emplace(translated.x, translated.y);
    }

    // Delegate the actual configuration to the tt_xy_pair based implementation.
    configure_active_ethernet_cores_for_mmio_device(mmio_chip, virtual_eth_cores);
}

void Cluster::populate_cores() {
std::uint32_t count = 0;
for (const auto& [chip_id, chip] : chips_) {
Expand Down Expand Up @@ -1000,6 +1012,15 @@ void Cluster::deassert_risc_reset_at_core(tt_cxy_pair core, const TensixSoftRese
}
}

// CoreCoord overload of deassert_risc_reset_at_core.
//
// Translates `core` to the VIRTUAL coordinate system of `chip` and forwards the request to the
// tt_cxy_pair overload.
//
// @param chip        Chip the core belongs to.
// @param core        Core to act on; may be expressed in any coordinate system that
//                    translate_coord_to accepts.
// @param soft_resets Reset options, forwarded unchanged to the tt_cxy_pair overload.
void Cluster::deassert_risc_reset_at_core(
    const chip_id_t chip, const CoreCoord core, const TensixSoftResetOptions& soft_resets) {
    const CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);

    tt_cxy_pair virtual_core;
    // The chip has to be filled in explicitly: the tt_cxy_pair carries the target chip, and leaving
    // it at its default would drop the `chip` argument (the other CoreCoord overloads, e.g.
    // write_to_device / read_from_device, set it the same way).
    virtual_core.chip = chip;
    virtual_core.x = virtual_coord.x;
    virtual_core.y = virtual_coord.y;
    deassert_risc_reset_at_core(virtual_core, soft_resets);
}

void Cluster::assert_risc_reset_at_core(tt_cxy_pair core) {
// Get Target Device to query soc descriptor and determine location in cluster
std::uint32_t target_device = core.chip;
Expand All @@ -1023,6 +1044,14 @@ void Cluster::assert_risc_reset_at_core(tt_cxy_pair core) {
}
}

// CoreCoord overload of assert_risc_reset_at_core.
//
// Translates `core` to the VIRTUAL coordinate system of `chip` and forwards the request to the
// tt_cxy_pair overload.
//
// @param chip Chip the core belongs to.
// @param core Core to act on; may be expressed in any coordinate system that translate_coord_to
//             accepts.
void Cluster::assert_risc_reset_at_core(const chip_id_t chip, const CoreCoord core) {
    const CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);

    tt_cxy_pair virtual_core;
    // The tt_cxy_pair overload picks its target device from core.chip, so the chip must be set here;
    // otherwise the `chip` argument would be dropped and the default chip value used instead.
    virtual_core.chip = chip;
    virtual_core.x = virtual_coord.x;
    virtual_core.y = virtual_coord.y;
    assert_risc_reset_at_core(virtual_core);
}

// Free memory during teardown, and remove (clean/unlock) from any leftover mutexes.
void Cluster::cleanup_shared_host_state() {
for (auto& mutex : hardware_resource_mutex_map) {
Expand Down Expand Up @@ -1097,6 +1126,14 @@ tt::Writer Cluster::get_static_tlb_writer(tt_cxy_pair target) {
return tt::Writer(base + tlb_offset, tlb_size);
}

// CoreCoord overload of get_static_tlb_writer.
//
// Translates `target` to the VIRTUAL coordinate system of `chip` and returns whatever the
// tt_cxy_pair overload returns for that chip/core pair.
//
// @param chip   Chip the core belongs to.
// @param target Core to obtain the writer for; may be expressed in any coordinate system that
//               translate_coord_to accepts.
// @return The tt::Writer produced by the tt_cxy_pair overload.
tt::Writer Cluster::get_static_tlb_writer(const chip_id_t chip, const CoreCoord target) {
    const CoreCoord virtual_coord = translate_coord_to(chip, target, CoordSystem::VIRTUAL);

    tt_cxy_pair virtual_core;
    // The chip has to be filled in explicitly: the tt_cxy_pair carries the target chip, and leaving
    // it at its default would drop the `chip` argument (get_tlb_data_from_target's CoreCoord overload
    // sets it the same way).
    virtual_core.chip = chip;
    virtual_core.x = virtual_coord.x;
    virtual_core.y = virtual_coord.y;
    return get_static_tlb_writer(virtual_core);
}

void Cluster::write_device_memory(
const void* mem_ptr,
uint32_t size_in_bytes,
Expand Down Expand Up @@ -1367,6 +1404,15 @@ std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(
return tlb_data;
}

std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(const chip_id_t chip, CoreCoord core) {
tt_cxy_pair virtual_core;
const CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
virtual_core.chip = chip;
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
return get_tlb_data_from_target(virtual_core);
}

void Cluster::configure_tlb(
chip_id_t logical_device_id, tt_xy_pair core, int32_t tlb_index, uint64_t address, uint64_t ordering) {
log_assert(
Expand All @@ -1381,6 +1427,15 @@ void Cluster::configure_tlb(
tlb_config_map[logical_device_id].insert({tlb_index, (address / tlb_size) * tlb_size});
}

// CoreCoord overload of configure_tlb.
// The core is translated to the VIRTUAL coordinate system of logical_device_id; all remaining
// arguments are passed through untouched to the tt_xy_pair overload.
void Cluster::configure_tlb(
    chip_id_t logical_device_id, tt::umd::CoreCoord core, int32_t tlb_index, uint64_t address, uint64_t ordering) {
    const CoreCoord translated = translate_coord_to(logical_device_id, core, CoordSystem::VIRTUAL);

    tt_xy_pair virtual_xy;
    virtual_xy.x = translated.x;
    virtual_xy.y = translated.y;

    configure_tlb(logical_device_id, virtual_xy, tlb_index, address, ordering);
}

void Cluster::set_fallback_tlb_ordering_mode(const std::string& fallback_tlb, uint64_t ordering) {
log_assert(
ordering == TLB_DATA::Strict || ordering == TLB_DATA::Posted || ordering == TLB_DATA::Relaxed,
Expand Down Expand Up @@ -1715,6 +1770,11 @@ uint32_t Cluster::get_harvested_noc_rows_for_chip(int logical_device_id) {
return get_harvested_noc_rows(get_harvested_rows(logical_device_id));
}

// Translates core_coord into coord_system using the soc descriptor of the given chip.
// The translation itself is performed by the soc descriptor's translate_coord_to.
CoreCoord Cluster::translate_coord_to(
    const chip_id_t chip, const CoreCoord core_coord, const CoordSystem coord_system) {
    const auto& chip_soc_descriptor = get_soc_descriptor(chip);
    return chip_soc_descriptor.translate_coord_to(core_coord, coord_system);
}

void Cluster::enable_local_ethernet_queue(const chip_id_t& device_id, int timeout) {
uint32_t msg_success = 0x0;
auto timeout_seconds = std::chrono::seconds(timeout);
Expand Down Expand Up @@ -2967,6 +3027,16 @@ void Cluster::l1_membar(
}
}

void Cluster::l1_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt::umd::CoreCoord>& cores) {
std::unordered_set<tt_xy_pair> cores_xy;
for (const auto& core : cores) {
const CoreCoord virtual_core = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
cores_xy.insert({virtual_core.x, virtual_core.y});
}
l1_membar(chip, fallback_tlb, cores_xy);
}

void Cluster::dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt_xy_pair>& cores) {
if (cluster_desc->is_chip_mmio_capable(chip)) {
Expand All @@ -2985,6 +3055,16 @@ void Cluster::dram_membar(
}
}

void Cluster::dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt::umd::CoreCoord>& cores) {
std::unordered_set<tt_xy_pair> cores_xy;
for (const auto& core : cores) {
const CoreCoord virtual_core = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
cores_xy.insert({virtual_core.x, virtual_core.y});
}
dram_membar(chip, fallback_tlb, cores_xy);
}

void Cluster::dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<uint32_t>& channels) {
if (cluster_desc->is_chip_mmio_capable(chip)) {
Expand Down Expand Up @@ -3022,6 +3102,21 @@ void Cluster::write_to_device(
}
}

void Cluster::write_to_device(
const void* mem_ptr,
uint32_t size_in_bytes,
chip_id_t chip,
CoreCoord core,
uint64_t addr,
const std::string& tlb_to_use) {
tt_cxy_pair virtual_core;
virtual_core.chip = chip;
CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
write_to_device(mem_ptr, size_in_bytes, virtual_core, addr, tlb_to_use);
}

void Cluster::read_mmio_device_register(
void* mem_ptr, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) {
TTDevice* tt_device = get_tt_device(core.chip);
Expand Down Expand Up @@ -3081,6 +3176,16 @@ void Cluster::read_from_device(
}
}

void Cluster::read_from_device(
void* mem_ptr, chip_id_t chip, CoreCoord core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) {
tt_cxy_pair virtual_core;
virtual_core.chip = chip;
CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
read_from_device(mem_ptr, virtual_core, addr, size, fallback_tlb);
}

int Cluster::arc_msg(
int logical_device_id,
uint32_t msg_code,
Expand Down
2 changes: 1 addition & 1 deletion device/coordinate_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ void CoordinateManager::identity_map_physical_cores() {
}
}

CoreCoord CoordinateManager::to(const CoreCoord core_coord, const CoordSystem coord_system) {
// Translates core_coord into coord_system in two lookups: first to_physical_map resolves the
// input coordinate, then from_physical_map is queried with that result paired with the requested
// coordinate system. Both lookups use at().
CoreCoord CoordinateManager::translate_coord_to(const CoreCoord core_coord, const CoordSystem coord_system) {
    auto& physical_entry = to_physical_map.at(core_coord);
    return from_physical_map.at({physical_entry, coord_system});
}

Expand Down
7 changes: 4 additions & 3 deletions device/tt_soc_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,9 @@ void tt_SocDescriptor::create_coordinate_manager(
get_cores_and_grid_size_from_coordinate_manager();
}

tt::umd::CoreCoord tt_SocDescriptor::to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const {
return coordinate_manager->to(core_coord, coord_system);
// Translates core_coord into coord_system by delegating to the coordinate manager owned by this
// soc descriptor.
tt::umd::CoreCoord tt_SocDescriptor::translate_coord_to(
    const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const {
    const tt::umd::CoreCoord translated = coordinate_manager->translate_coord_to(core_coord, coord_system);
    return translated;
}

tt_SocDescriptor::tt_SocDescriptor(
Expand Down Expand Up @@ -269,7 +270,7 @@ tt_xy_pair tt_SocDescriptor::get_core_for_dram_channel(int dram_chan, int subcha

// Returns the DRAM core for the given channel/subchannel in the PHYSICAL coordinate system.
// A LOGICAL DRAM CoreCoord is built with x = dram_chan and y = subchannel and then translated.
// NOTE(review): this hunk shows two return statements; `to(...)` appears to be the pre-rename
// spelling and `translate_coord_to(...)` its replacement — confirm only the latter remains.
CoreCoord tt_SocDescriptor::get_dram_core_for_channel(int dram_chan, int subchannel) const {
const CoreCoord logical_dram_coord = CoreCoord(dram_chan, subchannel, CoreType::DRAM, CoordSystem::LOGICAL);
return to(logical_dram_coord, CoordSystem::PHYSICAL);
return translate_coord_to(logical_dram_coord, CoordSystem::PHYSICAL);
}

bool tt_SocDescriptor::is_ethernet_core(const tt_xy_pair &core) const {
Expand Down
Loading

0 comments on commit 533c7a7

Please sign in to comment.