Skip to content

Commit

Permalink
Implement Cluster CoreCoord API
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT committed Dec 16, 2024
1 parent 85bf5c4 commit f9833cd
Show file tree
Hide file tree
Showing 11 changed files with 351 additions and 130 deletions.
212 changes: 151 additions & 61 deletions device/api/umd/device/cluster.h

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions device/api/umd/device/tt_core_coordinates.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,8 @@ struct CoreCoord : public tt_xy_pair {
};

} // namespace tt::umd

// Specialization of std::hash so that tt::umd::CoreCoord can be used as a key in
// unordered containers (e.g. std::unordered_set<tt::umd::CoreCoord>).
// Only the declaration lives here; the definition is provided elsewhere.
template <>
struct std::hash<tt::umd::CoreCoord> {
    // Returns the hash value computed for the given core coordinate.
    std::size_t operator()(const tt::umd::CoreCoord& core_coord) const;
};
108 changes: 106 additions & 2 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <utility>
#include <vector>

#include "api/umd/device/tt_core_coordinates.h"
#include "logger.hpp"
#include "umd/device/architecture_implementation.h"
#include "umd/device/chip/local_chip.h"
Expand Down Expand Up @@ -498,8 +499,8 @@ std::unique_ptr<Chip> Cluster::construct_chip_from_cluster(chip_id_t chip_id, tt
// true.
// TODO: This should be changed, harvesting should be done in tt_socdescriptor's constructor and not as part of
// cluster class.
// uint32_t harvesting_info = cluster_desc->get_harvesting_info().at(chip_id);
tt_SocDescriptor soc_desc = tt_SocDescriptor(soc_desc_path /*, harvesting_info*/);
uint32_t tensix_harvesting_mask = cluster_desc->get_harvesting_info().at(chip_id);
tt_SocDescriptor soc_desc = tt_SocDescriptor(soc_desc_path, tensix_harvesting_mask/*, harvesting_info*/);
return construct_chip_from_cluster(chip_id, cluster_desc, soc_desc);
}

Expand Down Expand Up @@ -678,6 +679,17 @@ void Cluster::configure_active_ethernet_cores_for_mmio_device(
non_mmio_transfer_cores_customized = true;
}

// CoreCoord flavour of configure_active_ethernet_cores_for_mmio_device.
// Every core in active_eth_cores_per_chip is translated to CoordSystem::VIRTUAL for mmio_chip,
// collected as a tt_xy_pair, and the resulting set is handed to the tt_xy_pair overload.
void Cluster::configure_active_ethernet_cores_for_mmio_device(
    chip_id_t mmio_chip, const std::unordered_set<CoreCoord>& active_eth_cores_per_chip) {
    std::unordered_set<tt_xy_pair> translated_eth_cores;

    for (const auto& eth_core : active_eth_cores_per_chip) {
        const CoreCoord translated = to(mmio_chip, eth_core, CoordSystem::VIRTUAL);
        const tt_xy_pair translated_xy(translated.x, translated.y);
        translated_eth_cores.insert(translated_xy);
    }

    configure_active_ethernet_cores_for_mmio_device(mmio_chip, translated_eth_cores);
}

void Cluster::populate_cores() {
std::uint32_t count = 0;
for (const auto& [chip_id, chip] : chips_) {
Expand Down Expand Up @@ -1000,6 +1012,15 @@ void Cluster::deassert_risc_reset_at_core(tt_cxy_pair core, const TensixSoftRese
}
}

// CoreCoord flavour of deassert_risc_reset_at_core.
//
// chip        - chip on which the core lives.
// core        - core to act on, in any coordinate system understood by to().
// soft_resets - reset options forwarded unchanged to the tt_cxy_pair overload.
//
// The core is translated to CoordSystem::VIRTUAL and forwarded to the tt_cxy_pair overload.
void Cluster::deassert_risc_reset_at_core(
    const chip_id_t chip, const CoreCoord core, const TensixSoftResetOptions& soft_resets) {
    const CoreCoord virtual_coord = to(chip, core, CoordSystem::VIRTUAL);

    tt_cxy_pair virtual_core;
    // The tt_cxy_pair overload receives the target chip only through the pair, so it has to be
    // set here; otherwise the requested chip would be silently dropped.
    virtual_core.chip = chip;
    virtual_core.x = virtual_coord.x;
    virtual_core.y = virtual_coord.y;

    deassert_risc_reset_at_core(virtual_core, soft_resets);
}

void Cluster::assert_risc_reset_at_core(tt_cxy_pair core) {
// Get Target Device to query soc descriptor and determine location in cluster
std::uint32_t target_device = core.chip;
Expand All @@ -1023,6 +1044,14 @@ void Cluster::assert_risc_reset_at_core(tt_cxy_pair core) {
}
}

// CoreCoord flavour of assert_risc_reset_at_core.
//
// chip - chip on which the core lives.
// core - core to act on, in any coordinate system understood by to().
//
// The core is translated to CoordSystem::VIRTUAL and forwarded to the tt_cxy_pair overload.
void Cluster::assert_risc_reset_at_core(const chip_id_t chip, const CoreCoord core) {
    const CoreCoord virtual_coord = to(chip, core, CoordSystem::VIRTUAL);

    tt_cxy_pair virtual_core;
    // The tt_cxy_pair overload takes its target device from the pair's chip field, so it has to
    // be set here; otherwise the requested chip would be silently dropped.
    virtual_core.chip = chip;
    virtual_core.x = virtual_coord.x;
    virtual_core.y = virtual_coord.y;

    assert_risc_reset_at_core(virtual_core);
}

// Free memory during teardown, and remove (clean/unlock) from any leftover mutexes.
void Cluster::cleanup_shared_host_state() {
for (auto& mutex : hardware_resource_mutex_map) {
Expand Down Expand Up @@ -1097,6 +1126,14 @@ tt::Writer Cluster::get_static_tlb_writer(tt_cxy_pair target) {
return tt::Writer(base + tlb_offset, tlb_size);
}

// CoreCoord flavour of get_static_tlb_writer.
//
// chip   - chip on which the target core lives.
// target - core to get a writer for, in any coordinate system understood by to().
//
// The core is translated to CoordSystem::VIRTUAL and the result of the tt_cxy_pair overload is
// returned unchanged.
tt::Writer Cluster::get_static_tlb_writer(const chip_id_t chip, const CoreCoord target) {
    const CoreCoord virtual_coord = to(chip, target, CoordSystem::VIRTUAL);

    tt_cxy_pair virtual_core;
    // The tt_cxy_pair overload receives the target chip only through the pair, so it has to be
    // set here; otherwise the requested chip would be silently dropped.
    virtual_core.chip = chip;
    virtual_core.x = virtual_coord.x;
    virtual_core.y = virtual_coord.y;

    return get_static_tlb_writer(virtual_core);
}

void Cluster::write_device_memory(
const void* mem_ptr,
uint32_t size_in_bytes,
Expand Down Expand Up @@ -1367,6 +1404,15 @@ std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(
return tlb_data;
}

std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(const chip_id_t chip, CoreCoord core) {
tt_cxy_pair virtual_core;
const CoreCoord virtual_coord = to(chip, core, CoordSystem::VIRTUAL);
virtual_core.chip = chip;
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
return get_tlb_data_from_target(virtual_core);
}

void Cluster::configure_tlb(
chip_id_t logical_device_id, tt_xy_pair core, int32_t tlb_index, uint64_t address, uint64_t ordering) {
log_assert(
Expand All @@ -1381,6 +1427,15 @@ void Cluster::configure_tlb(
tlb_config_map[logical_device_id].insert({tlb_index, (address / tlb_size) * tlb_size});
}

// CoreCoord flavour of configure_tlb.
// The core is translated to CoordSystem::VIRTUAL for logical_device_id; all other arguments are
// passed through untouched to the tt_xy_pair overload.
void Cluster::configure_tlb(
    chip_id_t logical_device_id, tt::umd::CoreCoord core, int32_t tlb_index, uint64_t address, uint64_t ordering) {
    const CoreCoord translated = to(logical_device_id, core, CoordSystem::VIRTUAL);
    const tt_xy_pair translated_xy(translated.x, translated.y);

    configure_tlb(logical_device_id, translated_xy, tlb_index, address, ordering);
}

void Cluster::set_fallback_tlb_ordering_mode(const std::string& fallback_tlb, uint64_t ordering) {
log_assert(
ordering == TLB_DATA::Strict || ordering == TLB_DATA::Posted || ordering == TLB_DATA::Relaxed,
Expand Down Expand Up @@ -1715,6 +1770,10 @@ uint32_t Cluster::get_harvested_noc_rows_for_chip(int logical_device_id) {
return get_harvested_noc_rows(get_harvested_rows(logical_device_id));
}

// Translates core_coord into the requested coord_system by delegating to the soc descriptor
// obtained for the given chip.
CoreCoord Cluster::to(const chip_id_t chip, const CoreCoord core_coord, const CoordSystem coord_system) {
    auto&& chip_soc_descriptor = get_soc_descriptor(chip);
    return chip_soc_descriptor.to(core_coord, coord_system);
}

void Cluster::enable_local_ethernet_queue(const chip_id_t& device_id, int timeout) {
uint32_t msg_success = 0x0;
auto timeout_seconds = std::chrono::seconds(timeout);
Expand Down Expand Up @@ -2967,6 +3026,16 @@ void Cluster::l1_membar(
}
}

void Cluster::l1_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt::umd::CoreCoord>& cores) {
std::unordered_set<tt_xy_pair> cores_xy;
for (const auto& core : cores) {
const CoreCoord virtual_core = to(chip, core, CoordSystem::VIRTUAL);
cores_xy.insert({virtual_core.x, virtual_core.y});
}
l1_membar(chip, fallback_tlb, cores_xy);
}

void Cluster::dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt_xy_pair>& cores) {
if (cluster_desc->is_chip_mmio_capable(chip)) {
Expand All @@ -2985,6 +3054,16 @@ void Cluster::dram_membar(
}
}

void Cluster::dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt::umd::CoreCoord>& cores) {
std::unordered_set<tt_xy_pair> cores_xy;
for (const auto& core : cores) {
const CoreCoord virtual_core = to(chip, core, CoordSystem::VIRTUAL);
cores_xy.insert({virtual_core.x, virtual_core.y});
}
dram_membar(chip, fallback_tlb, cores_xy);
}

void Cluster::dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<uint32_t>& channels) {
if (cluster_desc->is_chip_mmio_capable(chip)) {
Expand Down Expand Up @@ -3022,6 +3101,21 @@ void Cluster::write_to_device(
}
}

void Cluster::write_to_device(
const void* mem_ptr,
uint32_t size_in_bytes,
chip_id_t chip,
CoreCoord core,
uint64_t addr,
const std::string& tlb_to_use) {
tt_cxy_pair virtual_core;
virtual_core.chip = chip;
CoreCoord virtual_coord = to(chip, core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
write_to_device(mem_ptr, size_in_bytes, virtual_core, addr, tlb_to_use);
}

void Cluster::read_mmio_device_register(
void* mem_ptr, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) {
TTDevice* tt_device = get_tt_device(core.chip);
Expand Down Expand Up @@ -3081,6 +3175,16 @@ void Cluster::read_from_device(
}
}

void Cluster::read_from_device(
void* mem_ptr, chip_id_t chip, CoreCoord core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) {
tt_cxy_pair virtual_core;
virtual_core.chip = chip;
CoreCoord virtual_coord = to(chip, core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
read_from_device(mem_ptr, virtual_core, addr, size, fallback_tlb);
}

int Cluster::arc_msg(
int logical_device_id,
uint32_t msg_code,
Expand Down
Loading

0 comments on commit f9833cd

Please sign in to comment.