Skip to content

Commit

Permalink
Address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT committed Dec 17, 2024
1 parent 354ca09 commit d66814b
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 40 deletions.
30 changes: 16 additions & 14 deletions device/api/umd/device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class tt_device {
}

virtual void configure_active_ethernet_cores_for_mmio_device(
chip_id_t mmio_chip, const std::unordered_set<tt::umd::CoreCoord>& active_eth_cores_per_chip) {
const std::unordered_set<tt::umd::CoreCoord>& active_eth_cores_per_chip, chip_id_t mmio_chip) {
throw std::runtime_error(
"---- tt_device::configure_active_ethernet_cores_for_mmio_device is not implemented\n");
}
Expand Down Expand Up @@ -276,6 +276,11 @@ class tt_device {
throw std::runtime_error("---- tt_device::l1_membar is not implemented\n");
}

/**
 * Base-class placeholder for the l1_membar overload that takes a set of CoreCoord cores.
 * This default implementation performs no work and always throws.
 *
 * @param chip Chip the call refers to (unused here).
 * @param cores Set of core coordinates the call refers to (unused here).
 * @param fallback_tlb TLB name passed by the caller (unused here).
 * @throws std::runtime_error Always, to signal that the method is not implemented.
 */
virtual void l1_membar(
    const chip_id_t chip,
    const std::unordered_set<tt::umd::CoreCoord>& cores,
    const std::string& fallback_tlb) {
    throw std::runtime_error("---- tt_device::l1_membar is not implemented\n");
}

virtual void dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<uint32_t>& channels = {}) {
throw std::runtime_error("---- tt_device::dram_membar is not implemented\n");
Expand All @@ -286,6 +291,11 @@ class tt_device {
throw std::runtime_error("---- tt_device::dram_membar is not implemented\n");
}

/**
 * Base-class placeholder for the dram_membar overload that takes a set of CoreCoord cores.
 * This default implementation performs no work and always throws.
 *
 * @param chip Chip the call refers to (unused here).
 * @param cores Set of core coordinates the call refers to (unused here).
 * @param fallback_tlb TLB name passed by the caller (unused here).
 * @throws std::runtime_error Always, to signal that the method is not implemented.
 */
virtual void dram_membar(
    const chip_id_t chip,
    const std::unordered_set<tt::umd::CoreCoord>& cores,
    const std::string& fallback_tlb) {
    throw std::runtime_error("---- tt_device::dram_membar is not implemented\n");
}

// Misc. Functions to Query/Set Device State
/**
* Query post harvesting SOC descriptors from UMD in virtual coordinates.
Expand Down Expand Up @@ -450,12 +460,6 @@ class tt_device {
return soc_descriptor_per_chip.at(chip_id);
}

/**
 * Base-class placeholder for coordinate translation. This default implementation performs no
 * translation and always throws.
 *
 * @param chip Chip the coordinate belongs to (unused here).
 * @param core_coord Coordinate to translate (unused here).
 * @param coord_system Requested target coordinate system (unused here).
 * @return Never returns normally.
 * @throws std::runtime_error Always, to signal that the method is not implemented.
 */
virtual tt::umd::CoreCoord translate_coord_to(
    const chip_id_t chip, const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) {
    // The message names this function, matching the other "not implemented" stubs in tt_device.
    // No return statement is needed: control never leaves the throw.
    throw std::runtime_error("---- tt_device::translate_coord_to is not implemented\n");
}

bool performed_harvesting = false;
std::unordered_map<chip_id_t, uint32_t> harvested_rows_per_target = {};
bool translation_tables_en = false;
Expand Down Expand Up @@ -705,14 +709,12 @@ class Cluster : public tt_device {
std::optional<std::tuple<uint32_t, uint32_t>> get_tlb_data_from_target(
const chip_id_t chip, const tt::umd::CoreCoord core);
tt::Writer get_static_tlb_writer(const chip_id_t chip, const tt::umd::CoreCoord target);
virtual tt::umd::CoreCoord translate_coord_to(
const chip_id_t chip, const tt::umd::CoreCoord core_coord, const CoordSystem coord_system);
virtual void configure_active_ethernet_cores_for_mmio_device(
chip_id_t mmio_chip, const std::unordered_set<tt::umd::CoreCoord>& active_eth_cores_per_chip);
void l1_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt::umd::CoreCoord>& cores);
void dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt::umd::CoreCoord>& cores);
const std::unordered_set<tt::umd::CoreCoord>& active_eth_cores_per_chip, chip_id_t mmio_chip);
virtual void l1_membar(
const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb);
virtual void dram_membar(
const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb);

// Destructor
virtual ~Cluster();
Expand Down
4 changes: 3 additions & 1 deletion device/api/umd/device/tt_core_coordinates.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ struct CoreCoord : public tt_xy_pair {

} // namespace tt::umd

namespace std {
template <>
struct std::hash<tt::umd::CoreCoord> {
std::size_t operator()(const tt::umd::CoreCoord& core_range) const;
std::size_t operator()(const tt::umd::CoreCoord& core_coord) const;
};
} // namespace std
36 changes: 18 additions & 18 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -689,10 +689,11 @@ void Cluster::configure_active_ethernet_cores_for_mmio_device(
}

void Cluster::configure_active_ethernet_cores_for_mmio_device(
chip_id_t mmio_chip, const std::unordered_set<CoreCoord>& active_eth_cores_per_chip) {
const std::unordered_set<CoreCoord>& active_eth_cores_per_chip, chip_id_t mmio_chip) {
std::unordered_set<tt_xy_pair> active_eth_cores_xy;
const tt_SocDescriptor& soc_desc = get_soc_descriptor(mmio_chip);
for (const auto& core : active_eth_cores_per_chip) {
CoreCoord virtual_coord = translate_coord_to(mmio_chip, core, CoordSystem::VIRTUAL);
CoreCoord virtual_coord = soc_desc.translate_coord_to(core, CoordSystem::VIRTUAL);
active_eth_cores_xy.insert(tt_xy_pair(virtual_coord.x, virtual_coord.y));
}

Expand Down Expand Up @@ -1016,7 +1017,8 @@ void Cluster::deassert_risc_reset_at_core(tt_cxy_pair core, const TensixSoftRese
void Cluster::deassert_risc_reset_at_core(
const chip_id_t chip, const CoreCoord core, const TensixSoftResetOptions& soft_resets) {
tt_cxy_pair virtual_core;
const CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
virtual_core.chip = chip;
const CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
deassert_risc_reset_at_core(virtual_core, soft_resets);
Expand Down Expand Up @@ -1044,7 +1046,8 @@ void Cluster::assert_risc_reset_at_core(tt_cxy_pair core) {

void Cluster::assert_risc_reset_at_core(const chip_id_t chip, const CoreCoord core) {
tt_cxy_pair virtual_core;
const CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
virtual_core.chip = chip;
const CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
assert_risc_reset_at_core(virtual_core);
Expand Down Expand Up @@ -1114,7 +1117,8 @@ tt::Writer Cluster::get_static_tlb_writer(tt_cxy_pair target) {

tt::Writer Cluster::get_static_tlb_writer(const chip_id_t chip, const CoreCoord target) {
tt_cxy_pair virtual_core;
const CoreCoord virtual_coord = translate_coord_to(chip, target, CoordSystem::VIRTUAL);
virtual_core.chip = chip;
const CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(target, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
return get_static_tlb_writer(virtual_core);
Expand Down Expand Up @@ -1369,8 +1373,8 @@ std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(

std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(const chip_id_t chip, CoreCoord core) {
tt_cxy_pair virtual_core;
const CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
virtual_core.chip = chip;
const CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
return get_tlb_data_from_target(virtual_core);
Expand Down Expand Up @@ -1408,7 +1412,8 @@ void Cluster::configure_tlb(
void Cluster::configure_tlb(
chip_id_t logical_device_id, tt::umd::CoreCoord core, int32_t tlb_index, uint64_t address, uint64_t ordering) {
tt_xy_pair virtual_core;
const CoreCoord virtual_coord = translate_coord_to(logical_device_id, core, CoordSystem::VIRTUAL);
const CoreCoord virtual_coord =
get_soc_descriptor(logical_device_id).translate_coord_to(core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
configure_tlb(logical_device_id, virtual_core, tlb_index, address, ordering);
Expand Down Expand Up @@ -1748,11 +1753,6 @@ uint32_t Cluster::get_harvested_noc_rows_for_chip(int logical_device_id) {
return get_harvested_noc_rows(get_harvested_rows(logical_device_id));
}

/**
 * Translates a core coordinate of the given chip into the requested coordinate system by
 * delegating to that chip's SoC descriptor.
 *
 * @param chip Chip whose SoC descriptor performs the translation.
 * @param core_coord Coordinate to translate.
 * @param coord_system Target coordinate system.
 * @return The coordinate returned by the SoC descriptor's translate_coord_to.
 */
CoreCoord Cluster::translate_coord_to(
    const chip_id_t chip, const CoreCoord core_coord, const CoordSystem coord_system) {
    const auto& chip_soc_descriptor = get_soc_descriptor(chip);
    return chip_soc_descriptor.translate_coord_to(core_coord, coord_system);
}

void Cluster::enable_local_ethernet_queue(const chip_id_t& device_id, int timeout) {
uint32_t msg_success = 0x0;
auto timeout_seconds = std::chrono::seconds(timeout);
Expand Down Expand Up @@ -3006,10 +3006,10 @@ void Cluster::l1_membar(
}

void Cluster::l1_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt::umd::CoreCoord>& cores) {
const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb) {
std::unordered_set<tt_xy_pair> cores_xy;
for (const auto& core : cores) {
const CoreCoord virtual_core = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
const CoreCoord virtual_core = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL);
cores_xy.insert({virtual_core.x, virtual_core.y});
}
l1_membar(chip, fallback_tlb, cores_xy);
Expand All @@ -3034,10 +3034,10 @@ void Cluster::dram_membar(
}

void Cluster::dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt::umd::CoreCoord>& cores) {
const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb) {
std::unordered_set<tt_xy_pair> cores_xy;
for (const auto& core : cores) {
const CoreCoord virtual_core = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
const CoreCoord virtual_core = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL);
cores_xy.insert({virtual_core.x, virtual_core.y});
}
dram_membar(chip, fallback_tlb, cores_xy);
Expand Down Expand Up @@ -3089,7 +3089,7 @@ void Cluster::write_to_device(
const std::string& tlb_to_use) {
tt_cxy_pair virtual_core;
virtual_core.chip = chip;
CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
write_to_device(mem_ptr, size_in_bytes, virtual_core, addr, tlb_to_use);
Expand Down Expand Up @@ -3158,7 +3158,7 @@ void Cluster::read_from_device(
void* mem_ptr, chip_id_t chip, CoreCoord core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) {
tt_cxy_pair virtual_core;
virtual_core.chip = chip;
CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
read_from_device(mem_ptr, virtual_core, addr, size, fallback_tlb);
Expand Down
16 changes: 16 additions & 0 deletions device/tt_core_coordinates.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include "umd/device/tt_core_coordinates.h"

namespace std {
std::size_t operator()(const CoreCoord& core_coord) const {
size_t seed = 0;
seed = std::hash<size_t>{}(core_coord.x) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
seed = std::hash<size_t>{}(core_coord.y) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
seed = std::hash<CoreType>{}(core_coord.core_type) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
seed = std::hash<CoordSystem>{}(core_coord.coord_system) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
return seed;
}
} // namespace std
7 changes: 0 additions & 7 deletions tests/api/test_cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,6 @@ TEST(ApiClusterTest, SimpleIOAllChips) {
std::vector<uint8_t> readback_data(data_size, 0);
umd_cluster->read_from_device(readback_data.data(), chip_id, any_core, 0, data_size, "LARGE_READ_TLB");

// umd_cluster->wait_for_non_mmio_flush(chip_id);

ASSERT_EQ(data, readback_data);
}
}
Expand All @@ -145,7 +143,6 @@ TEST(ApiClusterTest, RemoteFlush) {
umd_cluster->set_barrier_address_params({L1_BARRIER_BASE, ETH_BARRIER_BASE, DRAM_BARRIER_BASE});

for (auto chip_id : umd_cluster->get_target_remote_device_ids()) {
// const tt_SocDescriptor& soc_desc = umd_cluster->get_soc_descriptor(chip_id);
const tt_SocDescriptor& soc_desc = umd_cluster->get_soc_descriptor(chip_id);

const CoreCoord any_core = soc_desc.get_cores(CoreType::TENSIX)[0];
Expand All @@ -170,8 +167,6 @@ TEST(ApiClusterTest, RemoteFlush) {
std::vector<uint8_t> readback_data(data_size, 0);
umd_cluster->read_from_device(readback_data.data(), chip_id, any_core, 0, data_size, "LARGE_READ_TLB");

// umd_cluster->wait_for_non_mmio_flush(chip_id);

ASSERT_EQ(data, readback_data);
}
}
Expand Down Expand Up @@ -221,8 +216,6 @@ TEST(ApiClusterTest, SimpleIOSpecificChips) {
std::vector<uint8_t> readback_data(data_size, 0);
umd_cluster->read_from_device(readback_data.data(), chip_id, any_core, 0, data_size, "LARGE_READ_TLB");

// umd_cluster->wait_for_non_mmio_flush(chip_id);

ASSERT_EQ(data, readback_data);
}
}
Expand Down

0 comments on commit d66814b

Please sign in to comment.