Skip to content

Commit

Permalink
Address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT committed Dec 17, 2024
1 parent 533c7a7 commit fc407bc
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 42 deletions.
30 changes: 16 additions & 14 deletions device/api/umd/device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ class tt_device {
}

virtual void configure_active_ethernet_cores_for_mmio_device(
chip_id_t mmio_chip, const std::unordered_set<tt::umd::CoreCoord>& active_eth_cores_per_chip) {
const std::unordered_set<tt::umd::CoreCoord>& active_eth_cores_per_chip, chip_id_t mmio_chip) {
throw std::runtime_error(
"---- tt_device::configure_active_ethernet_cores_for_mmio_device is not implemented\n");
}
Expand Down Expand Up @@ -312,6 +312,11 @@ class tt_device {
throw std::runtime_error("---- tt_device::l1_membar is not implemented\n");
}

/**
 * l1_membar overload that takes the target cores as a set of tt::umd::CoreCoord.
 *
 * This base-class body does no work: it unconditionally throws, and none of
 * chip, cores or fallback_tlb is read. It is declared virtual; the Cluster
 * class shown in this header declares an l1_membar with the same parameter list.
 *
 * @throws std::runtime_error always ("tt_device::l1_membar is not implemented").
 */
virtual void l1_membar(
const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb) {
throw std::runtime_error("---- tt_device::l1_membar is not implemented\n");
}

virtual void dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<uint32_t>& channels = {}) {
throw std::runtime_error("---- tt_device::dram_membar is not implemented\n");
Expand All @@ -322,6 +327,11 @@ class tt_device {
throw std::runtime_error("---- tt_device::dram_membar is not implemented\n");
}

/**
 * dram_membar overload that takes the target cores as a set of tt::umd::CoreCoord.
 *
 * This base-class body does no work: it unconditionally throws, and none of
 * chip, cores or fallback_tlb is read. It is declared virtual; the Cluster
 * class shown in this header declares a dram_membar with the same parameter list.
 *
 * @throws std::runtime_error always ("tt_device::dram_membar is not implemented").
 */
virtual void dram_membar(
const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb) {
throw std::runtime_error("---- tt_device::dram_membar is not implemented\n");
}

// Misc. Functions to Query/Set Device State
/**
* Query post harvesting SOC descriptors from UMD in virtual coordinates.
Expand Down Expand Up @@ -500,12 +510,6 @@ class tt_device {
return soc_descriptor_per_chip.at(chip_id);
}

virtual tt::umd::CoreCoord translate_coord_to(
const chip_id_t chip, const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) {
throw std::runtime_error("---- tt_device::to is not implemented\n");
return tt::umd::CoreCoord();
}

bool performed_harvesting = false;
std::unordered_map<chip_id_t, uint32_t> harvested_rows_per_target = {};
bool translation_tables_en = false;
Expand Down Expand Up @@ -761,14 +765,12 @@ class Cluster : public tt_device {
std::optional<std::tuple<uint32_t, uint32_t>> get_tlb_data_from_target(
const chip_id_t chip, const tt::umd::CoreCoord core);
tt::Writer get_static_tlb_writer(const chip_id_t chip, const tt::umd::CoreCoord target);
virtual tt::umd::CoreCoord translate_coord_to(
const chip_id_t chip, const tt::umd::CoreCoord core_coord, const CoordSystem coord_system);
virtual void configure_active_ethernet_cores_for_mmio_device(
chip_id_t mmio_chip, const std::unordered_set<tt::umd::CoreCoord>& active_eth_cores_per_chip);
void l1_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt::umd::CoreCoord>& cores);
void dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt::umd::CoreCoord>& cores);
const std::unordered_set<tt::umd::CoreCoord>& active_eth_cores_per_chip, chip_id_t mmio_chip);
virtual void l1_membar(
const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb);
virtual void dram_membar(
const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb);

// Destructor
virtual ~Cluster();
Expand Down
2 changes: 1 addition & 1 deletion device/api/umd/device/tt_core_coordinates.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,5 +86,5 @@ struct CoreCoord : public tt_xy_pair {

template <>
struct std::hash<tt::umd::CoreCoord> {
std::size_t operator()(const tt::umd::CoreCoord& core_range) const;
std::size_t operator()(const tt::umd::CoreCoord& core_coord) const;
};
36 changes: 18 additions & 18 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -680,10 +680,11 @@ void Cluster::configure_active_ethernet_cores_for_mmio_device(
}

void Cluster::configure_active_ethernet_cores_for_mmio_device(
chip_id_t mmio_chip, const std::unordered_set<CoreCoord>& active_eth_cores_per_chip) {
const std::unordered_set<CoreCoord>& active_eth_cores_per_chip, chip_id_t mmio_chip) {
std::unordered_set<tt_xy_pair> active_eth_cores_xy;
const tt_SocDescriptor& soc_desc = get_soc_descriptor(mmio_chip);
for (const auto& core : active_eth_cores_per_chip) {
CoreCoord virtual_coord = translate_coord_to(mmio_chip, core, CoordSystem::VIRTUAL);
CoreCoord virtual_coord = soc_desc.translate_coord_to(core, CoordSystem::VIRTUAL);
active_eth_cores_xy.insert(tt_xy_pair(virtual_coord.x, virtual_coord.y));
}

Expand Down Expand Up @@ -1015,7 +1016,8 @@ void Cluster::deassert_risc_reset_at_core(tt_cxy_pair core, const TensixSoftRese
void Cluster::deassert_risc_reset_at_core(
const chip_id_t chip, const CoreCoord core, const TensixSoftResetOptions& soft_resets) {
tt_cxy_pair virtual_core;
const CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
virtual_core.chip = chip;
const CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
deassert_risc_reset_at_core(virtual_core, soft_resets);
Expand Down Expand Up @@ -1046,7 +1048,8 @@ void Cluster::assert_risc_reset_at_core(tt_cxy_pair core) {

void Cluster::assert_risc_reset_at_core(const chip_id_t chip, const CoreCoord core) {
tt_cxy_pair virtual_core;
const CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
virtual_core.chip = chip;
const CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
assert_risc_reset_at_core(virtual_core);
Expand Down Expand Up @@ -1128,7 +1131,8 @@ tt::Writer Cluster::get_static_tlb_writer(tt_cxy_pair target) {

tt::Writer Cluster::get_static_tlb_writer(const chip_id_t chip, const CoreCoord target) {
tt_cxy_pair virtual_core;
const CoreCoord virtual_coord = translate_coord_to(chip, target, CoordSystem::VIRTUAL);
virtual_core.chip = chip;
const CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(target, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
return get_static_tlb_writer(virtual_core);
Expand Down Expand Up @@ -1406,8 +1410,8 @@ std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(

std::optional<std::tuple<uint32_t, uint32_t>> Cluster::get_tlb_data_from_target(const chip_id_t chip, CoreCoord core) {
tt_cxy_pair virtual_core;
const CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
virtual_core.chip = chip;
const CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
return get_tlb_data_from_target(virtual_core);
Expand All @@ -1430,7 +1434,8 @@ void Cluster::configure_tlb(
void Cluster::configure_tlb(
chip_id_t logical_device_id, tt::umd::CoreCoord core, int32_t tlb_index, uint64_t address, uint64_t ordering) {
tt_xy_pair virtual_core;
const CoreCoord virtual_coord = translate_coord_to(logical_device_id, core, CoordSystem::VIRTUAL);
const CoreCoord virtual_coord =
get_soc_descriptor(logical_device_id).translate_coord_to(core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
configure_tlb(logical_device_id, virtual_core, tlb_index, address, ordering);
Expand Down Expand Up @@ -1770,11 +1775,6 @@ uint32_t Cluster::get_harvested_noc_rows_for_chip(int logical_device_id) {
return get_harvested_noc_rows(get_harvested_rows(logical_device_id));
}

CoreCoord Cluster::translate_coord_to(
const chip_id_t chip, const CoreCoord core_coord, const CoordSystem coord_system) {
return get_soc_descriptor(chip).translate_coord_to(core_coord, coord_system);
}

void Cluster::enable_local_ethernet_queue(const chip_id_t& device_id, int timeout) {
uint32_t msg_success = 0x0;
auto timeout_seconds = std::chrono::seconds(timeout);
Expand Down Expand Up @@ -3028,10 +3028,10 @@ void Cluster::l1_membar(
}

void Cluster::l1_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt::umd::CoreCoord>& cores) {
const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb) {
std::unordered_set<tt_xy_pair> cores_xy;
for (const auto& core : cores) {
const CoreCoord virtual_core = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
const CoreCoord virtual_core = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL);
cores_xy.insert({virtual_core.x, virtual_core.y});
}
l1_membar(chip, fallback_tlb, cores_xy);
Expand All @@ -3056,10 +3056,10 @@ void Cluster::dram_membar(
}

void Cluster::dram_membar(
const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set<tt::umd::CoreCoord>& cores) {
const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb) {
std::unordered_set<tt_xy_pair> cores_xy;
for (const auto& core : cores) {
const CoreCoord virtual_core = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
const CoreCoord virtual_core = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL);
cores_xy.insert({virtual_core.x, virtual_core.y});
}
dram_membar(chip, fallback_tlb, cores_xy);
Expand Down Expand Up @@ -3111,7 +3111,7 @@ void Cluster::write_to_device(
const std::string& tlb_to_use) {
tt_cxy_pair virtual_core;
virtual_core.chip = chip;
CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
write_to_device(mem_ptr, size_in_bytes, virtual_core, addr, tlb_to_use);
Expand Down Expand Up @@ -3180,7 +3180,7 @@ void Cluster::read_from_device(
void* mem_ptr, chip_id_t chip, CoreCoord core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) {
tt_cxy_pair virtual_core;
virtual_core.chip = chip;
CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL);
CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL);
virtual_core.x = virtual_coord.x;
virtual_core.y = virtual_coord.y;
read_from_device(mem_ptr, virtual_core, addr, size, fallback_tlb);
Expand Down
9 changes: 0 additions & 9 deletions tests/api/test_cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,6 @@ TEST(ApiClusterTest, SimpleIOAllChips) {
std::vector<uint8_t> readback_data(data_size, 0);
umd_cluster->read_from_device(readback_data.data(), chip_id, any_core, 0, data_size, "LARGE_READ_TLB");

// umd_cluster->wait_for_non_mmio_flush(chip_id);

ASSERT_EQ(data, readback_data);
}
}
Expand All @@ -152,7 +150,6 @@ TEST(ApiClusterTest, RemoteFlush) {
setup_wormhole_remote(umd_cluster.get());

for (auto chip_id : umd_cluster->get_target_remote_device_ids()) {
// const tt_SocDescriptor& soc_desc = umd_cluster->get_soc_descriptor(chip_id);
const tt_SocDescriptor& soc_desc = umd_cluster->get_soc_descriptor(chip_id);

const CoreCoord any_core = soc_desc.get_cores(CoreType::TENSIX)[0];
Expand All @@ -177,8 +174,6 @@ TEST(ApiClusterTest, RemoteFlush) {
std::vector<uint8_t> readback_data(data_size, 0);
umd_cluster->read_from_device(readback_data.data(), chip_id, any_core, 0, data_size, "LARGE_READ_TLB");

// umd_cluster->wait_for_non_mmio_flush(chip_id);

ASSERT_EQ(data, readback_data);
}
}
Expand All @@ -205,7 +200,6 @@ TEST(ApiClusterTest, SimpleIOSpecificChips) {
setup_wormhole_remote(umd_cluster.get());

for (auto chip_id : umd_cluster->get_all_chips_in_cluster()) {
// const tt_SocDescriptor& soc_desc = umd_cluster->get_soc_descriptor(chip_id);
const tt_SocDescriptor& soc_desc = umd_cluster->get_soc_descriptor(chip_id);

const CoreCoord any_core = soc_desc.get_cores(CoreType::TENSIX)[0];
Expand All @@ -219,7 +213,6 @@ TEST(ApiClusterTest, SimpleIOSpecificChips) {

// Now read back the data.
for (auto chip_id : umd_cluster->get_all_chips_in_cluster()) {
// const tt_SocDescriptor& soc_desc = umd_cluster->get_soc_descriptor(chip_id);
const tt_SocDescriptor& soc_desc = umd_cluster->get_soc_descriptor(chip_id);

const CoreCoord any_core = soc_desc.get_cores(CoreType::TENSIX)[0];
Expand All @@ -229,8 +222,6 @@ TEST(ApiClusterTest, SimpleIOSpecificChips) {
std::vector<uint8_t> readback_data(data_size, 0);
umd_cluster->read_from_device(readback_data.data(), chip_id, any_core, 0, data_size, "LARGE_READ_TLB");

// umd_cluster->wait_for_non_mmio_flush(chip_id);

ASSERT_EQ(data, readback_data);
}
}
Expand Down

0 comments on commit fc407bc

Please sign in to comment.