From d66814b37f0a5fd35f8ff08242ca35dea5b56df0 Mon Sep 17 00:00:00 2001 From: pjanevski Date: Tue, 17 Dec 2024 15:17:24 +0000 Subject: [PATCH] Address comments --- device/api/umd/device/cluster.h | 30 +++++++++-------- device/api/umd/device/tt_core_coordinates.h | 4 ++- device/cluster.cpp | 36 ++++++++++----------- device/tt_core_coordinates.cpp | 16 +++++++++ tests/api/test_cluster.cpp | 7 ---- 5 files changed, 53 insertions(+), 40 deletions(-) create mode 100644 device/tt_core_coordinates.cpp diff --git a/device/api/umd/device/cluster.h b/device/api/umd/device/cluster.h index ca3201e0..433873c8 100644 --- a/device/api/umd/device/cluster.h +++ b/device/api/umd/device/cluster.h @@ -109,7 +109,7 @@ class tt_device { } virtual void configure_active_ethernet_cores_for_mmio_device( - chip_id_t mmio_chip, const std::unordered_set& active_eth_cores_per_chip) { + const std::unordered_set& active_eth_cores_per_chip, chip_id_t mmio_chip) { throw std::runtime_error( "---- tt_device::configure_active_ethernet_cores_for_mmio_device is not implemented\n"); } @@ -276,6 +276,11 @@ class tt_device { throw std::runtime_error("---- tt_device::l1_membar is not implemented\n"); } + virtual void l1_membar( + const chip_id_t chip, const std::unordered_set& cores, const std::string& fallback_tlb) { + throw std::runtime_error("---- tt_device::l1_membar is not implemented\n"); + } + virtual void dram_membar( const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set& channels = {}) { throw std::runtime_error("---- tt_device::dram_membar is not implemented\n"); @@ -286,6 +291,11 @@ class tt_device { throw std::runtime_error("---- tt_device::dram_membar is not implemented\n"); } + virtual void dram_membar( + const chip_id_t chip, const std::unordered_set& cores, const std::string& fallback_tlb) { + throw std::runtime_error("---- tt_device::dram_membar is not implemented\n"); + } + // Misc. Functions to Query/Set Device State /** * Query post harvesting SOC descriptors from UMD in virtual coordinates. @@ -450,12 +460,6 @@ class tt_device { return soc_descriptor_per_chip.at(chip_id); } - virtual tt::umd::CoreCoord translate_coord_to( - const chip_id_t chip, const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) { - throw std::runtime_error("---- tt_device::to is not implemented\n"); - return tt::umd::CoreCoord(); - } - bool performed_harvesting = false; std::unordered_map harvested_rows_per_target = {}; bool translation_tables_en = false; @@ -705,14 +709,12 @@ class Cluster : public tt_device { std::optional> get_tlb_data_from_target( const chip_id_t chip, const tt::umd::CoreCoord core); tt::Writer get_static_tlb_writer(const chip_id_t chip, const tt::umd::CoreCoord target); - virtual tt::umd::CoreCoord translate_coord_to( - const chip_id_t chip, const tt::umd::CoreCoord core_coord, const CoordSystem coord_system); virtual void configure_active_ethernet_cores_for_mmio_device( - chip_id_t mmio_chip, const std::unordered_set& active_eth_cores_per_chip); - void l1_membar( - const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set& cores); - void dram_membar( - const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set& cores); + const std::unordered_set& active_eth_cores_per_chip, chip_id_t mmio_chip); + virtual void l1_membar( + const chip_id_t chip, const std::unordered_set& cores, const std::string& fallback_tlb); + virtual void dram_membar( + const chip_id_t chip, const std::unordered_set& cores, const std::string& fallback_tlb); // Destructor virtual ~Cluster(); diff --git a/device/api/umd/device/tt_core_coordinates.h b/device/api/umd/device/tt_core_coordinates.h index f6df10f6..f1192f7e 100644 --- a/device/api/umd/device/tt_core_coordinates.h +++ b/device/api/umd/device/tt_core_coordinates.h @@ -84,7 +84,9 @@ struct CoreCoord : public tt_xy_pair { } // namespace tt::umd +namespace std { template <> struct std::hash { - std::size_t operator()(const tt::umd::CoreCoord& core_range) const; + std::size_t operator()(const tt::umd::CoreCoord& core_coord) const; }; +} // namespace std diff --git a/device/cluster.cpp b/device/cluster.cpp index be0f6ee8..590f5aea 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -689,10 +689,11 @@ void Cluster::configure_active_ethernet_cores_for_mmio_device( } void Cluster::configure_active_ethernet_cores_for_mmio_device( - chip_id_t mmio_chip, const std::unordered_set& active_eth_cores_per_chip) { + const std::unordered_set& active_eth_cores_per_chip, chip_id_t mmio_chip) { std::unordered_set active_eth_cores_xy; + const tt_SocDescriptor& soc_desc = get_soc_descriptor(mmio_chip); for (const auto& core : active_eth_cores_per_chip) { - CoreCoord virtual_coord = translate_coord_to(mmio_chip, core, CoordSystem::VIRTUAL); + CoreCoord virtual_coord = soc_desc.translate_coord_to(core, CoordSystem::VIRTUAL); active_eth_cores_xy.insert(tt_xy_pair(virtual_coord.x, virtual_coord.y)); } @@ -1016,7 +1017,8 @@ void Cluster::deassert_risc_reset_at_core(tt_cxy_pair core, const TensixSoftRese void Cluster::deassert_risc_reset_at_core( const chip_id_t chip, const CoreCoord core, const TensixSoftResetOptions& soft_resets) { tt_cxy_pair virtual_core; - const CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL); + virtual_core.chip = chip; + const CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL); virtual_core.x = virtual_coord.x; virtual_core.y = virtual_coord.y; deassert_risc_reset_at_core(virtual_core, soft_resets); @@ -1044,7 +1046,8 @@ void Cluster::assert_risc_reset_at_core(tt_cxy_pair core) { void Cluster::assert_risc_reset_at_core(const chip_id_t chip, const CoreCoord core) { tt_cxy_pair virtual_core; - const CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL); + virtual_core.chip = chip; + const CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL); virtual_core.x = virtual_coord.x; virtual_core.y = virtual_coord.y; assert_risc_reset_at_core(virtual_core); @@ -1114,7 +1117,8 @@ tt::Writer Cluster::get_static_tlb_writer(tt_cxy_pair target) { tt::Writer Cluster::get_static_tlb_writer(const chip_id_t chip, const CoreCoord target) { tt_cxy_pair virtual_core; - const CoreCoord virtual_coord = translate_coord_to(chip, target, CoordSystem::VIRTUAL); + virtual_core.chip = chip; + const CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(target, CoordSystem::VIRTUAL); virtual_core.x = virtual_coord.x; virtual_core.y = virtual_coord.y; return get_static_tlb_writer(virtual_core); @@ -1369,8 +1373,8 @@ std::optional> Cluster::get_tlb_data_from_target( std::optional> Cluster::get_tlb_data_from_target(const chip_id_t chip, CoreCoord core) { tt_cxy_pair virtual_core; - const CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL); virtual_core.chip = chip; + const CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL); virtual_core.x = virtual_coord.x; virtual_core.y = virtual_coord.y; return get_tlb_data_from_target(virtual_core); @@ -1408,7 +1412,8 @@ void Cluster::configure_tlb( void Cluster::configure_tlb( chip_id_t logical_device_id, tt::umd::CoreCoord core, int32_t tlb_index, uint64_t address, uint64_t ordering) { tt_xy_pair virtual_core; - const CoreCoord virtual_coord = translate_coord_to(logical_device_id, core, CoordSystem::VIRTUAL); + const CoreCoord virtual_coord = + get_soc_descriptor(logical_device_id).translate_coord_to(core, CoordSystem::VIRTUAL); virtual_core.x = virtual_coord.x; virtual_core.y = virtual_coord.y; configure_tlb(logical_device_id, virtual_core, tlb_index, address, ordering); @@ -1748,11 +1753,6 @@ uint32_t Cluster::get_harvested_noc_rows_for_chip(int logical_device_id) { return get_harvested_noc_rows(get_harvested_rows(logical_device_id)); } -CoreCoord Cluster::translate_coord_to( - const chip_id_t chip, const CoreCoord core_coord, const CoordSystem coord_system) { - return get_soc_descriptor(chip).translate_coord_to(core_coord, coord_system); -} - void Cluster::enable_local_ethernet_queue(const chip_id_t& device_id, int timeout) { uint32_t msg_success = 0x0; auto timeout_seconds = std::chrono::seconds(timeout); @@ -3006,10 +3006,10 @@ void Cluster::l1_membar( } void Cluster::l1_membar( - const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set& cores) { + const chip_id_t chip, const std::unordered_set& cores, const std::string& fallback_tlb) { std::unordered_set cores_xy; for (const auto& core : cores) { - const CoreCoord virtual_core = translate_coord_to(chip, core, CoordSystem::VIRTUAL); + const CoreCoord virtual_core = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL); cores_xy.insert({virtual_core.x, virtual_core.y}); } l1_membar(chip, fallback_tlb, cores_xy); @@ -3034,10 +3034,10 @@ void Cluster::dram_membar( } void Cluster::dram_membar( - const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set& cores) { + const chip_id_t chip, const std::unordered_set& cores, const std::string& fallback_tlb) { std::unordered_set cores_xy; for (const auto& core : cores) { - const CoreCoord virtual_core = translate_coord_to(chip, core, CoordSystem::VIRTUAL); + const CoreCoord virtual_core = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL); cores_xy.insert({virtual_core.x, virtual_core.y}); } dram_membar(chip, fallback_tlb, cores_xy); @@ -3089,7 +3089,7 @@ void Cluster::write_to_device( const std::string& tlb_to_use) { tt_cxy_pair virtual_core; virtual_core.chip = chip; - CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL); + CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL); virtual_core.x = virtual_coord.x; virtual_core.y = virtual_coord.y; write_to_device(mem_ptr, size_in_bytes, virtual_core, addr, tlb_to_use); @@ -3158,7 +3158,7 @@ void Cluster::read_from_device( void* mem_ptr, chip_id_t chip, CoreCoord core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) { tt_cxy_pair virtual_core; virtual_core.chip = chip; - CoreCoord virtual_coord = translate_coord_to(chip, core, CoordSystem::VIRTUAL); + CoreCoord virtual_coord = get_soc_descriptor(chip).translate_coord_to(core, CoordSystem::VIRTUAL); virtual_core.x = virtual_coord.x; virtual_core.y = virtual_coord.y; read_from_device(mem_ptr, virtual_core, addr, size, fallback_tlb); diff --git a/device/tt_core_coordinates.cpp b/device/tt_core_coordinates.cpp new file mode 100644 index 00000000..ca0413ae --- /dev/null +++ b/device/tt_core_coordinates.cpp @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "umd/device/tt_core_coordinates.h" + +namespace std { +std::size_t operator()(const CoreCoord& core_coord) const { + size_t seed = 0; + seed = std::hash{}(core_coord.x) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed = std::hash{}(core_coord.y) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed = std::hash{}(core_coord.core_type) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed = std::hash{}(core_coord.coord_system) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; +} +} // namespace std diff --git a/tests/api/test_cluster.cpp b/tests/api/test_cluster.cpp index 5c8af309..5a32be5e 100644 --- a/tests/api/test_cluster.cpp +++ b/tests/api/test_cluster.cpp @@ -122,8 +122,6 @@ TEST(ApiClusterTest, SimpleIOAllChips) { std::vector readback_data(data_size, 0); umd_cluster->read_from_device(readback_data.data(), chip_id, any_core, 0, data_size, "LARGE_READ_TLB"); - // umd_cluster->wait_for_non_mmio_flush(chip_id); - ASSERT_EQ(data, readback_data); } } @@ -145,7 +143,6 @@ TEST(ApiClusterTest, RemoteFlush) { umd_cluster->set_barrier_address_params({L1_BARRIER_BASE, ETH_BARRIER_BASE, DRAM_BARRIER_BASE}); for (auto chip_id : umd_cluster->get_target_remote_device_ids()) { - // const tt_SocDescriptor& soc_desc = umd_cluster->get_soc_descriptor(chip_id); const tt_SocDescriptor& soc_desc = umd_cluster->get_soc_descriptor(chip_id); const CoreCoord any_core = soc_desc.get_cores(CoreType::TENSIX)[0]; @@ -170,8 +167,6 @@ TEST(ApiClusterTest, RemoteFlush) { std::vector readback_data(data_size, 0); umd_cluster->read_from_device(readback_data.data(), chip_id, any_core, 0, data_size, "LARGE_READ_TLB"); - // umd_cluster->wait_for_non_mmio_flush(chip_id); - ASSERT_EQ(data, readback_data); } } @@ -221,8 +216,6 @@ TEST(ApiClusterTest, SimpleIOSpecificChips) { std::vector readback_data(data_size, 0); umd_cluster->read_from_device(readback_data.data(), chip_id, any_core, 0, data_size, "LARGE_READ_TLB"); - // umd_cluster->wait_for_non_mmio_flush(chip_id); - ASSERT_EQ(data, readback_data); } }