From f793b639979d9b561297b91009d60db4a6882ba9 Mon Sep 17 00:00:00 2001 From: Bojan Rosko Date: Wed, 18 Dec 2024 13:06:57 +0000 Subject: [PATCH 1/5] init --- .../umd/device/architecture_implementation.h | 1 - .../api/umd/device/blackhole_implementation.h | 1 - device/api/umd/device/cluster.h | 2 +- .../api/umd/device/grayskull_implementation.h | 1 - device/api/umd/device/types/tlb.h | 7 +-- .../api/umd/device/wormhole_implementation.h | 1 - device/blackhole/blackhole_implementation.cpp | 28 ++-------- device/cluster.cpp | 51 ++++++++++--------- device/grayskull/grayskull_implementation.cpp | 36 +++---------- device/wormhole/wormhole_implementation.cpp | 35 +++---------- 10 files changed, 48 insertions(+), 115 deletions(-) diff --git a/device/api/umd/device/architecture_implementation.h b/device/api/umd/device/architecture_implementation.h index 8935621f..8efc165b 100644 --- a/device/api/umd/device/architecture_implementation.h +++ b/device/api/umd/device/architecture_implementation.h @@ -67,7 +67,6 @@ class architecture_implementation { virtual std::tuple multicast_workaround(xy_pair start, xy_pair end) const = 0; virtual tlb_configuration get_tlb_configuration(uint32_t tlb_index) const = 0; - virtual std::optional> describe_tlb(std::int32_t tlb_index) const = 0; virtual std::pair get_tlb_data( std::uint32_t tlb_index, const tlb_data& data) const = 0; diff --git a/device/api/umd/device/blackhole_implementation.h b/device/api/umd/device/blackhole_implementation.h index b248bcfa..d2820403 100644 --- a/device/api/umd/device/blackhole_implementation.h +++ b/device/api/umd/device/blackhole_implementation.h @@ -306,7 +306,6 @@ class blackhole_implementation : public architecture_implementation { std::tuple multicast_workaround(xy_pair start, xy_pair end) const override; tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override; - std::optional> describe_tlb(std::int32_t tlb_index) const override; std::pair get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override; tt_device_l1_address_params get_l1_address_params() const override; diff --git a/device/api/umd/device/cluster.h b/device/api/umd/device/cluster.h index 08797d35..24c36a2f 100644 --- a/device/api/umd/device/cluster.h +++ b/device/api/umd/device/cluster.h @@ -656,7 +656,7 @@ class Cluster : public tt_device { /** * If the tlbs are initialized, returns a tuple with the TLB base address and its size */ - std::optional> get_tlb_data_from_target(const tt_cxy_pair& target); + tlb_configuration get_tlb_data_from_target(const tt_cxy_pair& target); /** * Provide fast write access to a statically-mapped TLB. * It is the caller's responsibility to ensure that diff --git a/device/api/umd/device/grayskull_implementation.h b/device/api/umd/device/grayskull_implementation.h index ab33f15d..2313415a 100644 --- a/device/api/umd/device/grayskull_implementation.h +++ b/device/api/umd/device/grayskull_implementation.h @@ -296,7 +296,6 @@ class grayskull_implementation : public architecture_implementation { std::tuple multicast_workaround(xy_pair start, xy_pair end) const override; tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override; - std::optional> describe_tlb(std::int32_t tlb_index) const override; std::pair get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override; tt_device_l1_address_params get_l1_address_params() const override; diff --git a/device/api/umd/device/types/tlb.h b/device/api/umd/device/types/tlb.h index 30094202..5f291442 100644 --- a/device/api/umd/device/types/tlb.h +++ b/device/api/umd/device/types/tlb.h @@ -50,9 +50,10 @@ struct tlb_data { struct tlb_configuration { uint64_t size; - uint32_t base; - uint32_t cfg_addr; - uint32_t index_offset; + uint64_t base; + uint64_t cfg_addr; + uint64_t index_offset; + uint64_t tlb_offset; tlb_offsets offset; }; diff --git a/device/api/umd/device/wormhole_implementation.h b/device/api/umd/device/wormhole_implementation.h index ce1bf036..0df17669 100644 --- a/device/api/umd/device/wormhole_implementation.h +++ b/device/api/umd/device/wormhole_implementation.h @@ -330,7 +330,6 @@ class wormhole_implementation : public architecture_implementation { std::tuple multicast_workaround(xy_pair start, xy_pair end) const override; tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override; - std::optional> describe_tlb(std::int32_t tlb_index) const override; std::pair get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override; tt_device_l1_address_params get_l1_address_params() const override; diff --git a/device/blackhole/blackhole_implementation.cpp b/device/blackhole/blackhole_implementation.cpp index f5e0ddcf..3dc25ef4 100644 --- a/device/blackhole/blackhole_implementation.cpp +++ b/device/blackhole/blackhole_implementation.cpp @@ -34,6 +34,8 @@ tlb_configuration blackhole_implementation::get_tlb_configuration(uint32_t tlb_i .base = blackhole::DYNAMIC_TLB_4G_BASE, .cfg_addr = blackhole::DYNAMIC_TLB_4G_CFG_ADDR, .index_offset = tlb_index - blackhole::TLB_BASE_INDEX_4G, + .tlb_offset = blackhole::DYNAMIC_TLB_4G_BASE + + (tlb_index - blackhole::TLB_BASE_INDEX_4G) * blackhole::DYNAMIC_TLB_4G_SIZE, .offset = blackhole::TLB_4G_OFFSET, }; } @@ -43,34 +45,12 @@ tlb_configuration blackhole_implementation::get_tlb_configuration(uint32_t tlb_i .base = blackhole::DYNAMIC_TLB_2M_BASE, .cfg_addr = blackhole::DYNAMIC_TLB_2M_CFG_ADDR, .index_offset = tlb_index - blackhole::TLB_BASE_INDEX_2M, + .tlb_offset = blackhole::DYNAMIC_TLB_2M_BASE + + (tlb_index - blackhole::TLB_BASE_INDEX_2M) * blackhole::DYNAMIC_TLB_2M_SIZE, .offset = blackhole::TLB_2M_OFFSET, }; } -std::optional> blackhole_implementation::describe_tlb( - std::int32_t tlb_index) const { - std::uint32_t TLB_COUNT_2M = 202; - - std::uint32_t TLB_BASE_2M = 0; - if (tlb_index < 0) { - return std::nullopt; - } - - if (tlb_index >= TLB_COUNT_2M && tlb_index < TLB_COUNT_2M + blackhole::TLB_COUNT_4G) { - auto tlb_offset = tlb_index - TLB_COUNT_2M; - auto size = blackhole::TLB_4G_SIZE; - return std::tuple(blackhole::TLB_BASE_4G + tlb_offset * size, size); - } - - if (tlb_index >= 0 && tlb_index < TLB_COUNT_2M) { - auto tlb_offset = tlb_index; - auto size = 1 << 21; - return std::tuple(TLB_BASE_2M + tlb_offset * size, size); - } - - return std::nullopt; -} - std::pair blackhole_implementation::get_tlb_data( std::uint32_t tlb_index, const tlb_data& data) const { if (tlb_index < blackhole::TLB_COUNT_2M) { diff --git a/device/cluster.cpp b/device/cluster.cpp index ffc626f9..7963fba0 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -173,10 +173,9 @@ bool Cluster::is_tlb_mapped(tt_cxy_pair target, uint64_t address, uint32_t size_ auto* dev = get_tt_device(target.chip); int32_t tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)); - auto tlb_description = dev->get_architecture_implementation()->describe_tlb(tlb_index); + auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(tlb_index); - return tlb_description.has_value() && - address_in_tlb_space(address, size_in_bytes, tlb_index, std::get<1>(tlb_description.value()), target.chip); + return address_in_tlb_space(address, size_in_bytes, tlb_index, tlb_description.size, target.chip); } void Cluster::initialize_interprocess_mutexes(int logical_device_id, bool cleanup_mutexes_in_shm) { @@ -1090,12 +1089,10 @@ tt::Writer Cluster::get_static_tlb_writer(tt_cxy_pair target) { } auto tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)); - auto tlb_data = dev->get_architecture_implementation()->describe_tlb(tlb_index); + auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(tlb_index); - auto [tlb_offset, tlb_size] = tlb_data.value(); auto* base = reinterpret_cast(dev->get_pci_device()->bar0_wc); - - return tt::Writer(base + tlb_offset, tlb_size); + return tt::Writer(base + tlb_description.tlb_offset, tlb_description.size); } tt::Writer Cluster::get_static_tlb_writer(const chip_id_t chip, const CoreCoord target) { @@ -1123,15 +1120,17 @@ void Cluster::write_device_memory( small_access); if (is_tlb_mapped(target, address, size_in_bytes)) { - auto tlb_description = dev->get_architecture_implementation()->describe_tlb( + auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration( map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y))); - auto [tlb_offset, tlb_size] = tlb_description.value(); - if (dev->get_pci_device()->bar4_wc != nullptr && tlb_size == BH_4GB_TLB_SIZE) { + if (dev->get_pci_device()->bar4_wc != nullptr && tlb_description.size == BH_4GB_TLB_SIZE) { // This is only for Blackhole. If we want to write to DRAM (BAR4 space), we add offset // to which we write so write_block knows it needs to target BAR4 - dev->write_block((tlb_offset + address % tlb_size) + BAR0_BH_SIZE, size_in_bytes, buffer_addr); + dev->write_block( + (tlb_description.tlb_offset + address % tlb_description.size) + BAR0_BH_SIZE, + size_in_bytes, + buffer_addr); } else { - dev->write_block(tlb_offset + address % tlb_size, size_in_bytes, buffer_addr); + dev->write_block(tlb_description.tlb_offset + address % tlb_description.size, size_in_bytes, buffer_addr); } } else { const auto tlb_index = dynamic_tlb_config.at(fallback_tlb); @@ -1170,17 +1169,23 @@ void Cluster::read_device_memory( log_debug(LogSiliconDriver, " tlb_index: {}, tlb_data.has_value(): {}", tlb_index, tlb_data.has_value()); if (is_tlb_mapped(target, address, size_in_bytes)) { - auto tlb_description = dev->get_architecture_implementation()->describe_tlb( + auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration( map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y))); - auto [tlb_offset, tlb_size] = tlb_description.value(); - if (dev->get_pci_device()->bar4_wc != nullptr && tlb_size == BH_4GB_TLB_SIZE) { + if (dev->get_pci_device()->bar4_wc != nullptr && tlb_description.size == BH_4GB_TLB_SIZE) { // This is only for Blackhole. If we want to read from DRAM (BAR4 space), we add offset // from which we read so read_block knows it needs to target BAR4 - dev->read_block((tlb_offset + address % tlb_size) + BAR0_BH_SIZE, size_in_bytes, buffer_addr); + dev->read_block( + (tlb_description.tlb_offset + address % tlb_description.size) + BAR0_BH_SIZE, + size_in_bytes, + buffer_addr); } else { - dev->read_block(tlb_offset + address % tlb_size, size_in_bytes, buffer_addr); + dev->read_block(tlb_description.tlb_offset + address % tlb_description.size, size_in_bytes, buffer_addr); } - log_debug(LogSiliconDriver, " read_block called with tlb_offset: {}, tlb_size: {}", tlb_offset, tlb_size); + log_debug( + LogSiliconDriver, + " read_block called with tlb_offset: {}, tlb_size: {}", + tlb_description.tlb_offset, + tlb_description.size); } else { const auto tlb_index = dynamic_tlb_config.at(fallback_tlb); const scoped_lock lock(*get_mutex(fallback_tlb, target.chip)); @@ -1339,13 +1344,11 @@ Cluster::~Cluster() { dynamic_tlb_ordering_modes.clear(); } -std::optional> Cluster::get_tlb_data_from_target(const tt_cxy_pair& target) { - if (!is_tlb_mapped(target)) { - return std::nullopt; - } +tlb_configuration Cluster::get_tlb_data_from_target(const tt_cxy_pair& target) { + log_assert(is_tlb_mapped(target), "TLB not mapped for core: {}", target.str()); int tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)); - return get_tt_device(target.chip)->get_architecture_implementation()->describe_tlb(tlb_index); + return get_tt_device(target.chip)->get_architecture_implementation()->get_tlb_configuration(tlb_index); } std::optional> Cluster::get_tlb_data_from_target(const chip_id_t chip, CoreCoord core) { @@ -1378,7 +1381,7 @@ void Cluster::configure_tlb( TTDevice* tt_device = get_tt_device(logical_device_id); tt_device->set_dynamic_tlb( tlb_index, harvested_coord_translation.at(logical_device_id).at(core), address, ordering); - auto tlb_size = std::get<1>(tt_device->get_architecture_implementation()->describe_tlb(tlb_index).value()); + auto tlb_size = tt_device->get_architecture_implementation()->get_tlb_configuration(tlb_index).size; tlb_config_map.at(logical_device_id).insert({tlb_index, (address / tlb_size) * tlb_size}); map_core_to_tlb_per_chip.at(logical_device_id).insert({core, tlb_index}); } diff --git a/device/grayskull/grayskull_implementation.cpp b/device/grayskull/grayskull_implementation.cpp index f0fbe478..af3d8a77 100644 --- a/device/grayskull/grayskull_implementation.cpp +++ b/device/grayskull/grayskull_implementation.cpp @@ -25,6 +25,8 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i .base = grayskull::DYNAMIC_TLB_16M_BASE, .cfg_addr = grayskull::DYNAMIC_TLB_16M_CFG_ADDR, .index_offset = tlb_index - grayskull::TLB_BASE_INDEX_16M, + .tlb_offset = grayskull::DYNAMIC_TLB_16M_BASE + + (tlb_index - grayskull::TLB_BASE_INDEX_16M) * grayskull::DYNAMIC_TLB_16M_SIZE, .offset = grayskull::TLB_16M_OFFSET, }; } else if (tlb_index >= grayskull::TLB_BASE_INDEX_2M) { @@ -33,6 +35,8 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i .base = grayskull::DYNAMIC_TLB_2M_BASE, .cfg_addr = grayskull::DYNAMIC_TLB_2M_CFG_ADDR, .index_offset = tlb_index - grayskull::TLB_BASE_INDEX_2M, + .tlb_offset = grayskull::DYNAMIC_TLB_2M_BASE + + (tlb_index - grayskull::TLB_BASE_INDEX_2M) * grayskull::DYNAMIC_TLB_2M_SIZE, .offset = grayskull::TLB_2M_OFFSET, }; } else { @@ -41,41 +45,13 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i .base = grayskull::DYNAMIC_TLB_1M_BASE, .cfg_addr = grayskull::DYNAMIC_TLB_1M_CFG_ADDR, .index_offset = tlb_index - grayskull::TLB_BASE_INDEX_1M, + .tlb_offset = grayskull::DYNAMIC_TLB_1M_BASE + + (tlb_index - grayskull::TLB_BASE_INDEX_1M) * grayskull::DYNAMIC_TLB_1M_SIZE, .offset = grayskull::TLB_1M_OFFSET, }; } } -std::optional> grayskull_implementation::describe_tlb( - std::int32_t tlb_index) const { - std::uint32_t TLB_COUNT_1M = 156; - std::uint32_t TLB_COUNT_2M = 10; - std::uint32_t TLB_COUNT_16M = 20; - - std::uint32_t TLB_BASE_1M = 0; - std::uint32_t TLB_BASE_2M = TLB_COUNT_1M * (1 << 20); - std::uint32_t TLB_BASE_16M = TLB_BASE_2M + TLB_COUNT_2M * (1 << 21); - - if (tlb_index < 0) { - return std::nullopt; - } - - if (tlb_index >= 0 && tlb_index < TLB_COUNT_1M) { - std::uint32_t size = 1 << 20; - return std::tuple(TLB_BASE_1M + size * tlb_index, size); - } else if (tlb_index >= 0 && tlb_index < TLB_COUNT_1M + TLB_COUNT_2M) { - auto tlb_offset = tlb_index - TLB_COUNT_1M; - auto size = 1 << 21; - return std::tuple(TLB_BASE_2M + tlb_offset * size, size); - } else if (tlb_index >= 0 and tlb_index < TLB_COUNT_1M + TLB_COUNT_2M + TLB_COUNT_16M) { - auto tlb_offset = tlb_index - (TLB_COUNT_1M + TLB_COUNT_2M); - auto size = 1 << 24; - return std::tuple(TLB_BASE_16M + tlb_offset * size, size); - } - - return std::nullopt; -} - std::pair grayskull_implementation::get_tlb_data( std::uint32_t tlb_index, const tlb_data &data) const { if (tlb_index < grayskull::TLB_COUNT_1M) { diff --git a/device/wormhole/wormhole_implementation.cpp b/device/wormhole/wormhole_implementation.cpp index d80a72eb..2e1aadc9 100644 --- a/device/wormhole/wormhole_implementation.cpp +++ b/device/wormhole/wormhole_implementation.cpp @@ -31,6 +31,8 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in .base = wormhole::DYNAMIC_TLB_16M_BASE, .cfg_addr = wormhole::DYNAMIC_TLB_16M_CFG_ADDR, .index_offset = tlb_index - wormhole::TLB_BASE_INDEX_16M, + .tlb_offset = wormhole::DYNAMIC_TLB_16M_BASE + + (tlb_index - wormhole::TLB_BASE_INDEX_16M) * wormhole::DYNAMIC_TLB_16M_SIZE, .offset = wormhole::TLB_16M_OFFSET, }; } else if (tlb_index >= wormhole::TLB_BASE_INDEX_2M) { @@ -39,6 +41,8 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in .base = wormhole::DYNAMIC_TLB_2M_BASE, .cfg_addr = wormhole::DYNAMIC_TLB_2M_CFG_ADDR, .index_offset = tlb_index - wormhole::TLB_BASE_INDEX_2M, + .tlb_offset = wormhole::DYNAMIC_TLB_2M_BASE + + (tlb_index - wormhole::TLB_BASE_INDEX_2M) * wormhole::DYNAMIC_TLB_2M_SIZE, .offset = wormhole::TLB_2M_OFFSET, }; } else { @@ -47,40 +51,13 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in .base = wormhole::DYNAMIC_TLB_1M_BASE, .cfg_addr = wormhole::DYNAMIC_TLB_1M_CFG_ADDR, .index_offset = tlb_index - wormhole::TLB_BASE_INDEX_1M, + .tlb_offset = wormhole::DYNAMIC_TLB_1M_BASE + + (tlb_index - wormhole::TLB_BASE_INDEX_1M) * wormhole::DYNAMIC_TLB_1M_SIZE, .offset = wormhole::TLB_1M_OFFSET, }; } } -std::optional> wormhole_implementation::describe_tlb( - std::int32_t tlb_index) const { - std::uint32_t TLB_COUNT_1M = 156; - std::uint32_t TLB_COUNT_2M = 10; - std::uint32_t TLB_COUNT_16M = 20; - - std::uint32_t TLB_BASE_1M = 0; - std::uint32_t TLB_BASE_2M = TLB_COUNT_1M * (1 << 20); - std::uint32_t TLB_BASE_16M = TLB_BASE_2M + TLB_COUNT_2M * (1 << 21); - if (tlb_index < 0) { - return std::nullopt; - } - - if (tlb_index >= 0 && tlb_index < TLB_COUNT_1M) { - std::uint32_t size = 1 << 20; - return std::tuple(TLB_BASE_1M + size * tlb_index, size); - } else if (tlb_index >= 0 && tlb_index < TLB_COUNT_1M + TLB_COUNT_2M) { - auto tlb_offset = tlb_index - TLB_COUNT_1M; - auto size = 1 << 21; - return std::tuple(TLB_BASE_2M + tlb_offset * size, size); - } else if (tlb_index >= 0 and tlb_index < TLB_COUNT_1M + TLB_COUNT_2M + TLB_COUNT_16M) { - auto tlb_offset = tlb_index - (TLB_COUNT_1M + TLB_COUNT_2M); - auto size = 1 << 24; - return std::tuple(TLB_BASE_16M + tlb_offset * size, size); - } - - return std::nullopt; -} - std::pair wormhole_implementation::get_tlb_data( std::uint32_t tlb_index, const tlb_data &data) const { std::uint32_t TLB_COUNT_1M = 156; From 91becc2ae457fbfbeafd4e63dab1bfa0e4b4a432 Mon Sep 17 00:00:00 2001 From: Bojan Rosko Date: Fri, 20 Dec 2024 10:14:36 +0000 Subject: [PATCH 2/5] revert old api --- device/api/umd/device/cluster.h | 3 ++- device/cluster.cpp | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/device/api/umd/device/cluster.h b/device/api/umd/device/cluster.h index 24c36a2f..1c6cdac9 100644 --- a/device/api/umd/device/cluster.h +++ b/device/api/umd/device/cluster.h @@ -656,7 +656,8 @@ class Cluster : public tt_device { /** * If the tlbs are initialized, returns a tuple with the TLB base address and its size */ - tlb_configuration get_tlb_data_from_target(const tt_cxy_pair& target); + [[deprecated]] std::optional> get_tlb_data_from_target(const tt_cxy_pair& target); + tlb_configuration get_tlb_configuration(const tt_cxy_pair& target); /** * Provide fast write access to a statically-mapped TLB. * It is the caller's responsibility to ensure that diff --git a/device/cluster.cpp b/device/cluster.cpp index 7963fba0..e5c1c053 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -1344,7 +1344,12 @@ Cluster::~Cluster() { dynamic_tlb_ordering_modes.clear(); } -tlb_configuration Cluster::get_tlb_data_from_target(const tt_cxy_pair& target) { +std::optional> Cluster::get_tlb_data_from_target(const tt_cxy_pair& target) { + auto tlb_configuration = get_tlb_configuration(target); + return std::tuple(tlb_configuration.tlb_offset, tlb_configuration.size); +} + +tlb_configuration Cluster::get_tlb_configuration(const tt_cxy_pair& target) { log_assert(is_tlb_mapped(target), "TLB not mapped for core: {}", target.str()); int tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)); From 362cddc720f7a04fdd3bf821008bd1a1141e2a83 Mon Sep 17 00:00:00 2001 From: Bojan Rosko Date: Mon, 23 Dec 2024 07:32:31 +0000 Subject: [PATCH 3/5] minor fix --- device/cluster.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/device/cluster.cpp b/device/cluster.cpp index e5c1c053..6b9dc121 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -1346,7 +1346,7 @@ Cluster::~Cluster() { std::optional> Cluster::get_tlb_data_from_target(const tt_cxy_pair& target) { auto tlb_configuration = get_tlb_configuration(target); - return std::tuple(tlb_configuration.tlb_offset, tlb_configuration.size); + return std::tuple((uint32_t)tlb_configuration.tlb_offset, (uint32_t)tlb_configuration.size); } tlb_configuration Cluster::get_tlb_configuration(const tt_cxy_pair& target) { From b535c5a261254cad42602cb10e74eaf26403c91d Mon Sep 17 00:00:00 2001 From: Bojan Rosko Date: Mon, 23 Dec 2024 07:41:21 +0000 Subject: [PATCH 4/5] fix after rebase --- device/api/umd/device/cluster.h | 6 +++++- device/cluster.cpp | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/device/api/umd/device/cluster.h b/device/api/umd/device/cluster.h index 1c6cdac9..6df72001 100644 --- a/device/api/umd/device/cluster.h +++ b/device/api/umd/device/cluster.h @@ -656,7 +656,10 @@ class Cluster : public tt_device { /** * If the tlbs are initialized, returns a tuple with the TLB base address and its size */ - [[deprecated]] std::optional> get_tlb_data_from_target(const tt_cxy_pair& target); + std::optional> get_tlb_data_from_target(const tt_cxy_pair& target); + /** + * Returns a struct with the TLB configuration, or throws an exception if the target does not have a static TLB. + */ tlb_configuration get_tlb_configuration(const tt_cxy_pair& target); /** * Provide fast write access to a statically-mapped TLB. @@ -708,6 +711,7 @@ class Cluster : public tt_device { const std::string& fallback_tlb); std::optional> get_tlb_data_from_target( const chip_id_t chip, const tt::umd::CoreCoord core); + tlb_configuration get_tlb_configuration(const chip_id_t chip, const tt::umd::CoreCoord core); tt::Writer get_static_tlb_writer(const chip_id_t chip, const tt::umd::CoreCoord target); virtual void configure_active_ethernet_cores_for_mmio_device( const std::unordered_set& active_eth_cores_per_chip, chip_id_t mmio_chip); diff --git a/device/cluster.cpp b/device/cluster.cpp index 6b9dc121..f3912c3f 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -1361,6 +1361,11 @@ std::optional> Cluster::get_tlb_data_from_target( return get_tlb_data_from_target({(size_t)chip, virtual_coord}); } +tlb_configuration Cluster::get_tlb_configuration(const chip_id_t chip, CoreCoord core) { + const CoreCoord virtual_coord = translate_chip_coord(chip, core, CoordSystem::VIRTUAL); + return get_tlb_configuration({(size_t)chip, virtual_coord}); +} + void Cluster::configure_tlb( chip_id_t logical_device_id, tt_xy_pair core, int32_t tlb_index, uint64_t address, uint64_t ordering) { log_assert( From d658c6c14617bc6bb204c28a5046b0d1140d20fd Mon Sep 17 00:00:00 2001 From: Bojan Rosko Date: Mon, 23 Dec 2024 07:43:01 +0000 Subject: [PATCH 5/5] remove auto --- device/cluster.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/device/cluster.cpp b/device/cluster.cpp index f3912c3f..a592fe67 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -173,7 +173,7 @@ bool Cluster::is_tlb_mapped(tt_cxy_pair target, uint64_t address, uint32_t size_ auto* dev = get_tt_device(target.chip); int32_t tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)); - auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(tlb_index); + tlb_configuration tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(tlb_index); return address_in_tlb_space(address, size_in_bytes, tlb_index, tlb_description.size, target.chip); } @@ -1089,7 +1089,7 @@ tt::Writer Cluster::get_static_tlb_writer(tt_cxy_pair target) { } auto tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)); - auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(tlb_index); + tlb_configuration tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(tlb_index); auto* base = reinterpret_cast(dev->get_pci_device()->bar0_wc); return tt::Writer(base + tlb_description.tlb_offset, tlb_description.size); @@ -1120,7 +1120,7 @@ void Cluster::write_device_memory( small_access); if (is_tlb_mapped(target, address, size_in_bytes)) { - auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration( + tlb_configuration tlb_description = dev->get_architecture_implementation()->get_tlb_configuration( map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y))); if (dev->get_pci_device()->bar4_wc != nullptr && tlb_description.size == BH_4GB_TLB_SIZE) { // This is only for Blackhole. If we want to write to DRAM (BAR4 space), we add offset @@ -1169,7 +1169,7 @@ void Cluster::read_device_memory( log_debug(LogSiliconDriver, " tlb_index: {}, tlb_data.has_value(): {}", tlb_index, tlb_data.has_value()); if (is_tlb_mapped(target, address, size_in_bytes)) { - auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration( + tlb_configuration tlb_description = dev->get_architecture_implementation()->get_tlb_configuration( map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y))); if (dev->get_pci_device()->bar4_wc != nullptr && tlb_description.size == BH_4GB_TLB_SIZE) { // This is only for Blackhole. If we want to read from DRAM (BAR4 space), we add offset @@ -1391,7 +1391,7 @@ void Cluster::configure_tlb( TTDevice* tt_device = get_tt_device(logical_device_id); tt_device->set_dynamic_tlb( tlb_index, harvested_coord_translation.at(logical_device_id).at(core), address, ordering); - auto tlb_size = tt_device->get_architecture_implementation()->get_tlb_configuration(tlb_index).size; + uint64_t tlb_size = tt_device->get_architecture_implementation()->get_tlb_configuration(tlb_index).size; tlb_config_map.at(logical_device_id).insert({tlb_index, (address / tlb_size) * tlb_size}); map_core_to_tlb_per_chip.at(logical_device_id).insert({core, tlb_index}); }