diff --git a/device/api/umd/device/architecture_implementation.h b/device/api/umd/device/architecture_implementation.h index 8935621f..8efc165b 100644 --- a/device/api/umd/device/architecture_implementation.h +++ b/device/api/umd/device/architecture_implementation.h @@ -67,7 +67,6 @@ class architecture_implementation { virtual std::tuple multicast_workaround(xy_pair start, xy_pair end) const = 0; virtual tlb_configuration get_tlb_configuration(uint32_t tlb_index) const = 0; - virtual std::optional> describe_tlb(std::int32_t tlb_index) const = 0; virtual std::pair get_tlb_data( std::uint32_t tlb_index, const tlb_data& data) const = 0; diff --git a/device/api/umd/device/blackhole_implementation.h b/device/api/umd/device/blackhole_implementation.h index b248bcfa..d2820403 100644 --- a/device/api/umd/device/blackhole_implementation.h +++ b/device/api/umd/device/blackhole_implementation.h @@ -306,7 +306,6 @@ class blackhole_implementation : public architecture_implementation { std::tuple multicast_workaround(xy_pair start, xy_pair end) const override; tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override; - std::optional> describe_tlb(std::int32_t tlb_index) const override; std::pair get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override; tt_device_l1_address_params get_l1_address_params() const override; diff --git a/device/api/umd/device/cluster.h b/device/api/umd/device/cluster.h index 7bde8beb..99947d73 100644 --- a/device/api/umd/device/cluster.h +++ b/device/api/umd/device/cluster.h @@ -562,7 +562,7 @@ class Cluster : public tt_device { /** * If the tlbs are initialized, returns a tuple with the TLB base address and its size */ - std::optional> get_tlb_data_from_target(const tt_cxy_pair& target); + tlb_configuration get_tlb_data_from_target(const tt_cxy_pair& target); /** * This API allows you to write directly to device memory that is addressable by a static TLB */ diff --git a/device/api/umd/device/grayskull_implementation.h b/device/api/umd/device/grayskull_implementation.h index ab33f15d..2313415a 100644 --- a/device/api/umd/device/grayskull_implementation.h +++ b/device/api/umd/device/grayskull_implementation.h @@ -296,7 +296,6 @@ class grayskull_implementation : public architecture_implementation { std::tuple multicast_workaround(xy_pair start, xy_pair end) const override; tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override; - std::optional> describe_tlb(std::int32_t tlb_index) const override; std::pair get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override; tt_device_l1_address_params get_l1_address_params() const override; diff --git a/device/api/umd/device/types/tlb.h b/device/api/umd/device/types/tlb.h index 30094202..5f291442 100644 --- a/device/api/umd/device/types/tlb.h +++ b/device/api/umd/device/types/tlb.h @@ -50,9 +50,10 @@ struct tlb_data { struct tlb_configuration { uint64_t size; - uint32_t base; - uint32_t cfg_addr; - uint32_t index_offset; + uint64_t base; + uint64_t cfg_addr; + uint64_t index_offset; + uint64_t tlb_offset; tlb_offsets offset; }; diff --git a/device/api/umd/device/wormhole_implementation.h b/device/api/umd/device/wormhole_implementation.h index ce1bf036..0df17669 100644 --- a/device/api/umd/device/wormhole_implementation.h +++ b/device/api/umd/device/wormhole_implementation.h @@ -330,7 +330,6 @@ class wormhole_implementation : public architecture_implementation { std::tuple multicast_workaround(xy_pair start, xy_pair end) const override; tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override; - std::optional> describe_tlb(std::int32_t tlb_index) const override; std::pair get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override; tt_device_l1_address_params get_l1_address_params() const override; diff --git a/device/blackhole/blackhole_implementation.cpp b/device/blackhole/blackhole_implementation.cpp index f5e0ddcf..3dc25ef4 100644 --- a/device/blackhole/blackhole_implementation.cpp +++ b/device/blackhole/blackhole_implementation.cpp @@ -34,6 +34,8 @@ tlb_configuration blackhole_implementation::get_tlb_configuration(uint32_t tlb_i .base = blackhole::DYNAMIC_TLB_4G_BASE, .cfg_addr = blackhole::DYNAMIC_TLB_4G_CFG_ADDR, .index_offset = tlb_index - blackhole::TLB_BASE_INDEX_4G, + .tlb_offset = blackhole::DYNAMIC_TLB_4G_BASE + + (tlb_index - blackhole::TLB_BASE_INDEX_4G) * blackhole::DYNAMIC_TLB_4G_SIZE, .offset = blackhole::TLB_4G_OFFSET, }; } @@ -43,34 +45,12 @@ tlb_configuration blackhole_implementation::get_tlb_configuration(uint32_t tlb_i .base = blackhole::DYNAMIC_TLB_2M_BASE, .cfg_addr = blackhole::DYNAMIC_TLB_2M_CFG_ADDR, .index_offset = tlb_index - blackhole::TLB_BASE_INDEX_2M, + .tlb_offset = blackhole::DYNAMIC_TLB_2M_BASE + + (tlb_index - blackhole::TLB_BASE_INDEX_2M) * blackhole::DYNAMIC_TLB_2M_SIZE, .offset = blackhole::TLB_2M_OFFSET, }; } -std::optional> blackhole_implementation::describe_tlb( - std::int32_t tlb_index) const { - std::uint32_t TLB_COUNT_2M = 202; - - std::uint32_t TLB_BASE_2M = 0; - if (tlb_index < 0) { - return std::nullopt; - } - - if (tlb_index >= TLB_COUNT_2M && tlb_index < TLB_COUNT_2M + blackhole::TLB_COUNT_4G) { - auto tlb_offset = tlb_index - TLB_COUNT_2M; - auto size = blackhole::TLB_4G_SIZE; - return std::tuple(blackhole::TLB_BASE_4G + tlb_offset * size, size); - } - - if (tlb_index >= 0 && tlb_index < TLB_COUNT_2M) { - auto tlb_offset = tlb_index; - auto size = 1 << 21; - return std::tuple(TLB_BASE_2M + tlb_offset * size, size); - } - - return std::nullopt; -} - std::pair blackhole_implementation::get_tlb_data( std::uint32_t tlb_index, const tlb_data& data) const { if (tlb_index < blackhole::TLB_COUNT_2M) { diff --git a/device/cluster.cpp b/device/cluster.cpp index 6c70f075..3190832d 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -172,10 +172,9 @@ bool Cluster::is_tlb_mapped(tt_cxy_pair target, uint64_t address, uint32_t size_ auto* dev = get_tt_device(target.chip); int32_t tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)); - auto tlb_description = dev->get_architecture_implementation()->describe_tlb(tlb_index); + auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(tlb_index); - return tlb_description.has_value() && - address_in_tlb_space(address, size_in_bytes, tlb_index, std::get<1>(tlb_description.value()), target.chip); + return address_in_tlb_space(address, size_in_bytes, tlb_index, tlb_description.size, target.chip); } void Cluster::initialize_interprocess_mutexes(int pci_interface_id, bool cleanup_mutexes_in_shm) { @@ -1075,12 +1074,10 @@ tt::Writer Cluster::get_static_tlb_writer(tt_cxy_pair target) { } auto tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)); - auto tlb_data = dev->get_architecture_implementation()->describe_tlb(tlb_index); + auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(tlb_index); - auto [tlb_offset, tlb_size] = tlb_data.value(); auto* base = reinterpret_cast(dev->get_pci_device()->bar0_wc); - - return tt::Writer(base + tlb_offset, tlb_size); + return tt::Writer(base + tlb_description.tlb_offset, tlb_description.size); } void Cluster::write_device_memory( @@ -1103,15 +1100,17 @@ void Cluster::write_device_memory( small_access); if (is_tlb_mapped(target, address, size_in_bytes)) { - auto tlb_description = dev->get_architecture_implementation()->describe_tlb( + auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration( map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y))); - auto [tlb_offset, tlb_size] = tlb_description.value(); - if (dev->get_pci_device()->bar4_wc != nullptr && tlb_size == BH_4GB_TLB_SIZE) { + if (dev->get_pci_device()->bar4_wc != nullptr && tlb_description.size == BH_4GB_TLB_SIZE) { // This is only for Blackhole. If we want to write to DRAM (BAR4 space), we add offset // to which we write so write_block knows it needs to target BAR4 - dev->write_block((tlb_offset + address % tlb_size) + BAR0_BH_SIZE, size_in_bytes, buffer_addr); + dev->write_block( + (tlb_description.tlb_offset + address % tlb_description.size) + BAR0_BH_SIZE, + size_in_bytes, + buffer_addr); } else { - dev->write_block(tlb_offset + address % tlb_size, size_in_bytes, buffer_addr); + dev->write_block(tlb_description.tlb_offset + address % tlb_description.size, size_in_bytes, buffer_addr); } } else { const auto tlb_index = dynamic_tlb_config.at(fallback_tlb); @@ -1151,17 +1150,23 @@ void Cluster::read_device_memory( log_debug(LogSiliconDriver, " tlb_index: {}, tlb_data.has_value(): {}", tlb_index, tlb_data.has_value()); if (is_tlb_mapped(target, address, size_in_bytes)) { - auto tlb_description = dev->get_architecture_implementation()->describe_tlb( + auto tlb_description = dev->get_architecture_implementation()->get_tlb_configuration( map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y))); - auto [tlb_offset, tlb_size] = tlb_description.value(); - if (dev->get_pci_device()->bar4_wc != nullptr && tlb_size == BH_4GB_TLB_SIZE) { + if (dev->get_pci_device()->bar4_wc != nullptr && tlb_description.size == BH_4GB_TLB_SIZE) { // This is only for Blackhole. If we want to read from DRAM (BAR4 space), we add offset // from which we read so read_block knows it needs to target BAR4 - dev->read_block((tlb_offset + address % tlb_size) + BAR0_BH_SIZE, size_in_bytes, buffer_addr); + dev->read_block( + (tlb_description.tlb_offset + address % tlb_description.size) + BAR0_BH_SIZE, + size_in_bytes, + buffer_addr); } else { - dev->read_block(tlb_offset + address % tlb_size, size_in_bytes, buffer_addr); + dev->read_block(tlb_description.tlb_offset + address % tlb_description.size, size_in_bytes, buffer_addr); } - log_debug(LogSiliconDriver, " read_block called with tlb_offset: {}, tlb_size: {}", tlb_offset, tlb_size); + log_debug( + LogSiliconDriver, + " read_block called with tlb_offset: {}, tlb_size: {}", + tlb_description.tlb_offset, + tlb_description.size); } else { const auto tlb_index = dynamic_tlb_config.at(fallback_tlb); const scoped_lock lock(*get_mutex(fallback_tlb, dev->get_pci_device()->get_device_num())); @@ -1321,13 +1326,11 @@ Cluster::~Cluster() { dynamic_tlb_ordering_modes.clear(); } -std::optional> Cluster::get_tlb_data_from_target(const tt_cxy_pair& target) { - if (!is_tlb_mapped(target)) { - return std::nullopt; - } +tlb_configuration Cluster::get_tlb_data_from_target(const tt_cxy_pair& target) { + log_assert(is_tlb_mapped(target), "TLB not mapped for core: {}", target.str()); int tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)); - return get_tt_device(target.chip)->get_architecture_implementation()->describe_tlb(tlb_index); + return get_tt_device(target.chip)->get_architecture_implementation()->get_tlb_configuration(tlb_index); } void Cluster::configure_tlb( @@ -1354,7 +1357,7 @@ void Cluster::configure_tlb( TTDevice* tt_device = get_tt_device(logical_device_id); tt_device->set_dynamic_tlb(tlb_index, core, address, harvested_coord_translation.at(logical_device_id), ordering); - auto tlb_size = std::get<1>(tt_device->get_architecture_implementation()->describe_tlb(tlb_index).value()); + auto tlb_size = tt_device->get_architecture_implementation()->get_tlb_configuration(tlb_index).size; tlb_config_map.at(logical_device_id).insert({tlb_index, (address / tlb_size) * tlb_size}); map_core_to_tlb_per_chip.at(logical_device_id).insert({core, tlb_index}); } diff --git a/device/grayskull/grayskull_implementation.cpp b/device/grayskull/grayskull_implementation.cpp index f0fbe478..af3d8a77 100644 --- a/device/grayskull/grayskull_implementation.cpp +++ b/device/grayskull/grayskull_implementation.cpp @@ -25,6 +25,8 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i .base = grayskull::DYNAMIC_TLB_16M_BASE, .cfg_addr = grayskull::DYNAMIC_TLB_16M_CFG_ADDR, .index_offset = tlb_index - grayskull::TLB_BASE_INDEX_16M, + .tlb_offset = grayskull::DYNAMIC_TLB_16M_BASE + + (tlb_index - grayskull::TLB_BASE_INDEX_16M) * grayskull::DYNAMIC_TLB_16M_SIZE, .offset = grayskull::TLB_16M_OFFSET, }; } else if (tlb_index >= grayskull::TLB_BASE_INDEX_2M) { @@ -33,6 +35,8 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i .base = grayskull::DYNAMIC_TLB_2M_BASE, .cfg_addr = grayskull::DYNAMIC_TLB_2M_CFG_ADDR, .index_offset = tlb_index - grayskull::TLB_BASE_INDEX_2M, + .tlb_offset = grayskull::DYNAMIC_TLB_2M_BASE + + (tlb_index - grayskull::TLB_BASE_INDEX_2M) * grayskull::DYNAMIC_TLB_2M_SIZE, .offset = grayskull::TLB_2M_OFFSET, }; } else { @@ -41,41 +45,13 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i .base = grayskull::DYNAMIC_TLB_1M_BASE, .cfg_addr = grayskull::DYNAMIC_TLB_1M_CFG_ADDR, .index_offset = tlb_index - grayskull::TLB_BASE_INDEX_1M, + .tlb_offset = grayskull::DYNAMIC_TLB_1M_BASE + + (tlb_index - grayskull::TLB_BASE_INDEX_1M) * grayskull::DYNAMIC_TLB_1M_SIZE, .offset = grayskull::TLB_1M_OFFSET, }; } } -std::optional> grayskull_implementation::describe_tlb( - std::int32_t tlb_index) const { - std::uint32_t TLB_COUNT_1M = 156; - std::uint32_t TLB_COUNT_2M = 10; - std::uint32_t TLB_COUNT_16M = 20; - - std::uint32_t TLB_BASE_1M = 0; - std::uint32_t TLB_BASE_2M = TLB_COUNT_1M * (1 << 20); - std::uint32_t TLB_BASE_16M = TLB_BASE_2M + TLB_COUNT_2M * (1 << 21); - - if (tlb_index < 0) { - return std::nullopt; - } - - if (tlb_index >= 0 && tlb_index < TLB_COUNT_1M) { - std::uint32_t size = 1 << 20; - return std::tuple(TLB_BASE_1M + size * tlb_index, size); - } else if (tlb_index >= 0 && tlb_index < TLB_COUNT_1M + TLB_COUNT_2M) { - auto tlb_offset = tlb_index - TLB_COUNT_1M; - auto size = 1 << 21; - return std::tuple(TLB_BASE_2M + tlb_offset * size, size); - } else if (tlb_index >= 0 and tlb_index < TLB_COUNT_1M + TLB_COUNT_2M + TLB_COUNT_16M) { - auto tlb_offset = tlb_index - (TLB_COUNT_1M + TLB_COUNT_2M); - auto size = 1 << 24; - return std::tuple(TLB_BASE_16M + tlb_offset * size, size); - } - - return std::nullopt; -} - std::pair grayskull_implementation::get_tlb_data( std::uint32_t tlb_index, const tlb_data &data) const { if (tlb_index < grayskull::TLB_COUNT_1M) { diff --git a/device/wormhole/wormhole_implementation.cpp b/device/wormhole/wormhole_implementation.cpp index d80a72eb..2e1aadc9 100644 --- a/device/wormhole/wormhole_implementation.cpp +++ b/device/wormhole/wormhole_implementation.cpp @@ -31,6 +31,8 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in .base = wormhole::DYNAMIC_TLB_16M_BASE, .cfg_addr = wormhole::DYNAMIC_TLB_16M_CFG_ADDR, .index_offset = tlb_index - wormhole::TLB_BASE_INDEX_16M, + .tlb_offset = wormhole::DYNAMIC_TLB_16M_BASE + + (tlb_index - wormhole::TLB_BASE_INDEX_16M) * wormhole::DYNAMIC_TLB_16M_SIZE, .offset = wormhole::TLB_16M_OFFSET, }; } else if (tlb_index >= wormhole::TLB_BASE_INDEX_2M) { @@ -39,6 +41,8 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in .base = wormhole::DYNAMIC_TLB_2M_BASE, .cfg_addr = wormhole::DYNAMIC_TLB_2M_CFG_ADDR, .index_offset = tlb_index - wormhole::TLB_BASE_INDEX_2M, + .tlb_offset = wormhole::DYNAMIC_TLB_2M_BASE + + (tlb_index - wormhole::TLB_BASE_INDEX_2M) * wormhole::DYNAMIC_TLB_2M_SIZE, .offset = wormhole::TLB_2M_OFFSET, }; } else { @@ -47,40 +51,13 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in .base = wormhole::DYNAMIC_TLB_1M_BASE, .cfg_addr = wormhole::DYNAMIC_TLB_1M_CFG_ADDR, .index_offset = tlb_index - wormhole::TLB_BASE_INDEX_1M, + .tlb_offset = wormhole::DYNAMIC_TLB_1M_BASE + + (tlb_index - wormhole::TLB_BASE_INDEX_1M) * wormhole::DYNAMIC_TLB_1M_SIZE, .offset = wormhole::TLB_1M_OFFSET, }; } } -std::optional> wormhole_implementation::describe_tlb( - std::int32_t tlb_index) const { - std::uint32_t TLB_COUNT_1M = 156; - std::uint32_t TLB_COUNT_2M = 10; - std::uint32_t TLB_COUNT_16M = 20; - - std::uint32_t TLB_BASE_1M = 0; - std::uint32_t TLB_BASE_2M = TLB_COUNT_1M * (1 << 20); - std::uint32_t TLB_BASE_16M = TLB_BASE_2M + TLB_COUNT_2M * (1 << 21); - if (tlb_index < 0) { - return std::nullopt; - } - - if (tlb_index >= 0 && tlb_index < TLB_COUNT_1M) { - std::uint32_t size = 1 << 20; - return std::tuple(TLB_BASE_1M + size * tlb_index, size); - } else if (tlb_index >= 0 && tlb_index < TLB_COUNT_1M + TLB_COUNT_2M) { - auto tlb_offset = tlb_index - TLB_COUNT_1M; - auto size = 1 << 21; - return std::tuple(TLB_BASE_2M + tlb_offset * size, size); - } else if (tlb_index >= 0 and tlb_index < TLB_COUNT_1M + TLB_COUNT_2M + TLB_COUNT_16M) { - auto tlb_offset = tlb_index - (TLB_COUNT_1M + TLB_COUNT_2M); - auto size = 1 << 24; - return std::tuple(TLB_BASE_16M + tlb_offset * size, size); - } - - return std::nullopt; -} - std::pair wormhole_implementation::get_tlb_data( std::uint32_t tlb_index, const tlb_data &data) const { std::uint32_t TLB_COUNT_1M = 156;