diff --git a/device/api/umd/device/architecture_implementation.h b/device/api/umd/device/architecture_implementation.h index 8935621f..8efc165b 100644 --- a/device/api/umd/device/architecture_implementation.h +++ b/device/api/umd/device/architecture_implementation.h @@ -67,7 +67,6 @@ class architecture_implementation { virtual std::tuple multicast_workaround(xy_pair start, xy_pair end) const = 0; virtual tlb_configuration get_tlb_configuration(uint32_t tlb_index) const = 0; - virtual std::optional> describe_tlb(std::int32_t tlb_index) const = 0; virtual std::pair get_tlb_data( std::uint32_t tlb_index, const tlb_data& data) const = 0; diff --git a/device/api/umd/device/blackhole_implementation.h b/device/api/umd/device/blackhole_implementation.h index b248bcfa..d2820403 100644 --- a/device/api/umd/device/blackhole_implementation.h +++ b/device/api/umd/device/blackhole_implementation.h @@ -306,7 +306,6 @@ class blackhole_implementation : public architecture_implementation { std::tuple multicast_workaround(xy_pair start, xy_pair end) const override; tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override; - std::optional> describe_tlb(std::int32_t tlb_index) const override; std::pair get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override; tt_device_l1_address_params get_l1_address_params() const override; diff --git a/device/api/umd/device/cluster.h b/device/api/umd/device/cluster.h index 08797d35..6df72001 100644 --- a/device/api/umd/device/cluster.h +++ b/device/api/umd/device/cluster.h @@ -657,6 +657,10 @@ class Cluster : public tt_device { * If the tlbs are initialized, returns a tuple with the TLB base address and its size */ std::optional> get_tlb_data_from_target(const tt_cxy_pair& target); + /** + * Returns a struct with the TLB configuration, or throws an exception if the target does not have a static TLB. + */ + tlb_configuration get_tlb_configuration(const tt_cxy_pair& target); /** * Provide fast write access to a statically-mapped TLB. * It is the caller's responsibility to ensure that @@ -707,6 +711,7 @@ class Cluster : public tt_device { const std::string& fallback_tlb); std::optional> get_tlb_data_from_target( const chip_id_t chip, const tt::umd::CoreCoord core); + tlb_configuration get_tlb_configuration(const chip_id_t chip, const tt::umd::CoreCoord core); tt::Writer get_static_tlb_writer(const chip_id_t chip, const tt::umd::CoreCoord target); virtual void configure_active_ethernet_cores_for_mmio_device( const std::unordered_set& active_eth_cores_per_chip, chip_id_t mmio_chip); diff --git a/device/api/umd/device/grayskull_implementation.h b/device/api/umd/device/grayskull_implementation.h index ab33f15d..2313415a 100644 --- a/device/api/umd/device/grayskull_implementation.h +++ b/device/api/umd/device/grayskull_implementation.h @@ -296,7 +296,6 @@ class grayskull_implementation : public architecture_implementation { std::tuple multicast_workaround(xy_pair start, xy_pair end) const override; tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override; - std::optional> describe_tlb(std::int32_t tlb_index) const override; std::pair get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override; tt_device_l1_address_params get_l1_address_params() const override; diff --git a/device/api/umd/device/types/tlb.h b/device/api/umd/device/types/tlb.h index 30094202..5f291442 100644 --- a/device/api/umd/device/types/tlb.h +++ b/device/api/umd/device/types/tlb.h @@ -50,9 +50,10 @@ struct tlb_data { struct tlb_configuration { uint64_t size; - uint32_t base; - uint32_t cfg_addr; - uint32_t index_offset; + uint64_t base; + uint64_t cfg_addr; + uint64_t index_offset; + uint64_t tlb_offset; tlb_offsets offset; }; diff --git a/device/api/umd/device/wormhole_implementation.h b/device/api/umd/device/wormhole_implementation.h index ce1bf036..0df17669 100644 --- a/device/api/umd/device/wormhole_implementation.h +++ b/device/api/umd/device/wormhole_implementation.h @@ -330,7 +330,6 @@ class wormhole_implementation : public architecture_implementation { std::tuple multicast_workaround(xy_pair start, xy_pair end) const override; tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override; - std::optional> describe_tlb(std::int32_t tlb_index) const override; std::pair get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override; tt_device_l1_address_params get_l1_address_params() const override; diff --git a/device/blackhole/blackhole_implementation.cpp b/device/blackhole/blackhole_implementation.cpp index f5e0ddcf..3dc25ef4 100644 --- a/device/blackhole/blackhole_implementation.cpp +++ b/device/blackhole/blackhole_implementation.cpp @@ -34,6 +34,8 @@ tlb_configuration blackhole_implementation::get_tlb_configuration(uint32_t tlb_i .base = blackhole::DYNAMIC_TLB_4G_BASE, .cfg_addr = blackhole::DYNAMIC_TLB_4G_CFG_ADDR, .index_offset = tlb_index - blackhole::TLB_BASE_INDEX_4G, + .tlb_offset = blackhole::DYNAMIC_TLB_4G_BASE + + (tlb_index - blackhole::TLB_BASE_INDEX_4G) * blackhole::DYNAMIC_TLB_4G_SIZE, .offset = blackhole::TLB_4G_OFFSET, }; } @@ -43,34 +45,12 @@ tlb_configuration blackhole_implementation::get_tlb_configuration(uint32_t tlb_i .base = blackhole::DYNAMIC_TLB_2M_BASE, .cfg_addr = blackhole::DYNAMIC_TLB_2M_CFG_ADDR, .index_offset = tlb_index - blackhole::TLB_BASE_INDEX_2M, + .tlb_offset = blackhole::DYNAMIC_TLB_2M_BASE + + (tlb_index - blackhole::TLB_BASE_INDEX_2M) * blackhole::DYNAMIC_TLB_2M_SIZE, .offset = blackhole::TLB_2M_OFFSET, }; } -std::optional> blackhole_implementation::describe_tlb( - std::int32_t tlb_index) const { - std::uint32_t TLB_COUNT_2M = 202; - - std::uint32_t TLB_BASE_2M = 0; - if (tlb_index < 0) { - return std::nullopt; - } - - if (tlb_index >= TLB_COUNT_2M && tlb_index < TLB_COUNT_2M + blackhole::TLB_COUNT_4G) { - auto tlb_offset = tlb_index - TLB_COUNT_2M; - auto size = blackhole::TLB_4G_SIZE; - return std::tuple(blackhole::TLB_BASE_4G + tlb_offset * size, size); - } - - if (tlb_index >= 0 && tlb_index < TLB_COUNT_2M) { - auto tlb_offset = tlb_index; - auto size = 1 << 21; - return std::tuple(TLB_BASE_2M + tlb_offset * size, size); - } - - return std::nullopt; -} - std::pair blackhole_implementation::get_tlb_data( std::uint32_t tlb_index, const tlb_data& data) const { if (tlb_index < blackhole::TLB_COUNT_2M) { diff --git a/device/cluster.cpp b/device/cluster.cpp index ffc626f9..a592fe67 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -173,10 +173,9 @@ bool Cluster::is_tlb_mapped(tt_cxy_pair target, uint64_t address, uint32_t size_ auto* dev = get_tt_device(target.chip); int32_t tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)); - auto tlb_description = dev->get_architecture_implementation()->describe_tlb(tlb_index); + tlb_configuration tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(tlb_index); - return tlb_description.has_value() && - address_in_tlb_space(address, size_in_bytes, tlb_index, std::get<1>(tlb_description.value()), target.chip); + return address_in_tlb_space(address, size_in_bytes, tlb_index, tlb_description.size, target.chip); } void Cluster::initialize_interprocess_mutexes(int logical_device_id, bool cleanup_mutexes_in_shm) { @@ -1090,12 +1089,10 @@ tt::Writer Cluster::get_static_tlb_writer(tt_cxy_pair target) { } auto tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)); - auto tlb_data = dev->get_architecture_implementation()->describe_tlb(tlb_index); + tlb_configuration tlb_description = dev->get_architecture_implementation()->get_tlb_configuration(tlb_index); - auto [tlb_offset, tlb_size] = tlb_data.value(); auto* base = reinterpret_cast(dev->get_pci_device()->bar0_wc); - - return tt::Writer(base + tlb_offset, tlb_size); + return tt::Writer(base + tlb_description.tlb_offset, tlb_description.size); } tt::Writer Cluster::get_static_tlb_writer(const chip_id_t chip, const CoreCoord target) { @@ -1123,15 +1120,17 @@ void Cluster::write_device_memory( small_access); if (is_tlb_mapped(target, address, size_in_bytes)) { - auto tlb_description = dev->get_architecture_implementation()->describe_tlb( + tlb_configuration tlb_description = dev->get_architecture_implementation()->get_tlb_configuration( map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y))); - auto [tlb_offset, tlb_size] = tlb_description.value(); - if (dev->get_pci_device()->bar4_wc != nullptr && tlb_size == BH_4GB_TLB_SIZE) { + if (dev->get_pci_device()->bar4_wc != nullptr && tlb_description.size == BH_4GB_TLB_SIZE) { // This is only for Blackhole. If we want to write to DRAM (BAR4 space), we add offset // to which we write so write_block knows it needs to target BAR4 - dev->write_block((tlb_offset + address % tlb_size) + BAR0_BH_SIZE, size_in_bytes, buffer_addr); + dev->write_block( + (tlb_description.tlb_offset + address % tlb_description.size) + BAR0_BH_SIZE, + size_in_bytes, + buffer_addr); } else { - dev->write_block(tlb_offset + address % tlb_size, size_in_bytes, buffer_addr); + dev->write_block(tlb_description.tlb_offset + address % tlb_description.size, size_in_bytes, buffer_addr); } } else { const auto tlb_index = dynamic_tlb_config.at(fallback_tlb); @@ -1170,17 +1169,23 @@ void Cluster::read_device_memory( log_debug(LogSiliconDriver, " tlb_index: {}, tlb_data.has_value(): {}", tlb_index, tlb_data.has_value()); if (is_tlb_mapped(target, address, size_in_bytes)) { - auto tlb_description = dev->get_architecture_implementation()->describe_tlb( + tlb_configuration tlb_description = dev->get_architecture_implementation()->get_tlb_configuration( map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y))); - auto [tlb_offset, tlb_size] = tlb_description.value(); - if (dev->get_pci_device()->bar4_wc != nullptr && tlb_size == BH_4GB_TLB_SIZE) { + if (dev->get_pci_device()->bar4_wc != nullptr && tlb_description.size == BH_4GB_TLB_SIZE) { // This is only for Blackhole. If we want to read from DRAM (BAR4 space), we add offset // from which we read so read_block knows it needs to target BAR4 - dev->read_block((tlb_offset + address % tlb_size) + BAR0_BH_SIZE, size_in_bytes, buffer_addr); + dev->read_block( + (tlb_description.tlb_offset + address % tlb_description.size) + BAR0_BH_SIZE, + size_in_bytes, + buffer_addr); } else { - dev->read_block(tlb_offset + address % tlb_size, size_in_bytes, buffer_addr); + dev->read_block(tlb_description.tlb_offset + address % tlb_description.size, size_in_bytes, buffer_addr); } - log_debug(LogSiliconDriver, " read_block called with tlb_offset: {}, tlb_size: {}", tlb_offset, tlb_size); + log_debug( + LogSiliconDriver, + " read_block called with tlb_offset: {}, tlb_size: {}", + tlb_description.tlb_offset, + tlb_description.size); } else { const auto tlb_index = dynamic_tlb_config.at(fallback_tlb); const scoped_lock lock(*get_mutex(fallback_tlb, target.chip)); @@ -1340,12 +1345,15 @@ Cluster::~Cluster() { } std::optional> Cluster::get_tlb_data_from_target(const tt_cxy_pair& target) { - if (!is_tlb_mapped(target)) { - return std::nullopt; - } + auto tlb_configuration = get_tlb_configuration(target); + return std::tuple((uint32_t)tlb_configuration.tlb_offset, (uint32_t)tlb_configuration.size); +} + +tlb_configuration Cluster::get_tlb_configuration(const tt_cxy_pair& target) { + log_assert(is_tlb_mapped(target), "TLB not mapped for core: {}", target.str()); int tlb_index = map_core_to_tlb_per_chip.at(target.chip).at(tt_xy_pair(target.x, target.y)); - return get_tt_device(target.chip)->get_architecture_implementation()->describe_tlb(tlb_index); + return get_tt_device(target.chip)->get_architecture_implementation()->get_tlb_configuration(tlb_index); } std::optional> Cluster::get_tlb_data_from_target(const chip_id_t chip, CoreCoord core) { @@ -1353,6 +1361,11 @@ std::optional> Cluster::get_tlb_data_from_target( return get_tlb_data_from_target({(size_t)chip, virtual_coord}); } +tlb_configuration Cluster::get_tlb_configuration(const chip_id_t chip, CoreCoord core) { + const CoreCoord virtual_coord = translate_chip_coord(chip, core, CoordSystem::VIRTUAL); + return get_tlb_configuration({(size_t)chip, virtual_coord}); +} + void Cluster::configure_tlb( chip_id_t logical_device_id, tt_xy_pair core, int32_t tlb_index, uint64_t address, uint64_t ordering) { log_assert( @@ -1378,7 +1391,7 @@ void Cluster::configure_tlb( TTDevice* tt_device = get_tt_device(logical_device_id); tt_device->set_dynamic_tlb( tlb_index, harvested_coord_translation.at(logical_device_id).at(core), address, ordering); - auto tlb_size = std::get<1>(tt_device->get_architecture_implementation()->describe_tlb(tlb_index).value()); + uint64_t tlb_size = tt_device->get_architecture_implementation()->get_tlb_configuration(tlb_index).size; tlb_config_map.at(logical_device_id).insert({tlb_index, (address / tlb_size) * tlb_size}); map_core_to_tlb_per_chip.at(logical_device_id).insert({core, tlb_index}); } diff --git a/device/grayskull/grayskull_implementation.cpp b/device/grayskull/grayskull_implementation.cpp index f0fbe478..af3d8a77 100644 --- a/device/grayskull/grayskull_implementation.cpp +++ b/device/grayskull/grayskull_implementation.cpp @@ -25,6 +25,8 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i .base = grayskull::DYNAMIC_TLB_16M_BASE, .cfg_addr = grayskull::DYNAMIC_TLB_16M_CFG_ADDR, .index_offset = tlb_index - grayskull::TLB_BASE_INDEX_16M, + .tlb_offset = grayskull::DYNAMIC_TLB_16M_BASE + + (tlb_index - grayskull::TLB_BASE_INDEX_16M) * grayskull::DYNAMIC_TLB_16M_SIZE, .offset = grayskull::TLB_16M_OFFSET, }; } else if (tlb_index >= grayskull::TLB_BASE_INDEX_2M) { @@ -33,6 +35,8 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i .base = grayskull::DYNAMIC_TLB_2M_BASE, .cfg_addr = grayskull::DYNAMIC_TLB_2M_CFG_ADDR, .index_offset = tlb_index - grayskull::TLB_BASE_INDEX_2M, + .tlb_offset = grayskull::DYNAMIC_TLB_2M_BASE + + (tlb_index - grayskull::TLB_BASE_INDEX_2M) * grayskull::DYNAMIC_TLB_2M_SIZE, .offset = grayskull::TLB_2M_OFFSET, }; } else { @@ -41,41 +45,13 @@ tlb_configuration grayskull_implementation::get_tlb_configuration(uint32_t tlb_i .base = grayskull::DYNAMIC_TLB_1M_BASE, .cfg_addr = grayskull::DYNAMIC_TLB_1M_CFG_ADDR, .index_offset = tlb_index - grayskull::TLB_BASE_INDEX_1M, + .tlb_offset = grayskull::DYNAMIC_TLB_1M_BASE + + (tlb_index - grayskull::TLB_BASE_INDEX_1M) * grayskull::DYNAMIC_TLB_1M_SIZE, .offset = grayskull::TLB_1M_OFFSET, }; } } -std::optional> grayskull_implementation::describe_tlb( - std::int32_t tlb_index) const { - std::uint32_t TLB_COUNT_1M = 156; - std::uint32_t TLB_COUNT_2M = 10; - std::uint32_t TLB_COUNT_16M = 20; - - std::uint32_t TLB_BASE_1M = 0; - std::uint32_t TLB_BASE_2M = TLB_COUNT_1M * (1 << 20); - std::uint32_t TLB_BASE_16M = TLB_BASE_2M + TLB_COUNT_2M * (1 << 21); - - if (tlb_index < 0) { - return std::nullopt; - } - - if (tlb_index >= 0 && tlb_index < TLB_COUNT_1M) { - std::uint32_t size = 1 << 20; - return std::tuple(TLB_BASE_1M + size * tlb_index, size); - } else if (tlb_index >= 0 && tlb_index < TLB_COUNT_1M + TLB_COUNT_2M) { - auto tlb_offset = tlb_index - TLB_COUNT_1M; - auto size = 1 << 21; - return std::tuple(TLB_BASE_2M + tlb_offset * size, size); - } else if (tlb_index >= 0 and tlb_index < TLB_COUNT_1M + TLB_COUNT_2M + TLB_COUNT_16M) { - auto tlb_offset = tlb_index - (TLB_COUNT_1M + TLB_COUNT_2M); - auto size = 1 << 24; - return std::tuple(TLB_BASE_16M + tlb_offset * size, size); - } - - return std::nullopt; -} - std::pair grayskull_implementation::get_tlb_data( std::uint32_t tlb_index, const tlb_data &data) const { if (tlb_index < grayskull::TLB_COUNT_1M) { diff --git a/device/wormhole/wormhole_implementation.cpp b/device/wormhole/wormhole_implementation.cpp index d80a72eb..2e1aadc9 100644 --- a/device/wormhole/wormhole_implementation.cpp +++ b/device/wormhole/wormhole_implementation.cpp @@ -31,6 +31,8 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in .base = wormhole::DYNAMIC_TLB_16M_BASE, .cfg_addr = wormhole::DYNAMIC_TLB_16M_CFG_ADDR, .index_offset = tlb_index - wormhole::TLB_BASE_INDEX_16M, + .tlb_offset = wormhole::DYNAMIC_TLB_16M_BASE + + (tlb_index - wormhole::TLB_BASE_INDEX_16M) * wormhole::DYNAMIC_TLB_16M_SIZE, .offset = wormhole::TLB_16M_OFFSET, }; } else if (tlb_index >= wormhole::TLB_BASE_INDEX_2M) { @@ -39,6 +41,8 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in .base = wormhole::DYNAMIC_TLB_2M_BASE, .cfg_addr = wormhole::DYNAMIC_TLB_2M_CFG_ADDR, .index_offset = tlb_index - wormhole::TLB_BASE_INDEX_2M, + .tlb_offset = wormhole::DYNAMIC_TLB_2M_BASE + + (tlb_index - wormhole::TLB_BASE_INDEX_2M) * wormhole::DYNAMIC_TLB_2M_SIZE, .offset = wormhole::TLB_2M_OFFSET, }; } else { @@ -47,40 +51,13 @@ tlb_configuration wormhole_implementation::get_tlb_configuration(uint32_t tlb_in .base = wormhole::DYNAMIC_TLB_1M_BASE, .cfg_addr = wormhole::DYNAMIC_TLB_1M_CFG_ADDR, .index_offset = tlb_index - wormhole::TLB_BASE_INDEX_1M, + .tlb_offset = wormhole::DYNAMIC_TLB_1M_BASE + + (tlb_index - wormhole::TLB_BASE_INDEX_1M) * wormhole::DYNAMIC_TLB_1M_SIZE, .offset = wormhole::TLB_1M_OFFSET, }; } } -std::optional> wormhole_implementation::describe_tlb( - std::int32_t tlb_index) const { - std::uint32_t TLB_COUNT_1M = 156; - std::uint32_t TLB_COUNT_2M = 10; - std::uint32_t TLB_COUNT_16M = 20; - - std::uint32_t TLB_BASE_1M = 0; - std::uint32_t TLB_BASE_2M = TLB_COUNT_1M * (1 << 20); - std::uint32_t TLB_BASE_16M = TLB_BASE_2M + TLB_COUNT_2M * (1 << 21); - if (tlb_index < 0) { - return std::nullopt; - } - - if (tlb_index >= 0 && tlb_index < TLB_COUNT_1M) { - std::uint32_t size = 1 << 20; - return std::tuple(TLB_BASE_1M + size * tlb_index, size); - } else if (tlb_index >= 0 && tlb_index < TLB_COUNT_1M + TLB_COUNT_2M) { - auto tlb_offset = tlb_index - TLB_COUNT_1M; - auto size = 1 << 21; - return std::tuple(TLB_BASE_2M + tlb_offset * size, size); - } else if (tlb_index >= 0 and tlb_index < TLB_COUNT_1M + TLB_COUNT_2M + TLB_COUNT_16M) { - auto tlb_offset = tlb_index - (TLB_COUNT_1M + TLB_COUNT_2M); - auto size = 1 << 24; - return std::tuple(TLB_BASE_16M + tlb_offset * size, size); - } - - return std::nullopt; -} - std::pair wormhole_implementation::get_tlb_data( std::uint32_t tlb_index, const tlb_data &data) const { std::uint32_t TLB_COUNT_1M = 156;