diff --git a/tt_metal/impl/device/device_pool.cpp b/tt_metal/impl/device/device_pool.cpp index fe4c53757c6..6a3c9222d20 100644 --- a/tt_metal/impl/device/device_pool.cpp +++ b/tt_metal/impl/device/device_pool.cpp @@ -206,6 +206,7 @@ void DevicePool::initialize( // Never skip for TG Cluster bool skip = not tt::Cluster::instance().is_galaxy_cluster(); + std::vector target_mmio_ids; for (const auto& device_id : device_ids) { TT_FATAL( device_id < tt::Cluster::instance().number_of_devices(), @@ -213,13 +214,23 @@ void DevicePool::initialize( device_id, tt::Cluster::instance().number_of_devices()); const auto& mmio_device_id = tt::Cluster::instance().get_associated_mmio_device(device_id); + if (std::find(target_mmio_ids.begin(), target_mmio_ids.end(), mmio_device_id) == target_mmio_ids.end()) { + target_mmio_ids.push_back(mmio_device_id); + } skip &= (device_id == mmio_device_id); } + if (target_mmio_ids.size() != tt::Cluster::instance().number_of_pci_devices()) { + log_warning( + tt::LogMetal, + "Opening subset of mmio devices slows down UMD read/write to remote chips. If opening more devices, " + "consider using CreateDevices API."); + } + _inst->skip_remote_devices = skip; _inst->add_devices_to_pool(device_ids); _inst->init_firmware_on_active_devices(); - tt::Cluster::instance().set_internal_routing_info_for_ethernet_cores(true); + tt::Cluster::instance().set_internal_routing_info_for_ethernet_cores(true, target_mmio_ids); _inst->init_profiler_devices(); } diff --git a/tt_metal/llrt/tt_cluster.cpp b/tt_metal/llrt/tt_cluster.cpp index 67e7e6e33ad..28e70c2937e 100644 --- a/tt_metal/llrt/tt_cluster.cpp +++ b/tt_metal/llrt/tt_cluster.cpp @@ -973,18 +973,21 @@ std::tuple Cluster::get_eth_tunnel_core( } // TODO: ALLAN Can change to write one bit -void Cluster::set_internal_routing_info_for_ethernet_cores(bool enable_internal_routing) const { +void Cluster::set_internal_routing_info_for_ethernet_cores(bool enable_internal_routing, const std::vector &target_mmio_devices) const { log_debug(tt::LogDevice, "Set internal routing bit {}", enable_internal_routing); const uint32_t routing_info_addr = eth_l1_mem::address_map::ERISC_APP_ROUTING_INFO_BASE; // TODO: initialize devices if user does not // Must initialize remote chips first, then mmio chips since once mmio chips are doing fd routing // we do not always context switch to base FW - std::vector mmio_devices; - mmio_devices.reserve(this->devices_grouped_by_assoc_mmio_device_.size()); std::vector non_mmio_devices; - for (const auto &[assoc_mmio_device, devices] : this->devices_grouped_by_assoc_mmio_device_) { - mmio_devices.emplace_back(assoc_mmio_device); - for (const auto &chip_id : devices) { + std::vector mmio_devices = target_mmio_devices; + if (mmio_devices.size() == 0) { + for (const auto &[assoc_mmio_device, devices] : this->devices_grouped_by_assoc_mmio_device_) { + mmio_devices.emplace_back(assoc_mmio_device); + } + } + for (const auto &mmio_chip_id : mmio_devices) { + for (const auto &chip_id : this->devices_grouped_by_assoc_mmio_device_.at(mmio_chip_id)) { non_mmio_devices.emplace_back(chip_id); } } diff --git a/tt_metal/llrt/tt_cluster.hpp b/tt_metal/llrt/tt_cluster.hpp index 629f23ddd23..1a4e3e6293a 100644 --- a/tt_metal/llrt/tt_cluster.hpp +++ b/tt_metal/llrt/tt_cluster.hpp @@ -182,7 +182,7 @@ class Cluster { // set_internal_routing_info_for_ethernet_cores(false); // CloseDevice(0) // CloseDevice(1) - void set_internal_routing_info_for_ethernet_cores(bool enable_internal_routing) const; + void set_internal_routing_info_for_ethernet_cores(bool enable_internal_routing, const std::vector& target_mmio_devices = {}) const; // Returns MMIO device ID (logical) that controls given `device_id`. If `device_id` is MMIO device it is returned. chip_id_t get_associated_mmio_device(chip_id_t device_id) const {