From f8bd917609db6bcc435c503a5f69d63503d7d843 Mon Sep 17 00:00:00 2001 From: David Ma Date: Fri, 6 Dec 2024 01:02:23 +0000 Subject: [PATCH] #0: Bugfix - support for remote-only on T3K --- tt_metal/impl/device/device_pool.cpp | 24 +++++++++++++++++------- tt_metal/impl/dispatch/arch.cpp | 2 +- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/tt_metal/impl/device/device_pool.cpp b/tt_metal/impl/device/device_pool.cpp index d754b45b3ba2..d0a3f84910fb 100644 --- a/tt_metal/impl/device/device_pool.cpp +++ b/tt_metal/impl/device/device_pool.cpp @@ -315,14 +315,12 @@ bool DevicePool::is_device_active(chip_id_t id) const { } void DevicePool::add_devices_to_pool(const std::vector& device_ids) { - populate_fd_kernels(device_ids, this->num_hw_cqs); + std::vector devices_to_activate; if (this->skip_remote_devices) { for (const auto& device_id : device_ids) { const auto& mmio_device_id = tt::Cluster::instance().get_associated_mmio_device(device_id); TT_ASSERT(device_id == mmio_device_id, "Skipping remote devices is only available for mmio devices"); - if (not this->is_device_active(device_id)) { - this->activate_device(device_id); - } + devices_to_activate.push_back(device_id); } } else { std::vector all_device_ids = {}; @@ -331,12 +329,24 @@ void DevicePool::add_devices_to_pool(const std::vector& device_ids) { const auto& mmio_device_id = tt::Cluster::instance().get_associated_mmio_device(device_id); for (const auto& mmio_controlled_device_id : tt::Cluster::instance().get_devices_controlled_by_mmio_device(mmio_device_id)) { - if (not this->is_device_active(mmio_controlled_device_id)) { - this->activate_device(mmio_controlled_device_id); - } + devices_to_activate.push_back(mmio_controlled_device_id); } } } + + std::string ids_str = ""; + for (auto id : devices_to_activate) { + ids_str += fmt::format("{}, ", id); + } + log_warning("Device Pool init, ids: {}skip_remote_devices: {}", ids_str, this->skip_remote_devices); + if (llrt::OptionsG.get_use_new_fd_init()) { + populate_fd_kernels(devices_to_activate, this->num_hw_cqs); + } + for (const auto& device_id : devices_to_activate) { + if (not this->is_device_active(device_id)) { + this->activate_device(device_id); + } + } } void DevicePool::register_worker_thread_for_device(v1::DeviceHandle device, std::thread::id worker_thread_id) { diff --git a/tt_metal/impl/dispatch/arch.cpp b/tt_metal/impl/dispatch/arch.cpp index 4a49d909397f..7d370af8a078 100644 --- a/tt_metal/impl/dispatch/arch.cpp +++ b/tt_metal/impl/dispatch/arch.cpp @@ -144,13 +144,13 @@ void populate_fd_kernels(const std::vector &device_ids, uint32_t num_ // Need to determine the submesh of devices that are being used uint32_t mmio_count = 0; uint32_t remote_count = 0; - tt::log_warning("T3000, mmio_count={}, remote_count={}", mmio_count, remote_count); for (auto id : device_ids) { if (tt::Cluster::instance().get_associated_mmio_device(id) == id) mmio_count++; else remote_count++; } + tt::log_warning("T3000, mmio_count={}, remote_count={}", mmio_count, remote_count); // Supported grid either has one remote per mmio or none TT_ASSERT(mmio_count == remote_count or remote_count == 0, "Unexpected device grid");