Skip to content

Commit

Permalink
#0: Bugfix - support for remote-only on T3K
Browse files: browse the repository at this point in the history
  • Loading branch information
tt-dma committed Dec 6, 2024
1 parent 1f0f5c9 commit f8bd917
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 8 deletions.
24 changes: 17 additions & 7 deletions tt_metal/impl/device/device_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,14 +315,12 @@ bool DevicePool::is_device_active(chip_id_t id) const {
}

void DevicePool::add_devices_to_pool(const std::vector<chip_id_t>& device_ids) {
populate_fd_kernels(device_ids, this->num_hw_cqs);
std::vector<chip_id_t> devices_to_activate;
if (this->skip_remote_devices) {
for (const auto& device_id : device_ids) {
const auto& mmio_device_id = tt::Cluster::instance().get_associated_mmio_device(device_id);
TT_ASSERT(device_id == mmio_device_id, "Skipping remote devices is only available for mmio devices");
if (not this->is_device_active(device_id)) {
this->activate_device(device_id);
}
devices_to_activate.push_back(device_id);
}
} else {
std::vector<chip_id_t> all_device_ids = {};
Expand All @@ -331,12 +329,24 @@ void DevicePool::add_devices_to_pool(const std::vector<chip_id_t>& device_ids) {
const auto& mmio_device_id = tt::Cluster::instance().get_associated_mmio_device(device_id);
for (const auto& mmio_controlled_device_id :
tt::Cluster::instance().get_devices_controlled_by_mmio_device(mmio_device_id)) {
if (not this->is_device_active(mmio_controlled_device_id)) {
this->activate_device(mmio_controlled_device_id);
}
devices_to_activate.push_back(mmio_controlled_device_id);
}
}
}

std::string ids_str = "";
for (auto id : devices_to_activate) {
ids_str += fmt::format("{}, ", id);
}
log_warning("Device Pool init, ids: {}skip_remote_devices: {}", ids_str, this->skip_remote_devices);
if (llrt::OptionsG.get_use_new_fd_init()) {
populate_fd_kernels(devices_to_activate, this->num_hw_cqs);
}
for (const auto& device_id : devices_to_activate) {
if (not this->is_device_active(device_id)) {
this->activate_device(device_id);
}
}
}

void DevicePool::register_worker_thread_for_device(v1::DeviceHandle device, std::thread::id worker_thread_id) {
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/impl/dispatch/arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,13 +144,13 @@ void populate_fd_kernels(const std::vector<chip_id_t> &device_ids, uint32_t num_
// Need to determine the submesh of devices that are being used
uint32_t mmio_count = 0;
uint32_t remote_count = 0;
tt::log_warning("T3000, mmio_count={}, remote_count={}", mmio_count, remote_count);
for (auto id : device_ids) {
if (tt::Cluster::instance().get_associated_mmio_device(id) == id)
mmio_count++;
else
remote_count++;
}
tt::log_warning("T3000, mmio_count={}, remote_count={}", mmio_count, remote_count);

// Supported grid either has one remote per mmio or none
TT_ASSERT(mmio_count == remote_count or remote_count == 0, "Unexpected device grid");
Expand Down

0 comments on commit f8bd917

Please sign in to comment.