Skip to content

Commit

Permalink
#0: Fix for number of expected devices in FD setup
Browse files (browse the repository at this point in the history)
  • Loading branch information
tt-dma committed Dec 3, 2024
1 parent ed37787 commit 693d8cf
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 9 deletions.
4 changes: 2 additions & 2 deletions tt_metal/impl/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ namespace tt {
namespace tt_metal {

Device::Device(
chip_id_t device_id, const uint8_t num_hw_cqs, size_t l1_small_size, size_t trace_region_size, tt::stl::Span<const std::uint32_t> l1_bank_remap, bool minimal, uint32_t worker_core, uint32_t completion_queue_reader_core) :
id_(device_id), worker_thread_core(worker_core), completion_queue_reader_core(completion_queue_reader_core), work_executor(worker_core, device_id) {
chip_id_t device_id, const uint8_t num_hw_cqs, size_t l1_small_size, size_t trace_region_size, tt::stl::Span<const std::uint32_t> l1_bank_remap, bool minimal, uint32_t worker_core, uint32_t completion_queue_reader_core, uint32_t total_devices) :
id_(device_id), worker_thread_core(worker_core), completion_queue_reader_core(completion_queue_reader_core), work_executor(worker_core, device_id), total_devices_(total_devices) {
ZoneScoped;
tunnel_device_dispatch_workers_ = {};
this->initialize(num_hw_cqs, l1_small_size, trace_region_size, l1_bank_remap, minimal);
Expand Down
6 changes: 5 additions & 1 deletion tt_metal/impl/device/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ class Device {
tt::stl::Span<const std::uint32_t> l1_bank_remap = {},
bool minimal = false,
uint32_t worker_core = 0,
uint32_t completion_queue_reader_core = 0);
uint32_t completion_queue_reader_core = 0,
uint32_t total_devices = 0);

~Device();

Expand All @@ -90,6 +91,8 @@ class Device {

// Returns this device's chip id (id_), which the constructor initializes from
// its `device_id` argument.
chip_id_t id() const { return id_; }

// Returns the number of devices this Device was told to expect at construction
// (the `total_devices` constructor argument, default 0). DevicePool passes the
// size of the requested device-id list. A value of 0 means "not specified":
// get_nodes() in dispatch/arch.cpp then falls back to
// tt::Cluster::instance().number_of_user_devices().
uint32_t total_devices() const { return total_devices_; }

// Returns build_key_. DevicePool::activate_device looks this value up in
// firmware_built_keys and calls build_firmware() only when it is not present.
uint32_t build_key() const { return build_key_; }

// Returns num_hw_cqs_ (presumably the number of hardware command queues --
// confirm). get_nodes() uses it to choose between the 1-CQ and 2-CQ dispatch
// node layouts.
uint8_t num_hw_cqs() const { return num_hw_cqs_; }
Expand Down Expand Up @@ -300,6 +303,7 @@ class Device {

static constexpr MemoryAllocator allocator_scheme_ = MemoryAllocator::L1_BANKING;
chip_id_t id_;
uint32_t total_devices_;
uint32_t build_key_;
// Leaving here for compatibility with current reacharounds
// TODO: Replace with get_initialized_allocator()
Expand Down
9 changes: 5 additions & 4 deletions tt_metal/impl/device/device_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ void DevicePool::initialize_device(v1::DeviceHandle handle) const {
}
}

void DevicePool::activate_device(chip_id_t id) {
void DevicePool::activate_device(chip_id_t id, uint32_t total_devices) {
TT_FATAL(
id < tt::Cluster::instance().number_of_devices(),
"Device index {} out of range. There are {} devices available.",
Expand All @@ -278,7 +278,8 @@ void DevicePool::activate_device(chip_id_t id) {
this->l1_bank_remap,
false,
worker_core_thread_core,
completion_queue_reader_core);
completion_queue_reader_core,
total_devices);
dev->update_dispatch_cores_for_multi_cq_eth_dispatch();
if (!this->firmware_built_keys.contains(dev->build_key())) {
dev->build_firmware();
Expand Down Expand Up @@ -319,7 +320,7 @@ void DevicePool::add_devices_to_pool(const std::vector<chip_id_t>& device_ids) {
const auto& mmio_device_id = tt::Cluster::instance().get_associated_mmio_device(device_id);
TT_ASSERT(device_id == mmio_device_id, "Skipping remote devices is only available for mmio devices");
if (not this->is_device_active(device_id)) {
this->activate_device(device_id);
this->activate_device(device_id, device_ids.size());
}
}
} else {
Expand All @@ -330,7 +331,7 @@ void DevicePool::add_devices_to_pool(const std::vector<chip_id_t>& device_ids) {
for (const auto& mmio_controlled_device_id :
tt::Cluster::instance().get_devices_controlled_by_mmio_device(mmio_device_id)) {
if (not this->is_device_active(mmio_controlled_device_id)) {
this->activate_device(mmio_controlled_device_id);
this->activate_device(mmio_controlled_device_id, device_ids.size());
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/impl/device/device_pool.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class DevicePool {
std::unordered_map<uint32_t, uint32_t> completion_queue_reader_to_cpu_core_map;
void init_firmware_on_active_devices() const;
void init_profiler_devices() const;
void activate_device(chip_id_t id);
void activate_device(chip_id_t id, uint32_t total_devices);
void initialize_device(tt_metal::v1::DeviceHandle dev) const;
void add_devices_to_pool(const std::vector<chip_id_t>& device_ids);
static DevicePool* _inst;
Expand Down
5 changes: 4 additions & 1 deletion tt_metal/impl/dispatch/arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,10 @@ std::vector<FDKernel *> node_id_to_kernel;
// Helper function to get the right struct for dispatch kernels. TODO: replace with reading yaml file later?
inline std::vector<dispatch_kernel_node_t> get_nodes(Device *device) {
std::vector<dispatch_kernel_node_t> nodes;
uint32_t num_devices = tt::Cluster::instance().number_of_user_devices();
uint32_t num_devices = device->total_devices();
if (num_devices == 0)
num_devices = tt::Cluster::instance().number_of_user_devices();

if (num_devices == 1) { // E150, N150
nodes = (device->num_hw_cqs() == 1) ? single_card_arch_1cq : single_card_arch_2cq;
} else if (num_devices == 2) { // N300
Expand Down

0 comments on commit 693d8cf

Please sign in to comment.