From 2c314780523636e9608cc175ca8d1e95b6040597 Mon Sep 17 00:00:00 2001 From: Vincent Tang Date: Tue, 19 Nov 2024 20:49:31 +0000 Subject: [PATCH] Fix simulator setup - HACK for passing BH pcie coordinates during simulation mode - Correct compute grid size for BH simulator --- tt_metal/common/core_descriptor.cpp | 2 +- .../blackhole_simulation_1x2_arch.yaml | 8 ++++---- tt_metal/impl/device/device.cpp | 14 ++++++++++---- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/tt_metal/common/core_descriptor.cpp b/tt_metal/common/core_descriptor.cpp index 9531870de86..989f1dd6685 100644 --- a/tt_metal/common/core_descriptor.cpp +++ b/tt_metal/common/core_descriptor.cpp @@ -121,7 +121,7 @@ const core_descriptor_t& get_core_descriptor_config( } dispatch_cores.push_back(coord); } - TT_ASSERT(dispatch_cores.size(), "Dispatch cores size must be positive"); + TT_ASSERT(dispatch_cores.size() || std::getenv("TT_METAL_SIMULATOR_EN"), "Dispatch cores size must be positive"); config_by_num_cqs[num_hw_cqs] = core_descriptor_t{ .compute_grid_size = compute_grid_size, diff --git a/tt_metal/core_descriptors/blackhole_simulation_1x2_arch.yaml b/tt_metal/core_descriptors/blackhole_simulation_1x2_arch.yaml index 7bb6564f58f..e81d0cd57c7 100644 --- a/tt_metal/core_descriptors/blackhole_simulation_1x2_arch.yaml +++ b/tt_metal/core_descriptors/blackhole_simulation_1x2_arch.yaml @@ -9,8 +9,8 @@ blackhole: col: 1: compute_with_storage_grid_range: # Logical only start and end [x, y] - start: [0, 1] - end: [1, 1] + start: [0, 0] + end: [1, 0] storage_cores: # Relative to grid of tensix cores [] @@ -22,8 +22,8 @@ blackhole: "tensix" 2: compute_with_storage_grid_range: # Logical only start and end [x, y] - start: [0, 1] - end: [1, 1] + start: [0, 0] + end: [1, 0] storage_cores: # Relative to grid of tensix cores [] diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp index 46ad43ec3b3..d980a224c05 100644 --- a/tt_metal/impl/device/device.cpp +++ 
b/tt_metal/impl/device/device.cpp
@@ -321,10 +321,16 @@ void Device::initialize_device_kernel_defines()
     const metal_SocDescriptor& soc_d = tt::Cluster::instance().get_soc_desc(this->id());
     auto pcie_cores = soc_d.get_pcie_cores();
     auto grid_size = this->grid_size();
-    this->device_kernel_defines_.emplace("PCIE_NOC_X", std::to_string(pcie_cores[0].x));
-    this->device_kernel_defines_.emplace("PCIE_NOC_Y", std::to_string(pcie_cores[0].y));
-    this->device_kernel_defines_.emplace("PCIE_NOC1_X", std::to_string(tt::tt_metal::hal.noc_coordinate(NOC::NOC_1, grid_size.x, pcie_cores[0].x)));
-    this->device_kernel_defines_.emplace("PCIE_NOC1_Y", std::to_string(tt::tt_metal::hal.noc_coordinate(NOC::NOC_1, grid_size.x, pcie_cores[0].y)));
+
+    // Simulator workaround: its 2x2 grid would underflow PCIE_NOC1_*; with no PCIe core reported, fall back to grid_size (NOC0) and hard-coded 14/11 (NOC1).
+    CoreCoord pcie_core = pcie_cores.empty() ? grid_size : pcie_cores[0];
+    auto pcie_noc1_x = pcie_cores.empty() ? 14 : tt::tt_metal::hal.noc_coordinate(NOC::NOC_1, grid_size.x, pcie_cores[0].x);
+    auto pcie_noc1_y = pcie_cores.empty() ? 11 : tt::tt_metal::hal.noc_coordinate(NOC::NOC_1, grid_size.x, pcie_cores[0].y);
+
+    this->device_kernel_defines_.emplace("PCIE_NOC_X", std::to_string(pcie_core.x));
+    this->device_kernel_defines_.emplace("PCIE_NOC_Y", std::to_string(pcie_core.y));
+    this->device_kernel_defines_.emplace("PCIE_NOC1_X", std::to_string(pcie_noc1_x));
+    this->device_kernel_defines_.emplace("PCIE_NOC1_Y", std::to_string(pcie_noc1_y));
 }
 
 void Device::initialize_build() {