From 883edcd1c391174e6eefe29271b8f9b07e00af81 Mon Sep 17 00:00:00 2001 From: pjanevski Date: Thu, 5 Dec 2024 11:15:29 +0000 Subject: [PATCH] Implement Soc descriptor CoreCoord API --- .github/workflows/build-tests.yml | 4 +- .../umd/device/blackhole_coordinate_manager.h | 9 + device/api/umd/device/coordinate_manager.h | 40 ++- device/api/umd/device/tt_soc_descriptor.h | 96 ++++--- .../blackhole_coordinate_manager.cpp | 95 +++++++ device/coordinate_manager.cpp | 132 ++++++++++ device/tt_soc_descriptor.cpp | 101 +++++++- tests/api/CMakeLists.txt | 1 + tests/api/test_soc_descriptor.cpp | 241 ++++++++++++++++++ tests/soc_descs/blackhole_simulation_1x2.yaml | 55 ++++ 10 files changed, 719 insertions(+), 55 deletions(-) create mode 100644 tests/api/test_soc_descriptor.cpp create mode 100644 tests/soc_descs/blackhole_simulation_1x2.yaml diff --git a/.github/workflows/build-tests.yml b/.github/workflows/build-tests.yml index 3916e4bf..bff1d0af 100644 --- a/.github/workflows/build-tests.yml +++ b/.github/workflows/build-tests.yml @@ -34,6 +34,7 @@ env: DEPS_OUTPUT_DIR: ./build/_deps TEST_OUTPUT_DIR: ./build/test CLUSTER_DESCRIPTORS_DIR: ./tests/api/cluster_descriptor_examples + SOC_DESCRIPTORS_DIR: ./tests/soc_descs jobs: build: @@ -77,7 +78,8 @@ jobs: tar cvf artifact.tar ${{ env.TEST_OUTPUT_DIR }} \ ${{ env.LIB_OUTPUT_DIR }} \ ${{ env.DEPS_OUTPUT_DIR }} \ - ${{ env.CLUSTER_DESCRIPTORS_DIR }} + ${{ env.CLUSTER_DESCRIPTORS_DIR }} \ + ${{ env.SOC_DESCRIPTORS_DIR }} - name: Upload build artifacts archive uses: actions/upload-artifact@v4 diff --git a/device/api/umd/device/blackhole_coordinate_manager.h b/device/api/umd/device/blackhole_coordinate_manager.h index 6d5acdd3..1e4dae63 100644 --- a/device/api/umd/device/blackhole_coordinate_manager.h +++ b/device/api/umd/device/blackhole_coordinate_manager.h @@ -34,6 +34,15 @@ class BlackholeCoordinateManager : public CoordinateManager { void fill_pcie_physical_translated_mapping() override; void 
fill_dram_physical_translated_mapping() override; + std::vector get_tensix_cores() const override; + std::vector get_harvested_tensix_cores() const override; + std::vector get_dram_cores() const override; + std::vector get_harvested_dram_cores() const override; + tt_xy_pair get_tensix_grid_size() const override; + tt_xy_pair get_dram_grid_size() const override; + tt_xy_pair get_harvested_tensix_grid_size() const override; + tt_xy_pair get_harvested_dram_grid_size() const override; + private: void map_column_of_dram_banks(const size_t start_bank, const size_t end_bank, const size_t x_coord); }; diff --git a/device/api/umd/device/coordinate_manager.h b/device/api/umd/device/coordinate_manager.h index f609cb94..66033692 100644 --- a/device/api/umd/device/coordinate_manager.h +++ b/device/api/umd/device/coordinate_manager.h @@ -42,6 +42,12 @@ class CoordinateManager { tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system); + std::vector get_cores(const CoreType core_type) const; + tt_xy_pair get_grid_size(const CoreType core_type) const; + + std::vector get_harvested_cores(const CoreType core_type) const; + tt_xy_pair get_harvested_grid_size(const CoreType core_type) const; + virtual ~CoordinateManager() = default; size_t get_tensix_harvesting_mask() const; @@ -52,6 +58,9 @@ class CoordinateManager { static void assert_create_coordinate_manager( const tt::ARCH arch, const size_t tensix_harvesting_mask, const size_t dram_harvesting_mask); + const std::vector& get_physical_pairs(const CoreType core_type) const; + std::vector get_all_physical_cores(const CoreType core_type) const; + protected: /* * Constructor for Coordinate Manager. 
@@ -87,6 +96,15 @@ class CoordinateManager { void identity_map_physical_cores(); void add_core_translation(const tt::umd::CoreCoord& core_coord, const tt_xy_pair& physical_pair); + virtual std::vector get_tensix_cores() const; + virtual std::vector get_harvested_tensix_cores() const; + virtual std::vector get_dram_cores() const; + virtual std::vector get_harvested_dram_cores() const; + virtual tt_xy_pair get_tensix_grid_size() const; + virtual tt_xy_pair get_dram_grid_size() const; + virtual tt_xy_pair get_harvested_tensix_grid_size() const; + virtual tt_xy_pair get_harvested_dram_grid_size() const; + /* * Fills the logical to translated mapping for the tensix cores. * By default, translated coordinates are the same as physical coordinates. @@ -134,23 +152,21 @@ class CoordinateManager { std::map to_physical_map; std::map, tt::umd::CoreCoord> from_physical_map; - const tt_xy_pair tensix_grid_size; - const std::vector& tensix_cores; + tt_xy_pair tensix_grid_size; + const std::vector tensix_cores; size_t tensix_harvesting_mask; const size_t physical_layout_tensix_harvesting_mask; - const tt_xy_pair dram_grid_size; - const std::vector& dram_cores; + tt_xy_pair dram_grid_size; + const std::vector dram_cores; size_t dram_harvesting_mask; - const tt_xy_pair eth_grid_size; - const std::vector& eth_cores; + tt_xy_pair eth_grid_size; + const std::vector eth_cores; - const tt_xy_pair arc_grid_size; - const std::vector& arc_cores; + tt_xy_pair arc_grid_size; + const std::vector arc_cores; - const tt_xy_pair pcie_grid_size; - const std::vector& pcie_cores; + tt_xy_pair pcie_grid_size; + const std::vector pcie_cores; }; - -// friend diff --git a/device/api/umd/device/tt_soc_descriptor.h b/device/api/umd/device/tt_soc_descriptor.h index ad338ea4..1669ff5a 100644 --- a/device/api/umd/device/tt_soc_descriptor.h +++ b/device/api/umd/device/tt_soc_descriptor.h @@ -46,42 +46,6 @@ struct CoreDescriptor { */ class tt_SocDescriptor { public: - tt::ARCH arch; - tt_xy_pair grid_size; - 
tt_xy_pair physical_grid_size; - tt_xy_pair worker_grid_size; - std::unordered_map cores; - std::vector arc_cores; - std::vector workers; - std::vector harvested_workers; - std::vector pcie_cores; - std::unordered_map worker_log_to_routing_x; - std::unordered_map worker_log_to_routing_y; - std::unordered_map routing_x_to_worker_x; - std::unordered_map routing_y_to_worker_y; - std::vector> dram_cores; // per channel list of dram cores - std::unordered_map> dram_core_channel_map; // map dram core to chan/subchan - std::vector ethernet_cores; // ethernet cores (index == channel id) - std::unordered_map ethernet_core_channel_map; - std::vector trisc_sizes; // Most of software stack assumes same trisc size for whole chip.. - std::string device_descriptor_file_path = std::string(""); - - bool has(tt_xy_pair input) { return cores.find(input) != cores.end(); } - - int overlay_version; - int unpacker_version; - int dst_size_alignment; - int packer_version; - int worker_l1_size; - int eth_l1_size; - bool noc_translation_id_enabled; - uint64_t dram_bank_size; - - int get_num_dram_channels() const; - bool is_worker_core(const tt_xy_pair &core) const; - tt_xy_pair get_core_for_dram_channel(int dram_chan, int subchannel) const; - bool is_ethernet_core(const tt_xy_pair &core) const; - // Default constructor. Creates uninitialized object with public access to all of its attributes. tt_SocDescriptor() = default; // Constructor used to build object from device descriptor file. @@ -119,22 +83,78 @@ class tt_SocDescriptor { eth_l1_size(other.eth_l1_size), noc_translation_id_enabled(other.noc_translation_id_enabled), dram_bank_size(other.dram_bank_size), - coordinate_manager(other.coordinate_manager) {} + coordinate_manager(other.coordinate_manager), + cores_map(other.cores_map), + grid_size_map(other.grid_size_map), + harvested_cores_map(other.harvested_cores_map), + harvested_grid_size_map(other.harvested_grid_size_map) {} // CoreCoord conversions. 
- tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system); + tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const; static std::string get_soc_descriptor_path(tt::ARCH arch); + std::vector get_cores(const CoreType core_type) const; + std::vector get_harvested_cores(const CoreType core_type) const; + tt_xy_pair get_grid_size(const CoreType core_type) const; + tt_xy_pair get_harvested_grid_size(const CoreType core_type) const; + + int get_num_dram_channels() const; + + bool is_worker_core(const tt_xy_pair &core) const; + + tt_xy_pair get_core_for_dram_channel(int dram_chan, int subchannel) const; + + tt::umd::CoreCoord get_dram_core_for_channel(int dram_chan, int subchannel) const; + tt::umd::CoreCoord get_dram_core(uint32_t dram_chan, uint32_t subchannel) const; + + bool is_ethernet_core(const tt_xy_pair &core) const; + + tt::ARCH arch; + tt_xy_pair grid_size; + tt_xy_pair physical_grid_size; + tt_xy_pair worker_grid_size; + std::unordered_map cores; + std::vector arc_cores; + std::vector workers; + std::vector harvested_workers; + std::vector pcie_cores; + std::unordered_map worker_log_to_routing_x; + std::unordered_map worker_log_to_routing_y; + std::unordered_map routing_x_to_worker_x; + std::unordered_map routing_y_to_worker_y; + std::vector> dram_cores; // per channel list of dram cores + std::unordered_map> dram_core_channel_map; // map dram core to chan/subchan + std::vector ethernet_cores; // ethernet cores (index == channel id) + std::unordered_map ethernet_core_channel_map; + std::vector trisc_sizes; // Most of software stack assumes same trisc size for whole chip.. 
+ std::string device_descriptor_file_path = std::string(""); + + int overlay_version; + int unpacker_version; + int dst_size_alignment; + int packer_version; + int worker_l1_size; + int eth_l1_size; + bool noc_translation_id_enabled; + uint64_t dram_bank_size; + private: void create_coordinate_manager(const std::size_t tensix_harvesting_mask, const std::size_t dram_harvesting_mask); void load_core_descriptors_from_device_descriptor(YAML::Node &device_descriptor_yaml); void load_soc_features_from_device_descriptor(YAML::Node &device_descriptor_yaml); + void get_cores_and_grid_size_from_coordinate_manager(); + + static tt_xy_pair calculate_grid_size(const std::vector &cores); // TODO: change this to unique pointer as soon as copying of tt_SocDescriptor // is not needed anymore. Soc descriptor and coordinate manager should be // created once per chip. std::shared_ptr coordinate_manager = nullptr; + std::map> cores_map; + std::map grid_size_map; + std::map> harvested_cores_map; + std::map harvested_grid_size_map; }; // Allocates a new soc descriptor on the heap. Returns an owning pointer. diff --git a/device/blackhole/blackhole_coordinate_manager.cpp b/device/blackhole/blackhole_coordinate_manager.cpp index 7a6f605a..2ed09be6 100644 --- a/device/blackhole/blackhole_coordinate_manager.cpp +++ b/device/blackhole/blackhole_coordinate_manager.cpp @@ -180,6 +180,25 @@ void BlackholeCoordinateManager::map_column_of_dram_banks( } void BlackholeCoordinateManager::fill_dram_physical_translated_mapping() { + if (dram_grid_size.x < blackhole::NUM_DRAM_BANKS) { + // If the number of DRAM banks is less than num dram banks for standard SOC for Blackhole, + // map the translated DRAM cores to be the same as physical DRAM cores. + // TODO: Figure out how DRAM is going to be mapped to translated coordinates when there are fewer DRAM banks.
+ for (size_t x = 0; x < dram_grid_size.x; x++) { + for (size_t y = 0; y < dram_grid_size.y; y++) { + const CoreCoord logical_dram_core = CoreCoord(x, y, CoreType::DRAM, CoordSystem::LOGICAL); + const tt_xy_pair physical_dram_core = to_physical_map[logical_dram_core]; + + CoreCoord translated_dram_core = + CoreCoord(physical_dram_core.x, physical_dram_core.y, CoreType::DRAM, CoordSystem::TRANSLATED); + to_physical_map[translated_dram_core] = physical_dram_core; + from_physical_map[{{physical_dram_core.x, physical_dram_core.y}, CoordSystem::TRANSLATED}] = + translated_dram_core; + } + } + return; + } + const std::vector harvested_banks = CoordinateManager::get_harvested_indices(dram_harvesting_mask); if (harvested_banks.empty()) { @@ -234,3 +253,79 @@ void BlackholeCoordinateManager::fill_dram_physical_translated_mapping() { add_core_translation(translated_coord, physical_core); } } + +std::vector BlackholeCoordinateManager::get_tensix_cores() const { + std::vector harvested_x_coords = get_harvested_indices(tensix_harvesting_mask); + std::vector unharvested_tensix_cores; + for (size_t y = 0; y < tensix_grid_size.y; y++) { + for (size_t x = 0; x < tensix_grid_size.x; x++) { + const tt_xy_pair core = tensix_cores[y * tensix_grid_size.x + x]; + CoreCoord core_coord(core.x, core.y, CoreType::TENSIX, CoordSystem::PHYSICAL); + if (std::find(harvested_x_coords.begin(), harvested_x_coords.end(), x) == harvested_x_coords.end()) { + unharvested_tensix_cores.push_back(core_coord); + } + } + } + return unharvested_tensix_cores; +} + +std::vector BlackholeCoordinateManager::get_harvested_tensix_cores() const { + std::vector harvested_x_coords = get_harvested_indices(tensix_harvesting_mask); + std::vector harvested_tensix_cores; + for (size_t y = 0; y < tensix_grid_size.y; y++) { + for (size_t x = 0; x < tensix_grid_size.x; x++) { + const tt_xy_pair core = tensix_cores[y * tensix_grid_size.x + x]; + CoreCoord core_coord(core.x, core.y, CoreType::TENSIX, CoordSystem::PHYSICAL); + 
if (std::find(harvested_x_coords.begin(), harvested_x_coords.end(), x) != harvested_x_coords.end()) { + harvested_tensix_cores.push_back(core_coord); + } + } + } + return harvested_tensix_cores; +} + +std::vector BlackholeCoordinateManager::get_dram_cores() const { + std::vector harvested_banks = get_harvested_indices(dram_harvesting_mask); + std::vector unharvested_dram_cores; + for (size_t x = 0; x < dram_grid_size.x; x++) { + if (std::find(harvested_banks.begin(), harvested_banks.end(), x) == harvested_banks.end()) { + for (size_t y = 0; y < dram_grid_size.y; y++) { + const tt_xy_pair core = dram_cores[x * dram_grid_size.y + y]; + CoreCoord core_coord(core.x, core.y, CoreType::DRAM, CoordSystem::PHYSICAL); + unharvested_dram_cores.push_back(core_coord); + } + } + } + return unharvested_dram_cores; +} + +std::vector BlackholeCoordinateManager::get_harvested_dram_cores() const { + std::vector harvested_banks = get_harvested_indices(dram_harvesting_mask); + std::vector harvested_dram_cores; + for (size_t x = 0; x < dram_grid_size.x; x++) { + if (std::find(harvested_banks.begin(), harvested_banks.end(), x) != harvested_banks.end()) { + for (size_t y = 0; y < dram_grid_size.y; y++) { + const tt_xy_pair core = dram_cores[x * dram_grid_size.y + y]; + CoreCoord core_coord(core.x, core.y, CoreType::DRAM, CoordSystem::PHYSICAL); + harvested_dram_cores.push_back(core_coord); + } + } + } + return harvested_dram_cores; +} + +tt_xy_pair BlackholeCoordinateManager::get_harvested_tensix_grid_size() const { + return {CoordinateManager::get_num_harvested(tensix_harvesting_mask), tensix_grid_size.y}; +} + +tt_xy_pair BlackholeCoordinateManager::get_harvested_dram_grid_size() const { + return {CoordinateManager::get_num_harvested(dram_harvesting_mask), dram_grid_size.y}; +} + +tt_xy_pair BlackholeCoordinateManager::get_tensix_grid_size() const { + return {tensix_grid_size.x - CoordinateManager::get_num_harvested(tensix_harvesting_mask), tensix_grid_size.y}; +} + +tt_xy_pair 
BlackholeCoordinateManager::get_dram_grid_size() const { + return {dram_grid_size.x - CoordinateManager::get_num_harvested(dram_harvesting_mask), dram_grid_size.y}; +} diff --git a/device/coordinate_manager.cpp b/device/coordinate_manager.cpp index 84ce2a22..88e08693 100644 --- a/device/coordinate_manager.cpp +++ b/device/coordinate_manager.cpp @@ -5,6 +5,7 @@ */ #include "umd/device/coordinate_manager.h" +#include "api/umd/device/tt_core_coordinates.h" #include "logger.hpp" #include "umd/device/blackhole_coordinate_manager.h" #include "umd/device/grayskull_coordinate_manager.h" @@ -312,6 +313,137 @@ void CoordinateManager::shuffle_tensix_harvesting_mask(const std::vector& CoordinateManager::get_physical_pairs(const CoreType core_type) const { + switch (core_type) { + case CoreType::TENSIX: + return tensix_cores; + case CoreType::DRAM: + return dram_cores; + case CoreType::ETH: + return eth_cores; + case CoreType::ARC: + return arc_cores; + case CoreType::PCIE: + return pcie_cores; + default: + throw std::runtime_error("Core type is not supported for getting physical pairs"); + } +} + +std::vector CoordinateManager::get_all_physical_cores(const CoreType core_type) const { + const std::vector& physical_pairs = get_physical_pairs(core_type); + std::vector physical_cores; + for (const tt_xy_pair& core : physical_pairs) { + CoreCoord core_coord(core.x, core.y, core_type, CoordSystem::PHYSICAL); + physical_cores.push_back(core_coord); + } + return physical_cores; +} + +std::vector CoordinateManager::get_tensix_cores() const { + std::vector harvested_y_coords = get_harvested_indices(tensix_harvesting_mask); + std::vector unharvested_tensix_cores; + for (size_t y = 0; y < tensix_grid_size.y; y++) { + if (std::find(harvested_y_coords.begin(), harvested_y_coords.end(), y) == harvested_y_coords.end()) { + for (size_t x = 0; x < tensix_grid_size.x; x++) { + const tt_xy_pair core = tensix_cores[y * tensix_grid_size.x + x]; + CoreCoord core_coord(core.x, core.y, 
CoreType::TENSIX, CoordSystem::PHYSICAL); + + unharvested_tensix_cores.push_back(core_coord); + } + } + } + return unharvested_tensix_cores; +} + +std::vector CoordinateManager::get_harvested_tensix_cores() const { + std::vector harvested_y_coords = get_harvested_indices(tensix_harvesting_mask); + std::vector harvested_tensix_cores; + for (size_t y = 0; y < tensix_grid_size.y; y++) { + if (std::find(harvested_y_coords.begin(), harvested_y_coords.end(), y) != harvested_y_coords.end()) { + for (size_t x = 0; x < tensix_grid_size.x; x++) { + const tt_xy_pair core = tensix_cores[y * tensix_grid_size.x + x]; + CoreCoord core_coord(core.x, core.y, CoreType::TENSIX, CoordSystem::PHYSICAL); + + harvested_tensix_cores.push_back(core_coord); + } + } + } + return harvested_tensix_cores; +} + +std::vector CoordinateManager::get_dram_cores() const { return get_all_physical_cores(CoreType::DRAM); } + +std::vector CoordinateManager::get_harvested_dram_cores() const { return {}; } + +std::vector CoordinateManager::get_cores(const CoreType core_type) const { + switch (core_type) { + case CoreType::TENSIX: + return get_tensix_cores(); + case CoreType::DRAM: + return get_dram_cores(); + case CoreType::ETH: + return get_all_physical_cores(CoreType::ETH); + case CoreType::ARC: + return get_all_physical_cores(CoreType::ARC); + case CoreType::PCIE: + return get_all_physical_cores(CoreType::PCIE); + default: + throw std::runtime_error("Core type is not supported for getting cores"); + } +} + +tt_xy_pair CoordinateManager::get_tensix_grid_size() const { + return {tensix_grid_size.x, tensix_grid_size.y - CoordinateManager::get_num_harvested(tensix_harvesting_mask)}; +} + +tt_xy_pair CoordinateManager::get_dram_grid_size() const { return dram_grid_size; } + +tt_xy_pair CoordinateManager::get_grid_size(const CoreType core_type) const { + switch (core_type) { + case CoreType::TENSIX: + return get_tensix_grid_size(); + case CoreType::DRAM: + return get_dram_grid_size(); + case CoreType::ETH: + 
return eth_grid_size; + case CoreType::ARC: + return arc_grid_size; + case CoreType::PCIE: + return pcie_grid_size; + default: + throw std::runtime_error("Core type is not supported for getting grid size"); + } +} + +std::vector CoordinateManager::get_harvested_cores(const CoreType core_type) const { + switch (core_type) { + case CoreType::TENSIX: + return get_harvested_tensix_cores(); + case CoreType::DRAM: + return get_harvested_dram_cores(); + default: + throw std::runtime_error("Core type is not supported for getting harvested cores"); + } +} + +tt_xy_pair CoordinateManager::get_harvested_tensix_grid_size() const { + return {tensix_grid_size.x, CoordinateManager::get_num_harvested(tensix_harvesting_mask)}; +} + +tt_xy_pair CoordinateManager::get_harvested_dram_grid_size() const { return {0, 0}; } + +tt_xy_pair CoordinateManager::get_harvested_grid_size(const CoreType core_type) const { + switch (core_type) { + case CoreType::TENSIX: + return get_harvested_tensix_grid_size(); + case CoreType::DRAM: + return get_harvested_dram_grid_size(); + default: + throw std::runtime_error("Core type is not supported for getting harvested grid size"); + } +} + std::shared_ptr CoordinateManager::create_coordinate_manager( tt::ARCH arch, const size_t tensix_harvesting_mask, const size_t dram_harvesting_mask) { assert_create_coordinate_manager(arch, tensix_harvesting_mask, dram_harvesting_mask); diff --git a/device/tt_soc_descriptor.cpp b/device/tt_soc_descriptor.cpp index 92092a1d..a3b8ed30 100644 --- a/device/tt_soc_descriptor.cpp +++ b/device/tt_soc_descriptor.cpp @@ -12,12 +12,15 @@ #include #include +#include "api/umd/device/tt_soc_descriptor.h" #include "fmt/core.h" #include "utils.hpp" #include "yaml-cpp/yaml.h" // #include "l1_address_map.h" +using namespace tt::umd; + std::string format_node(tt_xy_pair xy) { return fmt::format("{}-{}", xy.x, xy.y); } tt_xy_pair format_node(std::string str) { @@ -168,13 +171,47 @@ void 
tt_SocDescriptor::load_core_descriptors_from_device_descriptor(YAML::Node & } } +tt_xy_pair tt_SocDescriptor::calculate_grid_size(const std::vector &cores) { + std::unordered_set x; + std::unordered_set y; + for (auto core : cores) { + x.insert(core.x); + y.insert(core.y); + } + return {x.size(), y.size()}; +} + void tt_SocDescriptor::create_coordinate_manager( const std::size_t tensix_harvesting_mask, const std::size_t dram_harvesting_mask) { - coordinate_manager = - CoordinateManager::create_coordinate_manager(arch, tensix_harvesting_mask, dram_harvesting_mask); + const tt_xy_pair dram_grid_size = tt_xy_pair(dram_cores.size(), dram_cores.empty() ? 0 : dram_cores[0].size()); + const tt_xy_pair arc_grid_size = tt_SocDescriptor::calculate_grid_size(arc_cores); + const tt_xy_pair pcie_grid_size = tt_SocDescriptor::calculate_grid_size(pcie_cores); + const tt_xy_pair eth_grid_size = tt_SocDescriptor::calculate_grid_size(ethernet_cores); + + std::vector dram_cores_unpacked; + for (const auto &vec : dram_cores) { + for (const auto &core : vec) { + dram_cores_unpacked.push_back(core); + } + } + coordinate_manager = CoordinateManager::create_coordinate_manager( + arch, + worker_grid_size, + workers, + tensix_harvesting_mask, + dram_grid_size, + dram_cores_unpacked, + dram_harvesting_mask, + eth_grid_size, + ethernet_cores, + arc_grid_size, + arc_cores, + pcie_grid_size, + pcie_cores); + get_cores_and_grid_size_from_coordinate_manager(); } -tt::umd::CoreCoord tt_SocDescriptor::to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) { +tt::umd::CoreCoord tt_SocDescriptor::to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const { return coordinate_manager->to(core_coord, coord_system); } @@ -228,7 +265,12 @@ bool tt_SocDescriptor::is_worker_core(const tt_xy_pair &core) const { tt_xy_pair tt_SocDescriptor::get_core_for_dram_channel(int dram_chan, int subchannel) const { return this->dram_cores.at(dram_chan).at(subchannel); -}; +} + 
+CoreCoord tt_SocDescriptor::get_dram_core_for_channel(int dram_chan, int subchannel) const { + const CoreCoord logical_dram_coord = CoreCoord(dram_chan, subchannel, CoreType::DRAM, CoordSystem::LOGICAL); + return to(logical_dram_coord, CoordSystem::PHYSICAL); +} bool tt_SocDescriptor::is_ethernet_core(const tt_xy_pair &core) const { return this->ethernet_core_channel_map.find(core) != ethernet_core_channel_map.end(); @@ -249,3 +291,54 @@ std::string tt_SocDescriptor::get_soc_descriptor_path(tt::ARCH arch) { throw std::runtime_error("Invalid architecture"); } } + +void tt_SocDescriptor::get_cores_and_grid_size_from_coordinate_manager() { + cores_map.insert({CoreType::TENSIX, coordinate_manager->get_cores(CoreType::TENSIX)}); + grid_size_map.insert({CoreType::TENSIX, coordinate_manager->get_grid_size(CoreType::TENSIX)}); + + cores_map.insert({CoreType::DRAM, coordinate_manager->get_cores(CoreType::DRAM)}); + grid_size_map.insert({CoreType::DRAM, coordinate_manager->get_grid_size(CoreType::DRAM)}); + + cores_map.insert({CoreType::ETH, coordinate_manager->get_cores(CoreType::ETH)}); + grid_size_map.insert({CoreType::ETH, coordinate_manager->get_grid_size(CoreType::ETH)}); + + cores_map.insert({CoreType::ARC, coordinate_manager->get_cores(CoreType::ARC)}); + grid_size_map.insert({CoreType::ARC, coordinate_manager->get_grid_size(CoreType::ARC)}); + + cores_map.insert({CoreType::PCIE, coordinate_manager->get_cores(CoreType::PCIE)}); + grid_size_map.insert({CoreType::PCIE, coordinate_manager->get_grid_size(CoreType::PCIE)}); + + harvested_cores_map.insert({CoreType::TENSIX, coordinate_manager->get_harvested_cores(CoreType::TENSIX)}); + harvested_grid_size_map.insert({CoreType::TENSIX, coordinate_manager->get_harvested_grid_size(CoreType::TENSIX)}); + + harvested_cores_map.insert({CoreType::DRAM, coordinate_manager->get_harvested_cores(CoreType::DRAM)}); + harvested_grid_size_map.insert({CoreType::DRAM, coordinate_manager->get_harvested_grid_size(CoreType::DRAM)}); +} + 
+std::vector tt_SocDescriptor::get_cores(const CoreType core_type) const { + if (cores_map.find(core_type) == cores_map.end()) { + return {}; + } + return cores_map.at(core_type); +} + +std::vector tt_SocDescriptor::get_harvested_cores(const CoreType core_type) const { + if (harvested_cores_map.find(core_type) == harvested_cores_map.end()) { + return {}; + } + return harvested_cores_map.at(core_type); +} + +tt_xy_pair tt_SocDescriptor::get_grid_size(const CoreType core_type) const { + if (grid_size_map.find(core_type) == grid_size_map.end()) { + return {0, 0}; + } + return grid_size_map.at(core_type); +} + +tt_xy_pair tt_SocDescriptor::get_harvested_grid_size(const CoreType core_type) const { + if (harvested_grid_size_map.find(core_type) == harvested_grid_size_map.end()) { + return {0, 0}; + } + return harvested_grid_size_map.at(core_type); +} diff --git a/tests/api/CMakeLists.txt b/tests/api/CMakeLists.txt index fc0bc55f..8d12a1ae 100644 --- a/tests/api/CMakeLists.txt +++ b/tests/api/CMakeLists.txt @@ -6,6 +6,7 @@ set(API_TESTS_SRCS test_core_coord_translation_wh.cpp test_core_coord_translation_bh.cpp test_mockup_device.cpp + test_soc_descriptor.cpp ) add_executable(api_tests ${API_TESTS_SRCS}) diff --git a/tests/api/test_soc_descriptor.cpp b/tests/api/test_soc_descriptor.cpp new file mode 100644 index 00000000..f094443c --- /dev/null +++ b/tests/api/test_soc_descriptor.cpp @@ -0,0 +1,241 @@ +/* + * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include "gtest/gtest.h" +#include "tests/test_utils/generate_cluster_desc.hpp" +#include "umd/device/blackhole_implementation.h" +#include "umd/device/grayskull_implementation.h" +#include "umd/device/tt_soc_descriptor.h" +#include "umd/device/wormhole_implementation.h" + +using namespace tt::umd; + +// Test soc descriptor API for Grayskull when there is no harvesting.
+TEST(SocDescriptor, SocDescriptorGrayskullNoHarvesting) { + tt_SocDescriptor soc_desc(test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml")); + + const std::vector grayskull_tensix_cores = tt::umd::grayskull::TENSIX_CORES; + + ASSERT_EQ(soc_desc.get_num_dram_channels(), tt::umd::grayskull::NUM_DRAM_BANKS); + + for (const tt_xy_pair& tensix_core : grayskull_tensix_cores) { + ASSERT_TRUE(soc_desc.is_worker_core(tensix_core)); + ASSERT_FALSE(soc_desc.is_ethernet_core(tensix_core)); + } + + ASSERT_TRUE(soc_desc.get_harvested_cores(CoreType::TENSIX).empty()); + ASSERT_TRUE(soc_desc.get_harvested_cores(CoreType::DRAM).empty()); +} + +// Test soc descriptor API for Grayskull when there is tensix harvesting. +TEST(SocDescriptor, SocDescriptorGrayskullOneRowHarvesting) { + const tt_xy_pair grayskull_tensix_grid_size = tt::umd::grayskull::TENSIX_GRID_SIZE; + const std::vector grayskull_tensix_cores = tt::umd::grayskull::TENSIX_CORES; + const size_t harvesting_mask = (1 << tt::umd::grayskull::LOGICAL_HARVESTING_LAYOUT[0]); + + tt_SocDescriptor soc_desc(test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml"), harvesting_mask); + + const std::vector tensix_cores = soc_desc.get_cores(CoreType::TENSIX); + + ASSERT_EQ(tensix_cores.size(), grayskull_tensix_grid_size.x * (grayskull_tensix_grid_size.y - 1)); + + size_t index = grayskull_tensix_grid_size.x; + + for (size_t core_index = 0; core_index < tensix_cores.size(); core_index++) { + ASSERT_EQ(tensix_cores[core_index].x, grayskull_tensix_cores[index].x); + ASSERT_EQ(tensix_cores[core_index].y, grayskull_tensix_cores[index].y); + index++; + } + + const std::vector harvested_cores = soc_desc.get_harvested_cores(CoreType::TENSIX); + + ASSERT_FALSE(harvested_cores.empty()); + + ASSERT_TRUE(soc_desc.get_harvested_cores(CoreType::DRAM).empty()); +} + +// Test soc descriptor API for Wormhole when there is no harvesting. 
+TEST(SocDescriptor, SocDescriptorWormholeNoHarvesting) { + tt_SocDescriptor soc_desc(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml")); + + const std::vector wormhole_tensix_cores = tt::umd::wormhole::TENSIX_CORES; + + ASSERT_EQ(soc_desc.get_num_dram_channels(), tt::umd::wormhole::NUM_DRAM_BANKS); + + for (const tt_xy_pair& tensix_core : wormhole_tensix_cores) { + ASSERT_TRUE(soc_desc.is_worker_core(tensix_core)); + ASSERT_FALSE(soc_desc.is_ethernet_core(tensix_core)); + } + + ASSERT_TRUE(soc_desc.get_harvested_cores(CoreType::TENSIX).empty()); + ASSERT_TRUE(soc_desc.get_harvested_cores(CoreType::DRAM).empty()); +} + +// Test soc descriptor API for Wormhole when there is tensix harvesting. +TEST(SocDescriptor, SocDescriptorWormholeOneRowHarvesting) { + const tt_xy_pair wormhole_tensix_grid_size = tt::umd::wormhole::TENSIX_GRID_SIZE; + const std::vector wormhole_tensix_cores = tt::umd::wormhole::TENSIX_CORES; + const size_t harvesting_mask = (1 << tt::umd::wormhole::LOGICAL_HARVESTING_LAYOUT[0]); + + tt_SocDescriptor soc_desc(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), harvesting_mask); + + const std::vector tensix_cores = soc_desc.get_cores(CoreType::TENSIX); + + ASSERT_EQ(tensix_cores.size(), wormhole_tensix_grid_size.x * (wormhole_tensix_grid_size.y - 1)); + + size_t index = wormhole_tensix_grid_size.x; + + for (size_t core_index = 0; core_index < tensix_cores.size(); core_index++) { + ASSERT_EQ(tensix_cores[core_index].x, wormhole_tensix_cores[index].x); + ASSERT_EQ(tensix_cores[core_index].y, wormhole_tensix_cores[index].y); + index++; + } + + const std::vector harvested_cores = soc_desc.get_harvested_cores(CoreType::TENSIX); + + ASSERT_FALSE(harvested_cores.empty()); + + ASSERT_TRUE(soc_desc.get_harvested_cores(CoreType::DRAM).empty()); +} + +// Test soc descriptor API for Blackhole when there is no harvesting. 
+TEST(SocDescriptor, SocDescriptorBlackholeNoHarvesting) { + tt_SocDescriptor soc_desc(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml")); + + const std::vector blackhole_tensix_cores = tt::umd::blackhole::TENSIX_CORES; + + ASSERT_EQ(soc_desc.get_num_dram_channels(), tt::umd::blackhole::NUM_DRAM_BANKS); + + for (const tt_xy_pair& tensix_core : blackhole_tensix_cores) { + ASSERT_TRUE(soc_desc.is_worker_core(tensix_core)); + ASSERT_FALSE(soc_desc.is_ethernet_core(tensix_core)); + } + + ASSERT_TRUE(soc_desc.get_harvested_cores(CoreType::TENSIX).empty()); + ASSERT_TRUE(soc_desc.get_harvested_cores(CoreType::DRAM).empty()); +} + +// Test soc descriptor API for Blackhole when there is tensix harvesting. +TEST(SocDescriptor, SocDescriptorBlackholeOneRowHarvesting) { + const tt_xy_pair blackhole_tensix_grid_size = tt::umd::blackhole::TENSIX_GRID_SIZE; + const std::vector blackhole_tensix_cores = tt::umd::blackhole::TENSIX_CORES; + + tt_SocDescriptor soc_desc(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml"), 1); + + const std::vector tensix_cores = soc_desc.get_cores(CoreType::TENSIX); + + ASSERT_EQ(tensix_cores.size(), (blackhole_tensix_grid_size.x - 1) * blackhole_tensix_grid_size.y); + + size_t index = 1; + + for (size_t core_index = 0; core_index < tensix_cores.size(); core_index++) { + ASSERT_EQ(tensix_cores[core_index].x, blackhole_tensix_cores[index].x); + ASSERT_EQ(tensix_cores[core_index].y, blackhole_tensix_cores[index].y); + index++; + if (index % blackhole_tensix_grid_size.x == 0) { + index++; + } + } + + const std::vector harvested_cores = soc_desc.get_harvested_cores(CoreType::TENSIX); + + ASSERT_FALSE(harvested_cores.empty()); + + ASSERT_TRUE(soc_desc.get_harvested_cores(CoreType::DRAM).empty()); +} + +// Test soc descriptor API for Blackhole when there is DRAM harvesting. 
+TEST(SocDescriptor, SocDescriptorBlackholeDRAMHarvesting) {
+    const tt_xy_pair blackhole_tensix_grid_size = tt::umd::blackhole::TENSIX_GRID_SIZE;
+    const std::vector blackhole_tensix_cores = tt::umd::blackhole::TENSIX_CORES;
+    const std::vector blackhole_dram_cores = tt::umd::blackhole::DRAM_CORES;
+    const size_t num_dram_banks = tt::umd::blackhole::NUM_DRAM_BANKS;
+    const size_t num_noc_ports_per_bank = tt::umd::blackhole::NUM_NOC_PORTS_PER_DRAM_BANK;
+
+    // Extra constructor arguments are 0 and 1 (presumably the tensix and DRAM harvesting masks).
+    tt_SocDescriptor soc_desc(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml"), 0, 1);
+
+    // The tensix grid must be complete and match the reference list entry by entry.
+    const std::vector tensix_cores = soc_desc.get_cores(CoreType::TENSIX);
+    ASSERT_EQ(tensix_cores.size(), blackhole_tensix_grid_size.x * blackhole_tensix_grid_size.y);
+
+    for (size_t core_index = 0; core_index < tensix_cores.size(); core_index++) {
+        ASSERT_EQ(tensix_cores[core_index].x, blackhole_tensix_cores[core_index].x);
+        ASSERT_EQ(tensix_cores[core_index].y, blackhole_tensix_cores[core_index].y);
+    }
+
+    ASSERT_TRUE(soc_desc.get_harvested_cores(CoreType::TENSIX).empty());
+
+    // One bank's worth of NOC ports must be missing from the DRAM list and present in the harvested list.
+    const std::vector dram_cores = soc_desc.get_cores(CoreType::DRAM);
+    ASSERT_EQ(dram_cores.size(), (num_dram_banks - 1) * num_noc_ports_per_bank);
+
+    const std::vector harvested_dram_cores = soc_desc.get_harvested_cores(CoreType::DRAM);
+    ASSERT_EQ(harvested_dram_cores.size(), num_noc_ports_per_bank);
+
+    // The harvested cores must equal the leading entries of the reference DRAM list.
+    for (size_t core_index = 0; core_index < num_noc_ports_per_bank; core_index++) {
+        ASSERT_EQ(harvested_dram_cores[core_index].x, blackhole_dram_cores[core_index].x);
+        ASSERT_EQ(harvested_dram_cores[core_index].y, blackhole_dram_cores[core_index].y);
+    }
+}
+
+// Test soc descriptor API for the custom blackhole_simulation_1x2.yaml (two tensix cores, one DRAM core).
+TEST(SocDescriptor, CustomSocDescriptor) {
+    tt_SocDescriptor soc_desc(test_utils::GetAbsPath("tests/soc_descs/blackhole_simulation_1x2.yaml"), 0, 0);
+
+    // Physical tensix core (0, 1): virtual and translated match physical, logical is (0, 0).
+    const CoreCoord tensix_core_01 = CoreCoord(0, 1, CoreType::TENSIX, CoordSystem::PHYSICAL);
+    const CoreCoord tensix_core_01_virtual = soc_desc.to(tensix_core_01, CoordSystem::VIRTUAL);
+    const CoreCoord tensix_core_01_logical = soc_desc.to(tensix_core_01, CoordSystem::LOGICAL);
+    const CoreCoord tensix_core_01_translated = soc_desc.to(tensix_core_01, CoordSystem::TRANSLATED);
+
+    EXPECT_EQ(tensix_core_01_virtual.x, tensix_core_01.x);
+    EXPECT_EQ(tensix_core_01_virtual.y, tensix_core_01.y);
+    EXPECT_EQ(tensix_core_01_virtual.x, tensix_core_01_translated.x);
+    EXPECT_EQ(tensix_core_01_virtual.y, tensix_core_01_translated.y);
+
+    EXPECT_EQ(tensix_core_01_logical.x, 0);
+    EXPECT_EQ(tensix_core_01_logical.y, 0);
+
+    // Physical tensix core (1, 1): virtual and translated match physical, logical is (1, 0).
+    const CoreCoord tensix_core_11 = CoreCoord(1, 1, CoreType::TENSIX, CoordSystem::PHYSICAL);
+    const CoreCoord tensix_core_11_virtual = soc_desc.to(tensix_core_11, CoordSystem::VIRTUAL);
+    const CoreCoord tensix_core_11_logical = soc_desc.to(tensix_core_11, CoordSystem::LOGICAL);
+    const CoreCoord tensix_core_11_translated = soc_desc.to(tensix_core_11, CoordSystem::TRANSLATED);
+
+    EXPECT_EQ(tensix_core_11_virtual.x, tensix_core_11.x);
+    EXPECT_EQ(tensix_core_11_virtual.y, tensix_core_11.y);
+    EXPECT_EQ(tensix_core_11_virtual.x, tensix_core_11_translated.x);
+    EXPECT_EQ(tensix_core_11_virtual.y, tensix_core_11_translated.y);
+
+    EXPECT_EQ(tensix_core_11_logical.x, 1);
+    EXPECT_EQ(tensix_core_11_logical.y, 0);
+
+    // The size check is fatal so the indexed comparisons below never read past the end of `cores`.
+    std::vector cores = soc_desc.get_cores(CoreType::TENSIX);
+    ASSERT_EQ(cores.size(), 2);
+    EXPECT_EQ(cores[0], tensix_core_01);
+    EXPECT_EQ(cores[1], tensix_core_11);
+
+    std::vector harvested_tensix_cores = soc_desc.get_harvested_cores(CoreType::TENSIX);
+    EXPECT_TRUE(harvested_tensix_cores.empty());
+
+    // Physical DRAM core (1, 0): virtual and translated match physical, logical is (0, 0).
+    const CoreCoord dram_core_10 = CoreCoord(1, 0, CoreType::DRAM, CoordSystem::PHYSICAL);
+    const CoreCoord dram_core_10_virtual = soc_desc.to(dram_core_10, CoordSystem::VIRTUAL);
+    const CoreCoord dram_core_10_logical = soc_desc.to(dram_core_10, CoordSystem::LOGICAL);
+    const CoreCoord dram_core_10_translated = soc_desc.to(dram_core_10, CoordSystem::TRANSLATED);
+
+    EXPECT_EQ(dram_core_10_virtual.x, dram_core_10.x);
+    EXPECT_EQ(dram_core_10_virtual.y, dram_core_10.y);
+    EXPECT_EQ(dram_core_10.x, dram_core_10_translated.x);
+    EXPECT_EQ(dram_core_10.y, dram_core_10_translated.y);
+
+    EXPECT_EQ(dram_core_10_logical.x, 0);
+    EXPECT_EQ(dram_core_10_logical.y, 0);
+
+    EXPECT_EQ(soc_desc.get_num_dram_channels(), 1);
+}
diff --git a/tests/soc_descs/blackhole_simulation_1x2.yaml b/tests/soc_descs/blackhole_simulation_1x2.yaml
new file mode 100644
index 00000000..471a6458
--- /dev/null
+++ b/tests/soc_descs/blackhole_simulation_1x2.yaml
@@ -0,0 +1,55 @@
+grid:
+  x_size: 2
+  y_size: 2
+
+arc:
+  []
+
+pcie:
+  []
+
+dram:
+  [[1-0]]
+
+dram_preferred_eth_endpoint:
+  [ 1-0 ]
+
+dram_preferred_worker_endpoint:
+  [ 1-0 ]
+
+dram_address_offsets:
+  [ 0 ]
+
+eth:
+  []
+
+functional_workers:
+  [0-1, 1-1]
+
+harvested_workers:
+  []
+
+router_only:
+  [0-0]
+
+worker_l1_size:
+  1499136
+
+dram_bank_size:
+  1073741824
+
+eth_l1_size:
+  0
+
+arch_name: BLACKHOLE
+
+features:
+  unpacker:
+    version: 1
+    inline_srca_trans_without_srca_trans_instr: False
+  math:
+    dst_size_alignment: 32768
+  packer:
+    version: 1
+  overlay:
+    version: 1