Skip to content

Commit

Permalink
Implement soc descriptor CoreCoord API
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT committed Dec 10, 2024
1 parent 44bec36 commit 882bfa5
Show file tree
Hide file tree
Showing 11 changed files with 639 additions and 51 deletions.
3 changes: 3 additions & 0 deletions device/api/umd/device/blackhole_coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ class BlackholeCoordinateManager : public CoordinateManager {
void fill_pcie_logical_to_translated() override;
void fill_dram_logical_to_translated() override;

void fill_tensix_core_structures() override;
void fill_dram_core_structures() override;

private:
void map_column_of_dram_banks(const size_t start_bank, const size_t end_bank, const size_t x_coord);
};
30 changes: 25 additions & 5 deletions device/api/umd/device/coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ class CoordinateManager {

tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system);

std::pair<tt_xy_pair, std::vector<tt::umd::CoreCoord>> get_cores_and_grid(const CoreType core_type);

std::pair<tt_xy_pair, std::vector<tt::umd::CoreCoord>> get_harvested_cores_and_grid(const CoreType core_type);

virtual ~CoordinateManager() = default;

private:
Expand All @@ -74,6 +78,13 @@ class CoordinateManager {
virtual void translate_arc_coords();
virtual void translate_pcie_coords();

void fill_core_structures();
virtual void fill_tensix_core_structures();
virtual void fill_dram_core_structures();
virtual void fill_eth_core_structures();
virtual void fill_arc_core_structures();
virtual void fill_pcie_core_structures();

/*
* Fills the logical to translated mapping for the tensix cores.
* By default, translated coordinates are the same as physical coordinates.
Expand Down Expand Up @@ -121,20 +132,29 @@ class CoordinateManager {
std::map<tt::umd::CoreCoord, tt_xy_pair> to_physical_map;
std::map<std::pair<tt_xy_pair, CoordSystem>, tt::umd::CoreCoord> from_physical_map;

const tt_xy_pair tensix_grid_size;
tt_xy_pair tensix_grid_size;
const std::vector<tt_xy_pair>& tensix_cores;
std::vector<tt::umd::CoreCoord> unharvested_tensix_cores;
tt_xy_pair harvested_tensix_grid_size;
std::vector<tt::umd::CoreCoord> harvested_tensix_cores;
size_t tensix_harvesting_mask;

const tt_xy_pair dram_grid_size;
tt_xy_pair dram_grid_size;
const std::vector<tt_xy_pair>& dram_cores;
std::vector<tt::umd::CoreCoord> unharvested_dram_cores;
tt_xy_pair harvested_dram_grid_size;
std::vector<tt::umd::CoreCoord> harvested_dram_cores;
size_t dram_harvesting_mask;

const tt_xy_pair eth_grid_size;
tt_xy_pair eth_grid_size;
const std::vector<tt_xy_pair>& eth_cores;
std::vector<tt::umd::CoreCoord> unharvested_eth_cores;

const tt_xy_pair arc_grid_size;
tt_xy_pair arc_grid_size;
const std::vector<tt_xy_pair>& arc_cores;
std::vector<tt::umd::CoreCoord> unharvested_arc_cores;

const tt_xy_pair pcie_grid_size;
tt_xy_pair pcie_grid_size;
const std::vector<tt_xy_pair>& pcie_cores;
std::vector<tt::umd::CoreCoord> unharvested_pcie_cores;
};
91 changes: 53 additions & 38 deletions device/api/umd/device/tt_soc_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,42 +83,6 @@ struct CoreDescriptor {
*/
class tt_SocDescriptor {
public:
tt::ARCH arch;
tt_xy_pair grid_size;
tt_xy_pair physical_grid_size;
tt_xy_pair worker_grid_size;
std::unordered_map<tt_xy_pair, CoreDescriptor> cores;
std::vector<tt_xy_pair> arc_cores;
std::vector<tt_xy_pair> workers;
std::vector<tt_xy_pair> harvested_workers;
std::vector<tt_xy_pair> pcie_cores;
std::unordered_map<int, int> worker_log_to_routing_x;
std::unordered_map<int, int> worker_log_to_routing_y;
std::unordered_map<int, int> routing_x_to_worker_x;
std::unordered_map<int, int> routing_y_to_worker_y;
std::vector<std::vector<tt_xy_pair>> dram_cores; // per channel list of dram cores
std::unordered_map<tt_xy_pair, std::tuple<int, int>> dram_core_channel_map; // map dram core to chan/subchan
std::vector<tt_xy_pair> ethernet_cores; // ethernet cores (index == channel id)
std::unordered_map<tt_xy_pair, int> ethernet_core_channel_map;
std::vector<std::size_t> trisc_sizes; // Most of software stack assumes same trisc size for whole chip..
std::string device_descriptor_file_path = std::string("");

bool has(tt_xy_pair input) { return cores.find(input) != cores.end(); }

int overlay_version;
int unpacker_version;
int dst_size_alignment;
int packer_version;
int worker_l1_size;
int eth_l1_size;
bool noc_translation_id_enabled;
uint64_t dram_bank_size;

int get_num_dram_channels() const;
bool is_worker_core(const tt_xy_pair &core) const;
tt_xy_pair get_core_for_dram_channel(int dram_chan, int subchannel) const;
bool is_ethernet_core(const tt_xy_pair &core) const;

// Default constructor. Creates uninitialized object with public access to all of its attributes.
tt_SocDescriptor() = default;
// Constructor used to build object from device descriptor file.
Expand Down Expand Up @@ -156,22 +120,73 @@ class tt_SocDescriptor {
eth_l1_size(other.eth_l1_size),
noc_translation_id_enabled(other.noc_translation_id_enabled),
dram_bank_size(other.dram_bank_size),
coordinate_manager(other.coordinate_manager) {}
coordinate_manager(other.coordinate_manager),
cores_map(other.cores_map),
harvested_cores_map(other.harvested_cores_map) {}

// CoreCoord conversions.
tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system);
tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const;

static std::string get_soc_descriptor_path(tt::ARCH arch);

std::vector<tt::umd::CoreCoord> get_cores(const CoreType core_type) const;
std::vector<tt::umd::CoreCoord> get_harvested_cores(const CoreType core_type) const;

int get_num_dram_channels() const;

bool is_worker_core(const tt_xy_pair &core) const;

tt_xy_pair get_core_for_dram_channel(int dram_chan, int subchannel) const;

tt::umd::CoreCoord get_dram_core_for_channel(int dram_chan, int subchannel) const;
tt::umd::CoreCoord get_dram_core(uint32_t dram_chan, uint32_t subchannel) const;

bool is_ethernet_core(const tt_xy_pair &core) const;

int overlay_version;
int unpacker_version;
int dst_size_alignment;
int packer_version;
int worker_l1_size;
int eth_l1_size;
bool noc_translation_id_enabled;
uint64_t dram_bank_size;

tt::ARCH arch;
tt_xy_pair grid_size;
tt_xy_pair physical_grid_size;
tt_xy_pair worker_grid_size;
std::unordered_map<tt_xy_pair, CoreDescriptor> cores;
std::vector<tt_xy_pair> arc_cores;
std::vector<tt_xy_pair> workers;
std::vector<tt_xy_pair> harvested_workers;
std::vector<tt_xy_pair> pcie_cores;
std::unordered_map<int, int> worker_log_to_routing_x;
std::unordered_map<int, int> worker_log_to_routing_y;
std::unordered_map<int, int> routing_x_to_worker_x;
std::unordered_map<int, int> routing_y_to_worker_y;
std::vector<std::vector<tt_xy_pair>> dram_cores; // per channel list of dram cores
std::unordered_map<tt_xy_pair, std::tuple<int, int>> dram_core_channel_map; // map dram core to chan/subchan
std::vector<tt_xy_pair> ethernet_cores; // ethernet cores (index == channel id)
std::unordered_map<tt_xy_pair, int> ethernet_core_channel_map;
std::vector<std::size_t> trisc_sizes; // Most of software stack assumes same trisc size for whole chip..
std::string device_descriptor_file_path = std::string("");

private:
void create_coordinate_manager(const std::size_t tensix_harvesting_mask, const std::size_t dram_harvesting_mask);
void load_core_descriptors_from_device_descriptor(YAML::Node &device_descriptor_yaml);
void load_soc_features_from_device_descriptor(YAML::Node &device_descriptor_yaml);
void get_coordinates_from_coordinate_manager();

static tt_xy_pair calculate_grid_size(const std::vector<tt_xy_pair> &cores);

// TODO: change this to unique pointer as soon as copying of tt_SocDescriptor
// is not needed anymore. Soc descriptor and coordinate manager should be
// created once per chip.
std::shared_ptr<CoordinateManager> coordinate_manager = nullptr;

std::unordered_map<CoreType, std::pair<tt_xy_pair, std::vector<tt::umd::CoreCoord>>> cores_map;
std::unordered_map<CoreType, std::pair<tt_xy_pair, std::vector<tt::umd::CoreCoord>>> harvested_cores_map;
};

// Allocates a new soc descriptor on the heap. Returns an owning pointer.
Expand Down
58 changes: 58 additions & 0 deletions device/blackhole/blackhole_coordinate_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ BlackholeCoordinateManager::BlackholeCoordinateManager(
this->translate_eth_coords();
this->translate_arc_coords();
this->translate_pcie_coords();
this->fill_core_structures();
}

void BlackholeCoordinateManager::translate_tensix_coords() {
Expand Down Expand Up @@ -233,6 +234,25 @@ void BlackholeCoordinateManager::map_column_of_dram_banks(
}

void BlackholeCoordinateManager::fill_dram_logical_to_translated() {
if (dram_grid_size.x < blackhole::NUM_DRAM_BANKS) {
// If there are fewer DRAM banks than a standard Blackhole SoC has (blackhole::NUM_DRAM_BANKS),
// map the translated DRAM cores to be the same as the physical DRAM cores.
// TODO: Figure out how DRAM should be mapped to translated coordinates when there are fewer DRAM banks.
for (size_t x = 0; x < dram_grid_size.x; x++) {
for (size_t y = 0; y < dram_grid_size.y; y++) {
const CoreCoord logical_dram_core = CoreCoord(x, y, CoreType::DRAM, CoordSystem::LOGICAL);
const tt_xy_pair physical_dram_core = to_physical_map[logical_dram_core];

CoreCoord translated_dram_core =
CoreCoord(physical_dram_core.x, physical_dram_core.y, CoreType::DRAM, CoordSystem::TRANSLATED);
to_physical_map[translated_dram_core] = physical_dram_core;
from_physical_map[{{physical_dram_core.x, physical_dram_core.y}, CoordSystem::TRANSLATED}] =
translated_dram_core;
}
}
return;
}

const std::vector<size_t> harvested_banks = CoordinateManager::get_harvested_indices(dram_harvesting_mask);

if (harvested_banks.empty()) {
Expand Down Expand Up @@ -288,3 +308,41 @@ void BlackholeCoordinateManager::fill_dram_logical_to_translated() {
from_physical_map[{{physical_core.x, physical_core.y}, CoordSystem::TRANSLATED}] = translated_coord;
}
}

void BlackholeCoordinateManager::fill_tensix_core_structures() {
std::vector<size_t> harvested_x_coords = get_harvested_indices(tensix_harvesting_mask);
for (size_t y = 0; y < tensix_grid_size.y; y++) {
for (size_t x = 0; x < tensix_grid_size.x; x++) {
const tt_xy_pair core = tensix_cores[y * tensix_grid_size.x + x];
CoreCoord core_coord(core.x, core.y, CoreType::TENSIX, CoordSystem::PHYSICAL);
if (std::find(harvested_x_coords.begin(), harvested_x_coords.end(), x) == harvested_x_coords.end()) {
unharvested_tensix_cores.push_back(core_coord);
} else {
harvested_tensix_cores.push_back(core_coord);
}
}
}
const size_t num_harvested_x = harvested_x_coords.size();
tensix_grid_size.x -= num_harvested_x;
harvested_tensix_grid_size.x = num_harvested_x;
harvested_tensix_grid_size.y = tensix_grid_size.y;
}

void BlackholeCoordinateManager::fill_dram_core_structures() {
std::vector<size_t> harvested_banks = get_harvested_indices(dram_harvesting_mask);
for (size_t bank = 0; bank < dram_grid_size.x; bank++) {
for (size_t port = 0; port < dram_grid_size.y; port++) {
const tt_xy_pair core = dram_cores[bank * dram_grid_size.y + port];
CoreCoord core_coord(core.x, core.y, CoreType::DRAM, CoordSystem::PHYSICAL);
if (std::find(harvested_banks.begin(), harvested_banks.end(), bank) == harvested_banks.end()) {
unharvested_dram_cores.push_back(core_coord);
} else {
harvested_dram_cores.push_back(core_coord);
}
}
}
const size_t num_harvested_banks = harvested_banks.size();
dram_grid_size.x -= num_harvested_banks;
harvested_dram_grid_size.x = num_harvested_banks;
harvested_dram_grid_size.y = dram_grid_size.y;
}
101 changes: 101 additions & 0 deletions device/coordinate_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/
#include "umd/device/coordinate_manager.h"

#include "api/umd/device/tt_core_coordinates.h"
#include "logger.hpp"
#include "umd/device/blackhole_coordinate_manager.h"
#include "umd/device/grayskull_coordinate_manager.h"
Expand Down Expand Up @@ -321,6 +322,75 @@ void CoordinateManager::fill_arc_logical_to_translated() {
}
}

void CoordinateManager::fill_tensix_core_structures() {
std::vector<size_t> harvested_y_coords = get_harvested_indices(tensix_harvesting_mask);
for (size_t y = 0; y < tensix_grid_size.y; y++) {
for (size_t x = 0; x < tensix_grid_size.x; x++) {
const tt_xy_pair core = tensix_cores[y * tensix_grid_size.x + x];
CoreCoord core_coord(core.x, core.y, CoreType::TENSIX, CoordSystem::PHYSICAL);
if (std::find(harvested_y_coords.begin(), harvested_y_coords.end(), y) == harvested_y_coords.end()) {
unharvested_tensix_cores.push_back(core_coord);
} else {
harvested_tensix_cores.push_back(core_coord);
}
}
}
const size_t num_harvested_y = harvested_y_coords.size();
tensix_grid_size.y -= num_harvested_y;
harvested_tensix_grid_size.x = tensix_grid_size.x;
harvested_tensix_grid_size.y = num_harvested_y;
}

void CoordinateManager::fill_dram_core_structures() {
for (size_t bank = 0; bank < dram_grid_size.x; bank++) {
for (size_t port = 0; port < dram_grid_size.y; port++) {
const tt_xy_pair core = dram_cores[bank * dram_grid_size.y + port];
CoreCoord core_coord(core.x, core.y, CoreType::DRAM, CoordSystem::PHYSICAL);
unharvested_dram_cores.push_back(core_coord);
}
}
harvested_dram_grid_size.x = 0;
harvested_dram_grid_size.y = 0;
}

void CoordinateManager::fill_eth_core_structures() {
for (size_t y = 0; y < eth_grid_size.y; y++) {
for (size_t x = 0; x < eth_grid_size.x; x++) {
const tt_xy_pair core = eth_cores[y * eth_grid_size.x + x];
CoreCoord core_coord(core.x, core.y, CoreType::ETH, CoordSystem::PHYSICAL);
unharvested_eth_cores.push_back(core_coord);
}
}
}

void CoordinateManager::fill_arc_core_structures() {
for (size_t y = 0; y < arc_grid_size.y; y++) {
for (size_t x = 0; x < arc_grid_size.x; x++) {
const tt_xy_pair core = arc_cores[y * arc_grid_size.x + x];
CoreCoord core_coord(core.x, core.y, CoreType::ARC, CoordSystem::PHYSICAL);
unharvested_arc_cores.push_back(core_coord);
}
}
}

void CoordinateManager::fill_pcie_core_structures() {
for (size_t y = 0; y < pcie_grid_size.y; y++) {
for (size_t x = 0; x < pcie_grid_size.x; x++) {
const tt_xy_pair core = pcie_cores[y * pcie_grid_size.x + x];
CoreCoord core_coord(core.x, core.y, CoreType::PCIE, CoordSystem::PHYSICAL);
unharvested_pcie_cores.push_back(core_coord);
}
}
}

void CoordinateManager::fill_core_structures() {
fill_tensix_core_structures();
fill_dram_core_structures();
fill_eth_core_structures();
fill_arc_core_structures();
fill_pcie_core_structures();
}

void CoordinateManager::assert_create_coordinate_manager(
const tt::ARCH arch, const size_t tensix_harvesting_mask, const size_t dram_harvesting_mask) {
log_assert(
Expand All @@ -331,6 +401,35 @@ void CoordinateManager::assert_create_coordinate_manager(
}
}

// Returns the grid size paired with a copy of the unharvested core list for the requested core type.
// Supported types: TENSIX, DRAM, ETH, ARC and PCIE.
// Throws std::runtime_error for any other core type.
std::pair<tt_xy_pair, std::vector<tt::umd::CoreCoord>> CoordinateManager::get_cores_and_grid(const CoreType core_type) {
    using GridAndCores = std::pair<tt_xy_pair, std::vector<tt::umd::CoreCoord>>;

    if (core_type == CoreType::TENSIX) {
        return GridAndCores(tensix_grid_size, unharvested_tensix_cores);
    }
    if (core_type == CoreType::DRAM) {
        return GridAndCores(dram_grid_size, unharvested_dram_cores);
    }
    if (core_type == CoreType::ETH) {
        return GridAndCores(eth_grid_size, unharvested_eth_cores);
    }
    if (core_type == CoreType::ARC) {
        return GridAndCores(arc_grid_size, unharvested_arc_cores);
    }
    if (core_type == CoreType::PCIE) {
        return GridAndCores(pcie_grid_size, unharvested_pcie_cores);
    }
    throw std::runtime_error("Core type is not supported for getting cores");
}

// Returns the harvested grid size paired with a copy of the harvested core list for the requested
// core type. Only TENSIX and DRAM are supported.
// Throws std::runtime_error for any other core type.
std::pair<tt_xy_pair, std::vector<tt::umd::CoreCoord>> CoordinateManager::get_harvested_cores_and_grid(
    const CoreType core_type) {
    using GridAndCores = std::pair<tt_xy_pair, std::vector<tt::umd::CoreCoord>>;

    if (core_type == CoreType::TENSIX) {
        return GridAndCores(harvested_tensix_grid_size, harvested_tensix_cores);
    }
    if (core_type == CoreType::DRAM) {
        return GridAndCores(harvested_dram_grid_size, harvested_dram_cores);
    }
    throw std::runtime_error("Core type is not supported for getting harvested cores");
}

std::shared_ptr<CoordinateManager> CoordinateManager::create_coordinate_manager(
tt::ARCH arch, const size_t tensix_harvesting_mask, const size_t dram_harvesting_mask) {
assert_create_coordinate_manager(arch, tensix_harvesting_mask, dram_harvesting_mask);
Expand Down Expand Up @@ -381,6 +480,8 @@ std::shared_ptr<CoordinateManager> CoordinateManager::create_coordinate_manager(
tt::umd::blackhole::ARC_CORES,
tt::umd::blackhole::PCIE_GRID_SIZE,
tt::umd::blackhole::PCIE_CORES);
default:
throw std::runtime_error("Invalid architecture specified for creating coordinate manager");
}
}

Expand Down
Loading

0 comments on commit 882bfa5

Please sign in to comment.