Skip to content

Commit

Permalink
Implement Soc descriptor CoreCoord API
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT committed Dec 16, 2024
1 parent 1e214f4 commit 883edcd
Show file tree
Hide file tree
Showing 10 changed files with 719 additions and 55 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/build-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ env:
DEPS_OUTPUT_DIR: ./build/_deps
TEST_OUTPUT_DIR: ./build/test
CLUSTER_DESCRIPTORS_DIR: ./tests/api/cluster_descriptor_examples
SOC_DESCRIPTORS_DIR: ./tests/soc_descs

jobs:
build:
Expand Down Expand Up @@ -77,7 +78,8 @@ jobs:
tar cvf artifact.tar ${{ env.TEST_OUTPUT_DIR }} \
${{ env.LIB_OUTPUT_DIR }} \
${{ env.DEPS_OUTPUT_DIR }} \
${{ env.CLUSTER_DESCRIPTORS_DIR }}
${{ env.CLUSTER_DESCRIPTORS_DIR }} \
${{ env.SOC_DESCRIPTORS_DIR }}
- name: Upload build artifacts archive
uses: actions/upload-artifact@v4
Expand Down
9 changes: 9 additions & 0 deletions device/api/umd/device/blackhole_coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ class BlackholeCoordinateManager : public CoordinateManager {
void fill_pcie_physical_translated_mapping() override;
void fill_dram_physical_translated_mapping() override;

std::vector<tt::umd::CoreCoord> get_tensix_cores() const override;
std::vector<tt::umd::CoreCoord> get_harvested_tensix_cores() const override;
std::vector<tt::umd::CoreCoord> get_dram_cores() const override;
std::vector<tt::umd::CoreCoord> get_harvested_dram_cores() const override;
tt_xy_pair get_tensix_grid_size() const override;
tt_xy_pair get_dram_grid_size() const override;
tt_xy_pair get_harvested_tensix_grid_size() const override;
tt_xy_pair get_harvested_dram_grid_size() const override;

private:
void map_column_of_dram_banks(const size_t start_bank, const size_t end_bank, const size_t x_coord);
};
40 changes: 28 additions & 12 deletions device/api/umd/device/coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ class CoordinateManager {

tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system);

std::vector<tt::umd::CoreCoord> get_cores(const CoreType core_type) const;
tt_xy_pair get_grid_size(const CoreType core_type) const;

std::vector<tt::umd::CoreCoord> get_harvested_cores(const CoreType core_type) const;
tt_xy_pair get_harvested_grid_size(const CoreType core_type) const;

virtual ~CoordinateManager() = default;

size_t get_tensix_harvesting_mask() const;
Expand All @@ -52,6 +58,9 @@ class CoordinateManager {
static void assert_create_coordinate_manager(
const tt::ARCH arch, const size_t tensix_harvesting_mask, const size_t dram_harvesting_mask);

const std::vector<tt_xy_pair>& get_physical_pairs(const CoreType core_type) const;
std::vector<tt::umd::CoreCoord> get_all_physical_cores(const CoreType core_type) const;

protected:
/*
* Constructor for Coordinate Manager.
Expand Down Expand Up @@ -87,6 +96,15 @@ class CoordinateManager {
void identity_map_physical_cores();
void add_core_translation(const tt::umd::CoreCoord& core_coord, const tt_xy_pair& physical_pair);

virtual std::vector<tt::umd::CoreCoord> get_tensix_cores() const;
virtual std::vector<tt::umd::CoreCoord> get_harvested_tensix_cores() const;
virtual std::vector<tt::umd::CoreCoord> get_dram_cores() const;
virtual std::vector<tt::umd::CoreCoord> get_harvested_dram_cores() const;
virtual tt_xy_pair get_tensix_grid_size() const;
virtual tt_xy_pair get_dram_grid_size() const;
virtual tt_xy_pair get_harvested_tensix_grid_size() const;
virtual tt_xy_pair get_harvested_dram_grid_size() const;

/*
* Fills the logical to translated mapping for the tensix cores.
* By default, translated coordinates are the same as physical coordinates.
Expand Down Expand Up @@ -134,23 +152,21 @@ class CoordinateManager {
std::map<tt::umd::CoreCoord, tt_xy_pair> to_physical_map;
std::map<std::pair<tt_xy_pair, CoordSystem>, tt::umd::CoreCoord> from_physical_map;

const tt_xy_pair tensix_grid_size;
const std::vector<tt_xy_pair>& tensix_cores;
tt_xy_pair tensix_grid_size;
const std::vector<tt_xy_pair> tensix_cores;
size_t tensix_harvesting_mask;
const size_t physical_layout_tensix_harvesting_mask;

const tt_xy_pair dram_grid_size;
const std::vector<tt_xy_pair>& dram_cores;
tt_xy_pair dram_grid_size;
const std::vector<tt_xy_pair> dram_cores;
size_t dram_harvesting_mask;

const tt_xy_pair eth_grid_size;
const std::vector<tt_xy_pair>& eth_cores;
tt_xy_pair eth_grid_size;
const std::vector<tt_xy_pair> eth_cores;

const tt_xy_pair arc_grid_size;
const std::vector<tt_xy_pair>& arc_cores;
tt_xy_pair arc_grid_size;
const std::vector<tt_xy_pair> arc_cores;

const tt_xy_pair pcie_grid_size;
const std::vector<tt_xy_pair>& pcie_cores;
tt_xy_pair pcie_grid_size;
const std::vector<tt_xy_pair> pcie_cores;
};

// friend
96 changes: 58 additions & 38 deletions device/api/umd/device/tt_soc_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,42 +46,6 @@ struct CoreDescriptor {
*/
class tt_SocDescriptor {
public:
tt::ARCH arch;
tt_xy_pair grid_size;
tt_xy_pair physical_grid_size;
tt_xy_pair worker_grid_size;
std::unordered_map<tt_xy_pair, CoreDescriptor> cores;
std::vector<tt_xy_pair> arc_cores;
std::vector<tt_xy_pair> workers;
std::vector<tt_xy_pair> harvested_workers;
std::vector<tt_xy_pair> pcie_cores;
std::unordered_map<int, int> worker_log_to_routing_x;
std::unordered_map<int, int> worker_log_to_routing_y;
std::unordered_map<int, int> routing_x_to_worker_x;
std::unordered_map<int, int> routing_y_to_worker_y;
std::vector<std::vector<tt_xy_pair>> dram_cores; // per channel list of dram cores
std::unordered_map<tt_xy_pair, std::tuple<int, int>> dram_core_channel_map; // map dram core to chan/subchan
std::vector<tt_xy_pair> ethernet_cores; // ethernet cores (index == channel id)
std::unordered_map<tt_xy_pair, int> ethernet_core_channel_map;
std::vector<std::size_t> trisc_sizes; // Most of software stack assumes same trisc size for whole chip..
std::string device_descriptor_file_path = std::string("");

bool has(tt_xy_pair input) { return cores.find(input) != cores.end(); }

int overlay_version;
int unpacker_version;
int dst_size_alignment;
int packer_version;
int worker_l1_size;
int eth_l1_size;
bool noc_translation_id_enabled;
uint64_t dram_bank_size;

int get_num_dram_channels() const;
bool is_worker_core(const tt_xy_pair &core) const;
tt_xy_pair get_core_for_dram_channel(int dram_chan, int subchannel) const;
bool is_ethernet_core(const tt_xy_pair &core) const;

// Default constructor. Creates uninitialized object with public access to all of its attributes.
tt_SocDescriptor() = default;
// Constructor used to build object from device descriptor file.
Expand Down Expand Up @@ -119,22 +83,78 @@ class tt_SocDescriptor {
eth_l1_size(other.eth_l1_size),
noc_translation_id_enabled(other.noc_translation_id_enabled),
dram_bank_size(other.dram_bank_size),
coordinate_manager(other.coordinate_manager) {}
coordinate_manager(other.coordinate_manager),
cores_map(other.cores_map),
grid_size_map(other.grid_size_map),
harvested_cores_map(other.harvested_cores_map),
harvested_grid_size_map(other.harvested_grid_size_map) {}

// CoreCoord conversions.
tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system);
tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const;

static std::string get_soc_descriptor_path(tt::ARCH arch);

std::vector<tt::umd::CoreCoord> get_cores(const CoreType core_type) const;
std::vector<tt::umd::CoreCoord> get_harvested_cores(const CoreType core_type) const;
tt_xy_pair get_grid_size(const CoreType core_type) const;
tt_xy_pair get_harvested_grid_size(const CoreType core_type) const;

int get_num_dram_channels() const;

bool is_worker_core(const tt_xy_pair &core) const;

tt_xy_pair get_core_for_dram_channel(int dram_chan, int subchannel) const;

tt::umd::CoreCoord get_dram_core_for_channel(int dram_chan, int subchannel) const;
tt::umd::CoreCoord get_dram_core(uint32_t dram_chan, uint32_t subchannel) const;

bool is_ethernet_core(const tt_xy_pair &core) const;

tt::ARCH arch;
tt_xy_pair grid_size;
tt_xy_pair physical_grid_size;
tt_xy_pair worker_grid_size;
std::unordered_map<tt_xy_pair, CoreDescriptor> cores;
std::vector<tt_xy_pair> arc_cores;
std::vector<tt_xy_pair> workers;
std::vector<tt_xy_pair> harvested_workers;
std::vector<tt_xy_pair> pcie_cores;
std::unordered_map<int, int> worker_log_to_routing_x;
std::unordered_map<int, int> worker_log_to_routing_y;
std::unordered_map<int, int> routing_x_to_worker_x;
std::unordered_map<int, int> routing_y_to_worker_y;
std::vector<std::vector<tt_xy_pair>> dram_cores; // per channel list of dram cores
std::unordered_map<tt_xy_pair, std::tuple<int, int>> dram_core_channel_map; // map dram core to chan/subchan
std::vector<tt_xy_pair> ethernet_cores; // ethernet cores (index == channel id)
std::unordered_map<tt_xy_pair, int> ethernet_core_channel_map;
std::vector<std::size_t> trisc_sizes; // Most of software stack assumes same trisc size for whole chip..
std::string device_descriptor_file_path = std::string("");

int overlay_version;
int unpacker_version;
int dst_size_alignment;
int packer_version;
int worker_l1_size;
int eth_l1_size;
bool noc_translation_id_enabled;
uint64_t dram_bank_size;

private:
void create_coordinate_manager(const std::size_t tensix_harvesting_mask, const std::size_t dram_harvesting_mask);
void load_core_descriptors_from_device_descriptor(YAML::Node &device_descriptor_yaml);
void load_soc_features_from_device_descriptor(YAML::Node &device_descriptor_yaml);
void get_cores_and_grid_size_from_coordinate_manager();

static tt_xy_pair calculate_grid_size(const std::vector<tt_xy_pair> &cores);

// TODO: change this to unique pointer as soon as copying of tt_SocDescriptor
// is not needed anymore. Soc descriptor and coordinate manager should be
// created once per chip.
std::shared_ptr<CoordinateManager> coordinate_manager = nullptr;
std::map<CoreType, std::vector<tt::umd::CoreCoord>> cores_map;
std::map<CoreType, tt_xy_pair> grid_size_map;
std::map<CoreType, std::vector<tt::umd::CoreCoord>> harvested_cores_map;
std::map<CoreType, tt_xy_pair> harvested_grid_size_map;
};

// Allocates a new soc descriptor on the heap. Returns an owning pointer.
Expand Down
95 changes: 95 additions & 0 deletions device/blackhole/blackhole_coordinate_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,25 @@ void BlackholeCoordinateManager::map_column_of_dram_banks(
}

void BlackholeCoordinateManager::fill_dram_physical_translated_mapping() {
if (dram_grid_size.x < blackhole::NUM_DRAM_BANKS) {
// If the number of DRAM banks is smaller than the number of DRAM banks on a standard Blackhole SoC,
// map the translated DRAM cores to be the same as the physical DRAM cores.
// TODO: Figure out how DRAM is going to be mapped to translated coordinates when there are fewer DRAM banks.
for (size_t x = 0; x < dram_grid_size.x; x++) {
for (size_t y = 0; y < dram_grid_size.y; y++) {
const CoreCoord logical_dram_core = CoreCoord(x, y, CoreType::DRAM, CoordSystem::LOGICAL);
const tt_xy_pair physical_dram_core = to_physical_map[logical_dram_core];

CoreCoord translated_dram_core =
CoreCoord(physical_dram_core.x, physical_dram_core.y, CoreType::DRAM, CoordSystem::TRANSLATED);
to_physical_map[translated_dram_core] = physical_dram_core;
from_physical_map[{{physical_dram_core.x, physical_dram_core.y}, CoordSystem::TRANSLATED}] =
translated_dram_core;
}
}
return;
}

const std::vector<size_t> harvested_banks = CoordinateManager::get_harvested_indices(dram_harvesting_mask);

if (harvested_banks.empty()) {
Expand Down Expand Up @@ -234,3 +253,79 @@ void BlackholeCoordinateManager::fill_dram_physical_translated_mapping() {
add_core_translation(translated_coord, physical_core);
}
}

std::vector<CoreCoord> BlackholeCoordinateManager::get_tensix_cores() const {
std::vector<size_t> harvested_x_coords = get_harvested_indices(tensix_harvesting_mask);
std::vector<CoreCoord> unharvested_tensix_cores;
for (size_t y = 0; y < tensix_grid_size.y; y++) {
for (size_t x = 0; x < tensix_grid_size.x; x++) {
const tt_xy_pair core = tensix_cores[y * tensix_grid_size.x + x];
CoreCoord core_coord(core.x, core.y, CoreType::TENSIX, CoordSystem::PHYSICAL);
if (std::find(harvested_x_coords.begin(), harvested_x_coords.end(), x) == harvested_x_coords.end()) {
unharvested_tensix_cores.push_back(core_coord);
}
}
}
return unharvested_tensix_cores;
}

std::vector<CoreCoord> BlackholeCoordinateManager::get_harvested_tensix_cores() const {
std::vector<size_t> harvested_x_coords = get_harvested_indices(tensix_harvesting_mask);
std::vector<CoreCoord> harvested_tensix_cores;
for (size_t y = 0; y < tensix_grid_size.y; y++) {
for (size_t x = 0; x < tensix_grid_size.x; x++) {
const tt_xy_pair core = tensix_cores[y * tensix_grid_size.x + x];
CoreCoord core_coord(core.x, core.y, CoreType::TENSIX, CoordSystem::PHYSICAL);
if (std::find(harvested_x_coords.begin(), harvested_x_coords.end(), x) != harvested_x_coords.end()) {
harvested_tensix_cores.push_back(core_coord);
}
}
}
return harvested_tensix_cores;
}

std::vector<CoreCoord> BlackholeCoordinateManager::get_dram_cores() const {
std::vector<size_t> harvested_banks = get_harvested_indices(dram_harvesting_mask);
std::vector<CoreCoord> unharvested_dram_cores;
for (size_t x = 0; x < dram_grid_size.x; x++) {
if (std::find(harvested_banks.begin(), harvested_banks.end(), x) == harvested_banks.end()) {
for (size_t y = 0; y < dram_grid_size.y; y++) {
const tt_xy_pair core = dram_cores[x * dram_grid_size.y + y];
CoreCoord core_coord(core.x, core.y, CoreType::DRAM, CoordSystem::PHYSICAL);
unharvested_dram_cores.push_back(core_coord);
}
}
}
return unharvested_dram_cores;
}

std::vector<CoreCoord> BlackholeCoordinateManager::get_harvested_dram_cores() const {
std::vector<size_t> harvested_banks = get_harvested_indices(dram_harvesting_mask);
std::vector<CoreCoord> harvested_dram_cores;
for (size_t x = 0; x < dram_grid_size.x; x++) {
if (std::find(harvested_banks.begin(), harvested_banks.end(), x) != harvested_banks.end()) {
for (size_t y = 0; y < dram_grid_size.y; y++) {
const tt_xy_pair core = dram_cores[x * dram_grid_size.y + y];
CoreCoord core_coord(core.x, core.y, CoreType::DRAM, CoordSystem::PHYSICAL);
harvested_dram_cores.push_back(core_coord);
}
}
}
return harvested_dram_cores;
}

// Grid size of the harvested tensix cores: x is the count reported by get_num_harvested for
// tensix_harvesting_mask, y is the unchanged tensix_grid_size.y.
tt_xy_pair BlackholeCoordinateManager::get_harvested_tensix_grid_size() const {
    const auto harvested_columns = CoordinateManager::get_num_harvested(tensix_harvesting_mask);
    return {harvested_columns, tensix_grid_size.y};
}

// Grid size of the harvested DRAM cores: x is the count reported by get_num_harvested for
// dram_harvesting_mask, y is the unchanged dram_grid_size.y.
tt_xy_pair BlackholeCoordinateManager::get_harvested_dram_grid_size() const {
    const auto harvested_banks = CoordinateManager::get_num_harvested(dram_harvesting_mask);
    return {harvested_banks, dram_grid_size.y};
}

// Grid size of the tensix cores left after harvesting: x is tensix_grid_size.x minus the count
// reported by get_num_harvested for tensix_harvesting_mask, y is the unchanged tensix_grid_size.y.
tt_xy_pair BlackholeCoordinateManager::get_tensix_grid_size() const {
    const auto remaining_columns =
        tensix_grid_size.x - CoordinateManager::get_num_harvested(tensix_harvesting_mask);
    return {remaining_columns, tensix_grid_size.y};
}

// Grid size of the DRAM cores left after harvesting: x is dram_grid_size.x minus the count
// reported by get_num_harvested for dram_harvesting_mask, y is the unchanged dram_grid_size.y.
tt_xy_pair BlackholeCoordinateManager::get_dram_grid_size() const {
    const auto remaining_banks = dram_grid_size.x - CoordinateManager::get_num_harvested(dram_harvesting_mask);
    return {remaining_banks, dram_grid_size.y};
}
Loading

0 comments on commit 883edcd

Please sign in to comment.