Skip to content

Commit

Permalink
[Harvesting] noc_translation_enabled added to tt_SocDescriptor (#443)
Browse files Browse the repository at this point in the history
### Issue
Related to #439 

### Description
The last missing feature of the Coordinate Manager is the noc_translation_enabled
flag. If it is disabled, translated coords will be the same as
physical coords.

### List of the changes
- Added bool noc_translation_enabled to the CoordinateManager constructor.
- Added default versions of the physical-to-translated mapping, which perform an
identity mapping.
- Added a test to verify that this flag works.
- The translation flag is now required, so it was added explicitly to the tests
where needed.

### Testing
Added another test, existing CI tests.

### API Changes
There are no API changes in this PR.
  • Loading branch information
broskoTT authored Jan 10, 2025
1 parent 2cfb641 commit 0f2f8d8
Show file tree
Hide file tree
Showing 20 changed files with 288 additions and 123 deletions.
6 changes: 4 additions & 2 deletions device/api/umd/device/blackhole_coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
class BlackholeCoordinateManager : public CoordinateManager {
public:
BlackholeCoordinateManager(
const bool noc_translation_enabled,
const tt_xy_pair& tensix_grid_size,
const std::vector<tt_xy_pair>& tensix_cores,
const size_t tensix_harvesting_mask,
Expand All @@ -29,14 +30,15 @@ class BlackholeCoordinateManager : public CoordinateManager {
protected:
void assert_coordinate_manager_constructor() override;

void translate_dram_coords() override;
void translate_tensix_coords() override;
void translate_dram_coords() override;
void translate_eth_coords() override;

void fill_tensix_physical_translated_mapping() override;
void fill_dram_physical_translated_mapping() override;
void fill_eth_physical_translated_mapping() override;
void fill_pcie_physical_translated_mapping() override;
void fill_dram_physical_translated_mapping() override;
void fill_arc_physical_translated_mapping() override;

std::vector<tt::umd::CoreCoord> get_tensix_cores() const override;
std::vector<tt::umd::CoreCoord> get_harvested_tensix_cores() const override;
Expand Down
10 changes: 10 additions & 0 deletions device/api/umd/device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -868,8 +868,16 @@ class Cluster : public tt_device {

// This function has to be called for the local chip, and then it will wait for all connected remote chips to flush.
void wait_for_connected_non_mmio_flush(chip_id_t chip_id);

// Helper functions for constructing the chips from the cluster descriptor.
std::unique_ptr<Chip> construct_chip_from_cluster(
chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc, tt_SocDescriptor& soc_desc);
std::unique_ptr<Chip> construct_chip_from_cluster(
const std::string& soc_desc_path,
chip_id_t chip_id,
tt_ClusterDescriptor* cluster_desc,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t>& simulated_harvesting_masks);
std::unique_ptr<Chip> construct_chip_from_cluster(
chip_id_t logical_device_id,
tt_ClusterDescriptor* cluster_desc,
Expand All @@ -887,6 +895,8 @@ class Cluster : public tt_device {
const bool clean_system_resources,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks);

// Helper function for translating chip coordinates.
tt::umd::CoreCoord translate_chip_coord(
const chip_id_t chip, const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const;

Expand Down
31 changes: 25 additions & 6 deletions device/api/umd/device/coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class CoordinateManager {
*/
static std::shared_ptr<CoordinateManager> create_coordinate_manager(
tt::ARCH arch,
const bool noc_translation_enabled,
const tt_xy_pair& tensix_grid_size,
const std::vector<tt_xy_pair>& tensix_cores,
const size_t tensix_harvesting_mask,
Expand All @@ -40,6 +41,7 @@ class CoordinateManager {

static std::shared_ptr<CoordinateManager> create_coordinate_manager(
tt::ARCH arch,
const bool noc_translation_enabled,
const size_t tensix_harvesting_mask = 0,
const size_t dram_harvesting_mask = 0,
const size_t eth_harvesting_mask = 0,
Expand Down Expand Up @@ -88,6 +90,7 @@ class CoordinateManager {
* layout of the tensix cores.
*/
CoordinateManager(
const bool noc_translation_enabled,
const tt_xy_pair& tensix_grid_size,
const std::vector<tt_xy_pair>& tensix_cores,
const size_t tensix_harvesting_mask,
Expand Down Expand Up @@ -129,13 +132,23 @@ class CoordinateManager {
virtual tt_xy_pair get_harvested_eth_grid_size() const;

/*
* Fills the physical to translated mapping for the tensix cores.
* By default, translated coordinates are the same as physical coordinates.
* This will be true for all architectures if noc_translation_enabled is false.
*/
void fill_tensix_default_physical_translated_mapping();
void fill_eth_default_physical_translated_mapping();
void fill_dram_default_physical_translated_mapping();
void fill_pcie_default_physical_translated_mapping();
void fill_arc_default_physical_translated_mapping();

/*
* Fills the physical to translated mapping for the tensix cores.
* By default, translated coordinates are the same as physical coordinates.
* Derived coordinate managers that need to implement different mapping
* should override this method. Wormhole and Blackhole coordinate managers
* override this method to implement different mapping.
*/
virtual void fill_tensix_physical_translated_mapping();
virtual void fill_tensix_physical_translated_mapping() = 0;

/*
* Fills the physical to translated mapping for the ethernet cores.
Expand All @@ -144,7 +157,7 @@ class CoordinateManager {
* should override this method. Wormhole and Blackhole coordinate managers
* override this method to implement different mapping.
*/
virtual void fill_eth_physical_translated_mapping();
virtual void fill_eth_physical_translated_mapping() = 0;

/*
* Fills the physical to translated mapping for the DRAM cores.
Expand All @@ -153,7 +166,7 @@ class CoordinateManager {
* should override this method. Blackhole coordinate manager overrides
* this method to implement different mapping.
*/
virtual void fill_dram_physical_translated_mapping();
virtual void fill_dram_physical_translated_mapping() = 0;

/*
* Fills the physical to translated mapping for the PCIE cores.
Expand All @@ -162,19 +175,25 @@ class CoordinateManager {
* should override this method. Blackhole coordinate manager overrides
* this method to implement different mapping.
*/
virtual void fill_pcie_physical_translated_mapping();
virtual void fill_pcie_physical_translated_mapping() = 0;

/*
* Fills the physical to translated mapping for the ARC cores.
* By default, translated coordinates are the same as physical coordinates.
* Derived coordinate managers that need to implement different mapping
* should override this method.
*/
virtual void fill_arc_physical_translated_mapping();
virtual void fill_arc_physical_translated_mapping() = 0;

std::map<tt::umd::CoreCoord, tt_xy_pair> to_physical_map;
std::map<std::pair<tt_xy_pair, CoordSystem>, tt::umd::CoreCoord> from_physical_map;

// Whether NOC translation is enabled on chip.
// This flag affects how Translated coords are calculated. If translation is enabled on the chip, then we can
// interface it with a coordinate system which abstracts away harvested cores. If it is not enabled, then we need to
// interface it with noc0 coordinates.
bool noc_translation_enabled;

tt_xy_pair tensix_grid_size;
const std::vector<tt_xy_pair> tensix_cores;
size_t tensix_harvesting_mask;
Expand Down
4 changes: 4 additions & 0 deletions device/api/umd/device/grayskull_coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,9 @@ class GrayskullCoordinateManager : public CoordinateManager {
const std::vector<tt_xy_pair>& pcie_cores);

protected:
void fill_tensix_physical_translated_mapping() override;
void fill_eth_physical_translated_mapping() override;
void fill_dram_physical_translated_mapping() override;
void fill_pcie_physical_translated_mapping() override;
void fill_arc_physical_translated_mapping() override;
};
6 changes: 5 additions & 1 deletion device/api/umd/device/tt_soc_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class tt_SocDescriptor {
// Constructor used to build object from device descriptor file.
tt_SocDescriptor(
std::string device_descriptor_path,
const bool noc_translation_enabled,
const size_t tensix_harvesting_mask = 0,
const size_t dram_harvesting_mask = 0,
const size_t eth_harvesting_mask = 0);
Expand Down Expand Up @@ -114,7 +115,10 @@ class tt_SocDescriptor {

private:
void create_coordinate_manager(
const size_t tensix_harvesting_mask, const size_t dram_harvesting_mask, const size_t eth_harvesting_mask);
const bool noc_translation_enabled,
const size_t tensix_harvesting_mask,
const size_t dram_harvesting_mask,
const size_t eth_harvesting_mask);
void load_core_descriptors_from_device_descriptor(YAML::Node &device_descriptor_yaml);
void load_soc_features_from_device_descriptor(YAML::Node &device_descriptor_yaml);
void get_cores_and_grid_size_from_coordinate_manager();
Expand Down
4 changes: 4 additions & 0 deletions device/api/umd/device/wormhole_coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
class WormholeCoordinateManager : public CoordinateManager {
public:
WormholeCoordinateManager(
const bool noc_translation_enabled,
const tt_xy_pair& tensix_grid_size,
const std::vector<tt_xy_pair>& tensix_cores,
const size_t tensix_harvesting_mask,
Expand All @@ -28,5 +29,8 @@ class WormholeCoordinateManager : public CoordinateManager {

protected:
void fill_tensix_physical_translated_mapping() override;
void fill_dram_physical_translated_mapping() override;
void fill_eth_physical_translated_mapping() override;
void fill_pcie_physical_translated_mapping() override;
void fill_arc_physical_translated_mapping() override;
};
25 changes: 22 additions & 3 deletions device/blackhole/blackhole_coordinate_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using namespace tt::umd;

BlackholeCoordinateManager::BlackholeCoordinateManager(
const bool noc_translation_enabled,
const tt_xy_pair& tensix_grid_size,
const std::vector<tt_xy_pair>& tensix_cores,
const size_t tensix_harvesting_mask,
Expand All @@ -24,6 +25,7 @@ BlackholeCoordinateManager::BlackholeCoordinateManager(
const tt_xy_pair& pcie_grid_size,
const std::vector<tt_xy_pair>& pcie_cores) :
CoordinateManager(
noc_translation_enabled,
tensix_grid_size,
tensix_cores,
tensix_harvesting_mask,
Expand Down Expand Up @@ -89,7 +91,11 @@ void BlackholeCoordinateManager::translate_tensix_coords() {
}
}

fill_tensix_physical_translated_mapping();
if (noc_translation_enabled) {
fill_tensix_physical_translated_mapping();
} else {
fill_tensix_default_physical_translated_mapping();
}
}

void BlackholeCoordinateManager::fill_tensix_physical_translated_mapping() {
Expand Down Expand Up @@ -146,7 +152,11 @@ void BlackholeCoordinateManager::translate_dram_coords() {
}
}

fill_dram_physical_translated_mapping();
if (noc_translation_enabled) {
fill_dram_physical_translated_mapping();
} else {
fill_dram_default_physical_translated_mapping();
}
}

void BlackholeCoordinateManager::translate_eth_coords() {
Expand Down Expand Up @@ -184,7 +194,11 @@ void BlackholeCoordinateManager::translate_eth_coords() {
}
}

fill_eth_physical_translated_mapping();
if (noc_translation_enabled) {
fill_eth_physical_translated_mapping();
} else {
fill_eth_default_physical_translated_mapping();
}
}

void BlackholeCoordinateManager::fill_eth_physical_translated_mapping() {
Expand Down Expand Up @@ -231,6 +245,11 @@ void BlackholeCoordinateManager::fill_pcie_physical_translated_mapping() {
add_core_translation(translated_coord, physical_pair);
}

// Fills the physical to translated mapping for the ARC cores by delegating to the
// default mapping helper declared on CoordinateManager.
void BlackholeCoordinateManager::fill_arc_physical_translated_mapping() {
// ARC cores are not translated in Blackhole.
fill_arc_default_physical_translated_mapping();
}

void BlackholeCoordinateManager::map_column_of_dram_banks(
const size_t start_bank, const size_t end_bank, const size_t x_coord) {
size_t translated_y = blackhole::dram_translated_coordinate_start_y;
Expand Down
34 changes: 23 additions & 11 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,19 +436,30 @@ std::unique_ptr<Chip> Cluster::construct_chip_from_cluster(
}

std::unique_ptr<Chip> Cluster::construct_chip_from_cluster(
const std::string& soc_desc_path,
chip_id_t chip_id,
tt_ClusterDescriptor* cluster_desc,
bool perform_harvesting,
std::unordered_map<chip_id_t, uint32_t>& simulated_harvesting_masks) {
tt::ARCH arch = cluster_desc->get_arch(chip_id);
const BoardType chip_board_type = cluster_desc->get_board_type(chip_id);
std::string soc_desc_path = tt_SocDescriptor::get_soc_descriptor_path(arch, chip_board_type);
uint32_t tensix_harvesting_mask =
get_tensix_harvesting_mask(chip_id, cluster_desc, perform_harvesting, simulated_harvesting_masks);
tt_SocDescriptor soc_desc = tt_SocDescriptor(soc_desc_path, tensix_harvesting_mask);
tt_SocDescriptor soc_desc = tt_SocDescriptor(
soc_desc_path, cluster_desc->get_noc_translation_table_en().at(chip_id), tensix_harvesting_mask);
return construct_chip_from_cluster(chip_id, cluster_desc, soc_desc);
}

// Convenience overload: derives the soc descriptor path from the chip's architecture and
// board type (both read from the cluster descriptor), then forwards to the overload that
// takes an explicit soc descriptor path.
std::unique_ptr<Chip> Cluster::construct_chip_from_cluster(
    chip_id_t chip_id,
    tt_ClusterDescriptor* cluster_desc,
    bool perform_harvesting,
    std::unordered_map<chip_id_t, uint32_t>& simulated_harvesting_masks) {
    // Query the cluster descriptor first; both values select the descriptor file.
    const tt::ARCH chip_arch = cluster_desc->get_arch(chip_id);
    const BoardType board_type = cluster_desc->get_board_type(chip_id);

    const std::string descriptor_path = tt_SocDescriptor::get_soc_descriptor_path(chip_arch, board_type);

    // All remaining arguments are passed through unchanged.
    return construct_chip_from_cluster(
        descriptor_path, chip_id, cluster_desc, perform_harvesting, simulated_harvesting_masks);
}

void Cluster::add_chip(chip_id_t chip_id, std::unique_ptr<Chip> chip) {
log_assert(
chips_.find(chip_id) == chips_.end(),
Expand Down Expand Up @@ -567,16 +578,16 @@ Cluster::Cluster(
cluster_desc->get_all_chips().find(chip_id) != cluster_desc->get_all_chips().end(),
"Target device {} not present in current cluster!",
chip_id);
size_t tensix_harvesting_mask =
get_tensix_harvesting_mask(chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks);
tt_SocDescriptor soc_desc = tt_SocDescriptor(sdesc_path, tensix_harvesting_mask);
add_chip(
chip_id,
construct_chip_from_cluster(
sdesc_path, chip_id, cluster_desc.get(), perform_harvesting, simulated_harvesting_masks));
log_assert(
cluster_desc->get_arch(chip_id) == soc_desc.arch,
cluster_desc->get_arch(chip_id) == chips_.at(chip_id)->get_soc_descriptor().arch,
"Passed soc descriptor has {} arch, but for chip id {} has arch {}",
arch_to_str(soc_desc.arch),
arch_to_str(chips_.at(chip_id)->get_soc_descriptor().arch),
chip_id,
arch_to_str(cluster_desc->get_arch(chip_id)));
add_chip(chip_id, construct_chip_from_cluster(chip_id, cluster_desc.get(), soc_desc));
}

// TODO: work on removing this member altogether. Currently assumes all have the same arch.
Expand Down Expand Up @@ -623,7 +634,8 @@ Cluster::Cluster(
// rather than ClusterDescriptor.
tt::ARCH arch = tt::ARCH::GRAYSKULL;
chip_id_t mock_chip_id = 0;
tt_SocDescriptor soc_desc = tt_SocDescriptor(tt_SocDescriptor::get_soc_descriptor_path(arch, BoardType::UNKNOWN));
tt_SocDescriptor soc_desc =
tt_SocDescriptor(tt_SocDescriptor::get_soc_descriptor_path(arch, BoardType::UNKNOWN), false);
std::unique_ptr<Chip> chip = std::make_unique<MockChip>(soc_desc);

std::unordered_map<chip_id_t, std::unique_ptr<Chip>> chips;
Expand Down
Loading

0 comments on commit 0f2f8d8

Please sign in to comment.