Skip to content

Commit

Permalink
detect_arch in ClusterDescriptor (#345)
Browse files Browse the repository at this point in the history
### Issue
Related to #99 
Aftermath of #175 

### Description
Make detect_arch available through the cluster descriptor, which should be
the only real endpoint for reading information about chips in the
cluster.

### List of the changes
- Removed detect_arch from cluster.h
- Added arch map in ClusterDescriptor
- Added static tt_ClusterDescriptor::detect_arch which should be used
instead of previously defined detect_arch

### Testing
Added a test that verifies this works as intended.

### API Changes
This PR has API changes:
- [x] tt_metal approved PR pointing to this branch:
tenstorrent/tt-metal#15564
- [x] tt_debuda approved PR pointing to this branch:
tenstorrent/tt-lens#201
  • Loading branch information
broskoTT authored Dec 4, 2024
1 parent 48cae2b commit dca4e49
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 57 deletions.
5 changes: 0 additions & 5 deletions device/api/umd/device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,6 @@

using TLB_DATA = tt::umd::tlb_data;

// TODO: Remove this - it's here for Metal backwards compatibility.
// Implementation is in cluster.cpp.
tt::ARCH detect_arch(int pci_device_num);
tt::ARCH detect_arch();

namespace boost::interprocess {
class named_mutex;
}
Expand Down
6 changes: 5 additions & 1 deletion device/api/umd/device/tt_cluster_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ enum BoardType : uint32_t {
E150 = 2,
P150A = 3,
GALAXY = 4,
DEFAULT = 5,
UNKNOWN = 5,
};

class tt_ClusterDescriptor {
Expand All @@ -53,6 +53,7 @@ class tt_ClusterDescriptor {
std::unordered_map<chip_id_t, chip_id_t> closest_mmio_chip_cache = {};
std::unordered_map<chip_id_t, BoardType> chip_board_type = {};
std::unordered_map<chip_id_t, std::unordered_set<chip_id_t>> chips_grouped_by_closest_mmio;
std::unordered_map<chip_id_t, tt::ARCH> chip_arch = {};

// one-to-many chip connections
struct Chip2ChipConnection {
Expand All @@ -77,6 +78,7 @@ class tt_ClusterDescriptor {
static void load_harvesting_information(YAML::Node &yaml, tt_ClusterDescriptor &desc);

void fill_chips_grouped_by_closest_mmio();
static tt::ARCH arch_from_string(std::string arch_str);

public:
/*
Expand All @@ -96,6 +98,7 @@ class tt_ClusterDescriptor {
static std::string get_cluster_descriptor_file_path();
static std::unique_ptr<tt_ClusterDescriptor> create_from_yaml(const std::string &cluster_descriptor_file_path);
static std::unique_ptr<tt_ClusterDescriptor> create();
static tt::ARCH detect_arch(const chip_id_t chip_id);

// This function is used to create mock cluster descriptor yaml files, for example for simulation.
static std::unique_ptr<tt_ClusterDescriptor> create_mock_cluster(
Expand All @@ -115,6 +118,7 @@ class tt_ClusterDescriptor {
int get_ethernet_link_distance(chip_id_t chip_a, chip_id_t chip_b) const;

BoardType get_board_type(chip_id_t chip_id) const;
tt::ARCH get_arch(chip_id_t chip_id) const;

bool ethernet_core_has_active_ethernet_link(chip_id_t local_chip, ethernet_channel_t local_ethernet_channel) const;
std::tuple<chip_id_t, ethernet_channel_t> get_chip_and_channel_of_remote_ethernet_core(
Expand Down
33 changes: 0 additions & 33 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,39 +59,6 @@ const uint64_t BH_4GB_TLB_SIZE = 4ULL * 1024 * 1024 * 1024;
// Remove 256MB from full 1GB for channel 3 (iATU limitation)
static constexpr uint32_t HUGEPAGE_CHANNEL_3_SIZE_LIMIT = 805306368;

// TODO: Remove in favor of cluster descriptor method, when it becomes available.
// Metal uses this function to determine the architecture of the first PCIe chip
// and then verifies that all subsequent chips are of the same architecture. It
// looks like Metal is doing this because we don't provide any other way... When
// we are further along in our refactoring efforts and `tt_device` is more of a
// Cluster abstraction, we should provide Metal with interfaces for:
// 1. Checking that all chips are of the same architecture (we may not care
// about this, but the application might).
// 2. Getting the architecture of a specific chip.
// Until then... I'm putting this function back so that Metal will still build
// next time someone bumps its UMD submodule version.
// Returns the architecture reported for the PCIe device numbered
// `pci_device_num`, or tt::ARCH::Invalid when
// PCIDevice::enumerate_devices_info() has no entry for that number.
tt::ARCH detect_arch(int pci_device_num) {
    const auto devices_info = PCIDevice::enumerate_devices_info();
    const auto it = devices_info.find(pci_device_num);
    if (it == devices_info.end()) {
        return tt::ARCH::Invalid;
    }

    // Query the entry in place instead of copying the whole device info
    // object just to read its architecture.
    return it->second.get_arch();
}

// TODO: Remove in favor of cluster descriptor method, when it becomes available.
// There is also a function which just wants to get any architecture, since it
// presumably already checked that all archs are the same.
// Returns the architecture of the first entry produced by
// PCIDevice::enumerate_devices_info(), or tt::ARCH::Invalid when the
// enumeration is empty.
tt::ARCH detect_arch() {
    const auto pci_infos = PCIDevice::enumerate_devices_info();
    return pci_infos.empty() ? tt::ARCH::Invalid : pci_infos.begin()->second.get_arch();
}

template <typename T>
void size_buffer_to_capacity(std::vector<T>& data_buf, std::size_t size_in_bytes) {
std::size_t target_size = 0;
Expand Down
41 changes: 36 additions & 5 deletions device/tt_cluster_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,7 @@ std::unique_ptr<tt_ClusterDescriptor> tt_ClusterDescriptor::create_mock_cluster(
log_debug(tt::LogSiliconDriver, "{} - adding logical: {}", __FUNCTION__, logical_id);
desc->chip_board_type.insert({logical_id, board_type});
desc->chips_with_mmio.insert({logical_id, logical_id});
desc->chip_arch.insert({logical_id, arch});
}

desc->enable_all_devices();
Expand Down Expand Up @@ -693,7 +694,9 @@ void tt_ClusterDescriptor::merge_cluster_ids(tt_ClusterDescriptor &desc) {
void tt_ClusterDescriptor::load_chips_from_connectivity_descriptor(YAML::Node &yaml, tt_ClusterDescriptor &desc) {
for (YAML::const_iterator node = yaml["arch"].begin(); node != yaml["arch"].end(); ++node) {
chip_id_t chip_id = node->first.as<int>();
std::string arch_str = node->second.as<std::string>();
desc.all_chips.insert(chip_id);
desc.chip_arch.insert({chip_id, arch_from_string(arch_str)});
}

for (YAML::const_iterator node = yaml["chips"].begin(); node != yaml["chips"].end(); ++node) {
Expand Down Expand Up @@ -747,15 +750,15 @@ void tt_ClusterDescriptor::load_chips_from_connectivity_descriptor(YAML::Node &y
log_warning(
LogSiliconDriver,
"Unknown board type for chip {}. This might happen because chip is running old firmware. "
"Defaulting to DEFAULT",
"Defaulting to UNKNOWN",
chip);
board_type = BoardType::DEFAULT;
board_type = BoardType::UNKNOWN;
}
desc.chip_board_type.insert({chip, board_type});
}
} else {
for (const auto &chip : desc.all_chips) {
desc.chip_board_type.insert({chip, BoardType::DEFAULT});
desc.chip_board_type.insert({chip, BoardType::UNKNOWN});
}
}
}
Expand All @@ -781,6 +784,19 @@ void tt_ClusterDescriptor::fill_chips_grouped_by_closest_mmio() {
}
}

// Maps an architecture name to its tt::ARCH value.
// The comparison is exact (case-sensitive); any name that is not listed in
// the table below yields tt::ARCH::Invalid.
tt::ARCH tt_ClusterDescriptor::arch_from_string(std::string arch_str) {
    struct NamedArch {
        const char *name;
        tt::ARCH arch;
    };

    // Recognized names and the architecture each one stands for.
    static const NamedArch known_archs[] = {
        {"Grayskull", tt::ARCH::GRAYSKULL},
        {"Wormhole", tt::ARCH::WORMHOLE_B0},
        {"Blackhole", tt::ARCH::BLACKHOLE},
    };

    for (const NamedArch &candidate : known_archs) {
        if (arch_str == candidate.name) {
            return candidate.arch;
        }
    }
    return tt::ARCH::Invalid;
}

const std::unordered_map<chip_id_t, std::unordered_map<ethernet_channel_t, std::tuple<chip_id_t, ethernet_channel_t>>>
tt_ClusterDescriptor::get_ethernet_connections() const {
auto eth_connections = std::
Expand Down Expand Up @@ -856,8 +872,23 @@ int tt_ClusterDescriptor::get_ethernet_link_distance(chip_id_t chip_a, chip_id_t
}

// Returns the board type recorded for `chip_id` in chip_board_type.
// The presence of the chip is checked through log_assert first, so a missing
// chip is reported with a descriptive message rather than by the bare
// exception from unordered_map::at(). This mirrors get_arch().
BoardType tt_ClusterDescriptor::get_board_type(chip_id_t chip_id) const {
    log_assert(
        chip_board_type.find(chip_id) != chip_board_type.end(),
        "Chip {} does not have a board type in the cluster descriptor",
        chip_id);
    return chip_board_type.at(chip_id);
}

// Returns the architecture recorded for `chip_id` in chip_arch.
// NOTE(review): log_assert presumably raises an error when the condition is
// false (the accompanying test expects std::runtime_error) - confirm.
tt::ARCH tt_ClusterDescriptor::get_arch(chip_id_t chip_id) const {
// Check that the chip has an entry before reading it, so a missing chip is
// reported with the message below.
log_assert(
chip_arch.find(chip_id) != chip_arch.end(),
"Chip {} does not have an architecture in the cluster descriptor",
chip_id);
return chip_arch.at(chip_id);
}

// Creates a cluster descriptor through create() and returns the architecture
// it holds for `chip_id`. A new descriptor is built on every call.
/* static */ tt::ARCH tt_ClusterDescriptor::detect_arch(chip_id_t chip_id) {
    const std::unique_ptr<tt_ClusterDescriptor> cluster_desc = tt_ClusterDescriptor::create();
    return cluster_desc->get_arch(chip_id);
}

const std::unordered_map<chip_id_t, std::unordered_set<chip_id_t>> &
Expand Down
36 changes: 23 additions & 13 deletions tests/api/test_cluster_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,35 @@
#include "umd/device/pci_device.hpp"
#include "umd/device/tt_cluster_descriptor.h"

// TODO: Needed for detect_arch, remove when it is part of cluster descriptor.
#include "umd/device/cluster.h"

TEST(ApiClusterDescriptorTest, DetectArch) {
// TODO: This should be part of cluster descriptor. It is currently used like this from tt_metal.
tt::ARCH arch = detect_arch();
std::unique_ptr<tt_ClusterDescriptor> cluster_desc = tt_ClusterDescriptor::create();

// Expect it to be invalid if no devices are found.
if (PCIDevice::enumerate_devices().empty()) {
EXPECT_EQ(arch, tt::ARCH::Invalid);
if (cluster_desc->get_number_of_chips() == 0) {
// Expect detect_arch to throw if no devices are found.
EXPECT_THROW(tt_ClusterDescriptor::detect_arch(0), std::runtime_error);
} else {
tt::ARCH arch = tt_ClusterDescriptor::detect_arch(0);
EXPECT_NE(arch, tt::ARCH::Invalid);

// TODO: This should be the only available API, previous call should be routed to this one to get any arch.
tt::ARCH arch2 = detect_arch(PCIDevice::enumerate_devices()[0]);
EXPECT_NE(arch2, tt::ARCH::Invalid);
// Test that cluster descriptor and PCIDevice::enumerate_devices_info() return the same set of chips.
std::map<int, PciDeviceInfo> pci_device_infos = PCIDevice::enumerate_devices_info();
std::unordered_set<chip_id_t> pci_chips_set;
for (auto [pci_device_number, _] : pci_device_infos) {
pci_chips_set.insert(pci_device_number);
}

// In our current setup, we expect all arch to be the same.
EXPECT_EQ(arch, arch2);
std::unordered_map<chip_id_t, chip_id_t> chips_with_mmio = cluster_desc->get_chips_with_mmio();
std::unordered_set<chip_id_t> cluster_chips_set;
for (auto [_, pci_device_number] : chips_with_mmio) {
cluster_chips_set.insert(pci_device_number);
}

EXPECT_EQ(pci_chips_set, cluster_chips_set);

// Test that cluster descriptor holds the same arch as pci_device.
for (auto [chip, pci_device_number] : cluster_desc->get_chips_with_mmio()) {
EXPECT_EQ(cluster_desc->get_arch(chip), pci_device_infos.at(pci_device_number).get_arch());
}
}
}

Expand Down

0 comments on commit dca4e49

Please sign in to comment.