Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make cluster_id deterministic #334

Merged
merged 1 commit into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion common/disjoint_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class DisjointSet {
// Unites the set containing item1 with the set containing item2.
//
// Both current representatives (as reported by get_set) are re-pointed at the
// smaller of the two, so the outcome is the same regardless of the order in
// which the two items are passed.
void merge(T item1, T item2) {
    T set1 = get_set(item1);
    T set2 = get_set(item2);
    // A single chained assignment updates both entries; writing
    // parent[set1] separately beforehand would just be overwritten here.
    parent[set1] = parent[set2] = std::min(set1, set2);
}

// Returns true when get_set reports the same representative for both items.
bool are_same_set(T item1, T item2) {
    const T first_root = get_set(item1);
    const T second_root = get_set(item2);
    return first_root == second_root;
}
Expand Down
18 changes: 10 additions & 8 deletions tests/api/test_cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ TEST(ApiClusterTest, OpenAllChips) { std::unique_ptr<Cluster> umd_cluster = get_
TEST(ApiClusterTest, SimpleIOAllChips) {
std::unique_ptr<Cluster> umd_cluster = get_cluster();

const tt_ClusterDescriptor* cluster_desc = umd_cluster->get_cluster_description();

if (umd_cluster == nullptr || umd_cluster->get_all_chips_in_cluster().empty()) {
GTEST_SKIP() << "No chips present on the system. Skipping test.";
}

const tt_ClusterDescriptor* cluster_desc = umd_cluster->get_cluster_description();

// Initialize random data.
size_t data_size = 1024;
std::vector<uint8_t> data(data_size, 0);
Expand Down Expand Up @@ -117,12 +117,12 @@ TEST(ApiClusterTest, SimpleIOAllChips) {
TEST(ApiClusterTest, RemoteFlush) {
std::unique_ptr<Cluster> umd_cluster = get_cluster();

const tt_ClusterDescriptor* cluster_desc = umd_cluster->get_cluster_description();

if (umd_cluster == nullptr || umd_cluster->get_all_chips_in_cluster().empty()) {
GTEST_SKIP() << "No chips present on the system. Skipping test.";
}

const tt_ClusterDescriptor* cluster_desc = umd_cluster->get_cluster_description();

size_t data_size = 1024;
std::vector<uint8_t> data(data_size, 0);

Expand Down Expand Up @@ -175,14 +175,16 @@ TEST(ApiClusterTest, RemoteFlush) {
}

TEST(ApiClusterTest, SimpleIOSpecificChips) {
std::vector<int> pci_device_ids = PCIDevice::enumerate_devices();
// TODO: Make this test work on a host system without any tt devices.
if (pci_device_ids.empty()) {
GTEST_SKIP() << "No chips present on the system. Skipping test.";
}

std::unique_ptr<Cluster> umd_cluster = std::make_unique<Cluster>(0);

const tt_ClusterDescriptor* cluster_desc = umd_cluster->get_cluster_description();

if (umd_cluster == nullptr || umd_cluster->get_all_chips_in_cluster().empty()) {
GTEST_SKIP() << "No chips present on the system. Skipping test.";
}

// Initialize random data.
size_t data_size = 1024;
std::vector<uint8_t> data(data_size, 0);
Expand Down
11 changes: 10 additions & 1 deletion tests/api/test_cluster_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,16 @@ TEST(ApiClusterDescriptorTest, TestAllOfflineClusterDescriptors) {

std::unordered_map<chip_id_t, std::unordered_set<chip_id_t>> chips_grouped_by_closest_mmio =
cluster_desc->get_chips_grouped_by_closest_mmio();

// Check that cluster_id is always the same for the same cluster.
// Cluster id takes the value of the smallest chip_id in the cluster.
for (auto const &[chip, coord] : eth_chip_coords) {
if (cluster_desc_yaml != "wormhole_2xN300_unconnected.yaml") {
EXPECT_EQ(coord.cluster_id, 0);
} else {
EXPECT_TRUE(coord.cluster_id == 0 || coord.cluster_id == 1);
}
}
}
}

Expand All @@ -125,7 +135,6 @@ TEST(ApiClusterDescriptorTest, SeparateClusters) {
std::cout << "Detected " << chip_clusters.get_num_sets() << " separate clusters." << std::endl;

// Check that get_closest_mmio_capable_chip works.
// Currently, it is expected that the following fails if there is more than 1 cluster.
for (auto chip : all_chips) {
chip_id_t closest_mmio_chip = cluster_desc->get_closest_mmio_capable_chip(chip);
EXPECT_TRUE(chip_clusters.are_same_set(chip, closest_mmio_chip));
Expand Down
Loading