Skip to content

Commit

Permalink
Check KMD version prior to querying device info (#173)
Browse files Browse the repository at this point in the history
  • Loading branch information
joelsmithTT authored Nov 23, 2024
1 parent 59d5b51 commit c51e692
Show file tree
Hide file tree
Showing 7 changed files with 200 additions and 2 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,7 @@ jobs:
- name: Run API tests
run: |
${{ env.TEST_OUTPUT_DIR }}/umd/api/api_tests
- name: Run MISC tests
run: |
${{ env.TEST_OUTPUT_DIR }}/umd/misc/umd_misc_tests
73 changes: 73 additions & 0 deletions common/semver.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include <cstdint>
#include <sstream>
#include <string>

#include "fmt/format.h"

namespace tt::umd {

/**
* Based on Semantic Versioning 2.0.0 (https://semver.org/) but more permissive.
* TT-KMD reports version strings that are technically not semver compliant.
*/

class semver_t {
public:
const uint64_t major;
const uint64_t minor;
const uint64_t patch;

semver_t(uint64_t major, uint64_t minor, uint64_t patch) : major(major), minor(minor), patch(patch) {}

semver_t(const std::string& version_str) : semver_t(parse(version_str)) {}

bool operator<(const semver_t& other) const {
return std::tie(major, minor, patch) < std::tie(other.major, other.minor, other.patch);
}

bool operator>(const semver_t& other) const { return other < *this; }

bool operator==(const semver_t& other) const {
return std::tie(major, minor, patch) == std::tie(other.major, other.minor, other.patch);
}

bool operator!=(const semver_t& other) const { return !(*this == other); }

bool operator<=(const semver_t& other) const { return !(other < *this); }

bool operator>=(const semver_t& other) const { return !(*this < other); }

std::string to_string() const { return fmt::format("{}.{}.{}", major, minor, patch); }

private:
static semver_t parse(const std::string& version_str) {
std::istringstream iss(version_str);
std::string token;
uint64_t major = 0;
uint64_t minor = 0;
uint64_t patch = 0;

if (std::getline(iss, token, '.')) {
major = std::stoull(token);

if (std::getline(iss, token, '.')) {
minor = std::stoull(token);

if (std::getline(iss, token, '.')) {
patch = std::stoull(token);
}
}
}
return semver_t(major, minor, patch);
}
};

} // namespace tt::umd
20 changes: 19 additions & 1 deletion device/pcie/pci_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,10 @@ PCIDevice::PCIDevice(int pci_device_number, int logical_device_id) :
numa_node(read_sysfs<int>(info, "numa_node")),
revision(read_sysfs<int>(info, "revision")),
arch(detect_arch(info.device_id, revision)),
architecture_implementation(tt::umd::architecture_implementation::create(arch)) {
architecture_implementation(tt::umd::architecture_implementation::create(arch)),
kmd_version(read_kmd_version()) {
log_info(LogSiliconDriver, "Opened PCI device {}; KMD version: {}", pci_device_num, kmd_version.to_string());

struct {
tenstorrent_query_mappings query_mappings;
tenstorrent_mapping mapping_array[8];
Expand Down Expand Up @@ -809,3 +812,18 @@ void PCIDevice::print_file_contents(std::string filename, std::string hint) {
}
}
}

semver_t PCIDevice::read_kmd_version() {
static const std::string path = "/sys/module/tenstorrent/version";
std::ifstream file(path);

if (!file.is_open()) {
log_warning(LogSiliconDriver, "Failed to open file: {}", path);
return {0, 0, 0};
}

std::string version_str;
std::getline(file, version_str);

return semver_t(version_str);
}
11 changes: 10 additions & 1 deletion device/pcie/pci_device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
#include <unordered_map>
#include <vector>

#include "common/semver.hpp"
#include "device/tlb.h"
#include "device/tt_arch_types.h"
#include "device/tt_cluster_descriptor_types.h"
#include "device/tt_xy_pair.h"
#include "fmt/format.h"

// TODO: this is used up in cluster.cpp but that logic ought to be
// lowered into the PCIDevice class since it is specific to PCIe cards.
Expand All @@ -31,7 +33,8 @@ constexpr unsigned int c_hang_read_value = 0xffffffffu;

namespace tt::umd {
class architecture_implementation;
}
struct semver_t;
} // namespace tt::umd

struct dynamic_tlb {
uint64_t bar_offset; // Offset that address is mapped to, within the PCI BAR.
Expand All @@ -57,6 +60,9 @@ struct PciDeviceInfo {
// onto this struct as methods? e.g. current_link_width etc.
};

// Do we want to put everything into this file into tt::umd namespace?
using tt::umd::semver_t;

class PCIDevice {
const std::string device_path; // Path to character device: /dev/tenstorrent/N
const int pci_device_num; // N in /dev/tenstorrent/N
Expand All @@ -66,6 +72,7 @@ class PCIDevice {
const int numa_node; // -1 if non-NUMA
const int revision; // PCI revision value from sysfs
const tt::ARCH arch; // e.g. Grayskull, Wormhole, Blackhole
const semver_t kmd_version; // KMD version
std::unique_ptr<tt::umd::architecture_implementation> architecture_implementation;

public:
Expand Down Expand Up @@ -230,5 +237,7 @@ class PCIDevice {
// For debug purposes when various stages fails.
void print_file_contents(std::string filename, std::string hint = "");

semver_t read_kmd_version();

std::vector<hugepage_mapping> hugepage_mapping_per_channel;
};
2 changes: 2 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ endif()
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/microbenchmark)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/api)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/pcie)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/misc)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/simulation)
if($ENV{ARCH_NAME} STREQUAL "wormhole_b0")
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/wormhole)
Expand All @@ -82,4 +83,5 @@ add_custom_target(
simulation_tests
test_pcie_device
api_tests
umd_misc_tests
)
12 changes: 12 additions & 0 deletions tests/misc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
set(UMD_MISC_TESTS_SRCS test_semver.cpp)

add_executable(umd_misc_tests ${UMD_MISC_TESTS_SRCS})
target_link_libraries(umd_misc_tests PRIVATE test_common)
set_target_properties(
umd_misc_tests
PROPERTIES
RUNTIME_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR}/test/umd/misc
OUTPUT_NAME
umd_misc_tests
)
80 changes: 80 additions & 0 deletions tests/misc/test_semver.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/

#include <gtest/gtest.h>

#include <map>

#include "common/semver.hpp"

using tt::umd::semver_t;

TEST(Semver, Valid) {
const std::map<std::string, semver_t> valid_test_cases = {
{"1.29", semver_t(1, 29, 0)}, // technically invalid, but seen from TT-KMD
{"1.28-bh2", semver_t(1, 28, 0)}, // technically invalid, but seen from TT-KMD
{"0.0.4", semver_t(0, 0, 4)},
{"1.2.3", semver_t(1, 2, 3)},
{"10.20.30", semver_t(10, 20, 30)},
{"1.1.2-prerelease+meta", semver_t(1, 1, 2)},
{"1.1.2+meta", semver_t(1, 1, 2)},
{"1.1.2+meta-valid", semver_t(1, 1, 2)},
{"1.0.0-alpha", semver_t(1, 0, 0)},
{"1.0.0-beta", semver_t(1, 0, 0)},
{"1.0.0-alpha.beta", semver_t(1, 0, 0)},
{"1.0.0-alpha.beta.1", semver_t(1, 0, 0)},
{"1.0.0-alpha.1", semver_t(1, 0, 0)},
{"1.0.0-alpha0.valid", semver_t(1, 0, 0)},
{"1.0.0-alpha.0valid", semver_t(1, 0, 0)},
{"1.0.0-alpha-a.b-c-somethinglong+build.1-aef.1-its-okay", semver_t(1, 0, 0)},
{"1.0.0-rc.1+build.1", semver_t(1, 0, 0)},
{"2.0.0-rc.1+build.123", semver_t(2, 0, 0)},
{"1.2.3-beta", semver_t(1, 2, 3)},
{"10.2.3-DEV-SNAPSHOT", semver_t(10, 2, 3)},
{"1.2.3-SNAPSHOT-123", semver_t(1, 2, 3)},
{"1.0.0", semver_t(1, 0, 0)},
{"2.0.0", semver_t(2, 0, 0)},
{"1.1.7", semver_t(1, 1, 7)},
{"2.0.0+build.1848", semver_t(2, 0, 0)},
{"2.0.1-alpha.1227", semver_t(2, 0, 1)},
{"1.0.0-alpha+beta", semver_t(1, 0, 0)},
{"1.2.3----RC-SNAPSHOT.12.9.1--.12+788", semver_t(1, 2, 3)},
{"1.2.3----R-S.12.9.1--.12+meta", semver_t(1, 2, 3)},
{"1.2.3----RC-SNAPSHOT.12.9.1--.12", semver_t(1, 2, 3)},
{"1.0.0+0.build.1-rc.10000aaa-kk-0.1", semver_t(1, 0, 0)},
{"1.0.0-0A.is.legal", semver_t(1, 0, 0)}};

for (const auto &[version_str, expected] : valid_test_cases) {
semver_t actual(version_str);
EXPECT_EQ(actual.major, expected.major);
EXPECT_EQ(actual.minor, expected.minor);
EXPECT_EQ(actual.patch, expected.patch);
}
}

TEST(Semver, Invalid) {
std::vector<std::string> invalid_test_cases = {
"+invalid",
"-invalid",
"-invalid+invalid",
"-invalid.01",
"alpha",
"alpha.beta",
"alpha.beta.1",
"alpha.1",
"alpha+beta",
"alpha_beta",
"alpha.",
"alpha..",
"beta",
"-alpha.",
"+justmeta",
};

for (const auto &version_str : invalid_test_cases) {
EXPECT_THROW(semver_t{version_str}, std::exception) << "'" << version_str << "' should be invalid";
}
}

0 comments on commit c51e692

Please sign in to comment.