diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 96ebbaf5..86152401 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -8,9 +8,9 @@ function(fetch_dependencies) include(${PROJECT_SOURCE_DIR}/cmake/CPM.cmake) - ############################################################################################################################ + #################################################################################################################### # google test - ############################################################################################################################ + #################################################################################################################### CPMAddPackage( NAME googletest GITHUB_REPOSITORY google/googletest @@ -20,9 +20,9 @@ function(fetch_dependencies) "INSTALL_GTEST OFF" ) - ############################################################################################################################ + #################################################################################################################### # yaml-cpp - ############################################################################################################################ + #################################################################################################################### CPMAddPackage( NAME yaml-cpp GITHUB_REPOSITORY jbeder/yaml-cpp @@ -42,15 +42,15 @@ function(fetch_dependencies) ) endif() - ############################################################################################################################ + ################################################################################################################### # boost::interprocess - ############################################################################################################################ + 
################################################################################################################### include(${PROJECT_SOURCE_DIR}/cmake/fetch_boost.cmake) fetch_boost_library(interprocess) - ############################################################################################################################ + ################################################################################################################### # Nanomsg - ############################################################################################################################ + ################################################################################################################### CPMAddPackage( NAME nanomsg GITHUB_REPOSITORY nanomsg/nng @@ -61,9 +61,9 @@ function(fetch_dependencies) "NNG_TOOLS OFF" ) - ############################################################################################################################ + ################################################################################################################### # Flatbuffers - ############################################################################################################################ + ################################################################################################################### CPMAddPackage( NAME flatbuffers GITHUB_REPOSITORY google/flatbuffers @@ -94,22 +94,27 @@ function(fetch_dependencies) set(FBS_GENERATED_HEADER ${FBS_GENERATED_HEADER} PARENT_SCOPE) endfunction() - ############################################################################################################################ + ################################################################################################################### # libuv (for process management) - ############################################################################################################################ + 
################################################################################################################### CPMAddPackage(NAME libuv GITHUB_REPOSITORY libuv/libuv GIT_TAG v1.48.0 OPTIONS "LIBUV_BUILD_TESTS OFF") - ############################################################################################################################ + ################################################################################################################### # fmt : https://github.com/fmtlib/fmt - ############################################################################################################################ + ################################################################################################################### CPMAddPackage(NAME fmt GITHUB_REPOSITORY fmtlib/fmt GIT_TAG 11.0.1) - ############################################################################################################################ + ################################################################################################################### # nanobench (for uBenchmarking) - ############################################################################################################################ + ################################################################################################################### if(MASTER_PROJECT) CPMAddPackage(NAME nanobench GITHUB_REPOSITORY martinus/nanobench GIT_TAG v4.3.11) endif() + + #################################################################################################################### + # spdlog + #################################################################################################################### + CPMAddPackage(NAME spdlog GITHUB_REPOSITORY gabime/spdlog GIT_TAG v1.14.1 VERSION v1.14.1) endfunction() fetch_dependencies() diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 69a6eef6..d1a3f16d 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -8,6 +8,8 @@ 
target_sources( backtrace.hpp gtest_initializer.hpp # FIXME: this should be tucked away with the tests logger.hpp + logger_.hpp # FIXME: replace old logger, see issue #315 + logger_.cpp ) target_include_directories(umd_common INTERFACE .) diff --git a/common/logger_.cpp b/common/logger_.cpp new file mode 100644 index 00000000..0a604e6f --- /dev/null +++ b/common/logger_.cpp @@ -0,0 +1,49 @@ +/* + * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "logger_.hpp" // TODO: rename after logger.hpp is removed + +#include +#include +#include + +#include + +namespace tt::umd::logger { + +void initialize(const Options& options) { + static std::mutex mutex; + std::scoped_lock lock{mutex}; + + if (detail::is_initialized.load(std::memory_order_relaxed)) { + return; + } + + std::vector sinks; + + if (options.log_to_stderr) { + auto stderr_sink = std::make_shared(); + sinks.push_back(stderr_sink); + } + + if (!options.filename.empty()) { + auto file_sink = std::make_shared(options.filename); + sinks.push_back(file_sink); + } + + auto logger = std::make_shared("UMD", sinks.begin(), sinks.end()); + logger->set_level(options.log_level); + logger->set_pattern(options.pattern); + + spdlog::set_default_logger(logger); + detail::is_initialized.store(true, std::memory_order_release); +} + +namespace detail { +std::atomic_bool is_initialized = false; +} + +} // namespace tt::umd::logger diff --git a/common/logger_.hpp b/common/logger_.hpp new file mode 100644 index 00000000..047611ad --- /dev/null +++ b/common/logger_.hpp @@ -0,0 +1,90 @@ +/* + * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +#pragma once + +#include + +#define SPDLOG_FMT_EXTERNAL +#include + +namespace tt::umd::logger { + +/** + * Parameters controlling the behavior of the logger. 
+ */ +struct Options { + bool log_to_stderr{true}; + std::string filename{}; + std::string pattern{"[%Y-%m-%d %H:%M:%S.%e] [%l] [%s:%#] %v"}; + spdlog::level::level_enum log_level{spdlog::level::debug}; + + // TODO: this can be augmented as needed (log rotation, flush policy...) +}; + +/** + * One-time initialization of the logger. + * + * If you don't call it, the logger will be initialized with default options the + * first time a message is logged. + */ +void initialize(const Options& options = Options{}); + +/** + * Macros for using the logger. + */ +#define UMD_TRACE(...) \ + do { \ + ::tt::umd::logger::detail::ensure_initialized(); \ + SPDLOG_TRACE(__VA_ARGS__); \ + } while (0) + +#define UMD_DEBUG(...) \ + do { \ + ::tt::umd::logger::detail::ensure_initialized(); \ + SPDLOG_DEBUG(__VA_ARGS__); \ + } while (0) + +#define UMD_INFO(...) \ + do { \ + ::tt::umd::logger::detail::ensure_initialized(); \ + SPDLOG_INFO(__VA_ARGS__); \ + } while (0) + +#define UMD_WARN(...) \ + do { \ + ::tt::umd::logger::detail::ensure_initialized(); \ + SPDLOG_WARN(__VA_ARGS__); \ + } while (0) + +#define UMD_ERROR(...) \ + do { \ + ::tt::umd::logger::detail::ensure_initialized(); \ + SPDLOG_ERROR(__VA_ARGS__); \ + } while (0) + +#define UMD_CRITICAL(...) \ + do { \ + ::tt::umd::logger::detail::ensure_initialized(); \ + SPDLOG_CRITICAL(__VA_ARGS__); \ + } while (0) + +/** + * This is not part of the API. + */ +namespace detail { +extern std::atomic_bool is_initialized; + +inline void ensure_initialized() { + if (!is_initialized.load(std::memory_order_acquire)) { + initialize(); + } +} + +} // namespace detail + +} // namespace tt::umd::logger diff --git a/common/timestamp.hpp b/common/timestamp.hpp new file mode 100644 index 00000000..055827e3 --- /dev/null +++ b/common/timestamp.hpp @@ -0,0 +1,61 @@ +/* + * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include + +#include +#include + +namespace tt::umd::util { + +class Timestamp { + std::chrono::steady_clock::time_point start; + +public: + Timestamp() : start(std::chrono::steady_clock::now()) {} + + void reset() { start = std::chrono::steady_clock::now(); } + + uint64_t nanoseconds() const { + auto now = std::chrono::steady_clock::now(); + return std::chrono::duration_cast(now - start).count(); + } + + uint64_t microseconds() const { + auto now = std::chrono::steady_clock::now(); + return std::chrono::duration_cast(now - start).count(); + } + + uint64_t milliseconds() const { + auto now = std::chrono::steady_clock::now(); + return std::chrono::duration_cast(now - start).count(); + } + + uint64_t seconds() const { + auto now = std::chrono::steady_clock::now(); + return std::chrono::duration_cast(now - start).count(); + } + + std::string to_string() const { + auto ns = nanoseconds(); + if (ns < 1000) { + return fmt::format("{} ns", ns); + } + auto us = microseconds(); + if (us < 1000) { + return fmt::format("{} μs", us); + } + auto ms = milliseconds(); + if (ms < 1000) { + return fmt::format("{} ms", ms); + } + return fmt::format("{} s", seconds()); + } +}; + +} // namespace tt::umd::util diff --git a/device/CMakeLists.txt b/device/CMakeLists.txt index 4f2cd09f..4ed6001f 100644 --- a/device/CMakeLists.txt +++ b/device/CMakeLists.txt @@ -55,6 +55,7 @@ target_link_libraries( hwloc rt Boost::interprocess + spdlog::spdlog_header_only fmt::fmt-header-only yaml-cpp::yaml-cpp ${CMAKE_CURRENT_SOURCE_DIR}/libs/${CMAKE_SYSTEM_PROCESSOR}/libcreate_ethernet_map.a diff --git a/device/api/umd/device/cluster.h b/device/api/umd/device/cluster.h index 3f5ab3c7..9f60adef 100644 --- a/device/api/umd/device/cluster.h +++ b/device/api/umd/device/cluster.h @@ -659,7 +659,6 @@ class Cluster : public tt_device { * Cluster constructor. * * @param sdesc_path SOC descriptor specifying single chip. 
- * @param ndesc_path Network Descriptor specifying the network topology of the system. * @param target_devices Devices to target. * @param num_host_mem_ch_per_mmio_device Requested number of host channels (hugepages). * @param skip_driver_allocs @@ -669,7 +668,6 @@ class Cluster : public tt_device { */ Cluster( const std::string& sdesc_path, - const std::string& ndesc_path, const std::set& target_devices, const uint32_t& num_host_mem_ch_per_mmio_device = 1, const bool skip_driver_allocs = false, @@ -982,7 +980,7 @@ class Cluster : public tt_device { tt::ARCH arch_name; std::unordered_map> m_pci_device_map; // Map of enabled pci devices int m_num_pci_devices; // Number of pci devices in system (enabled or disabled) - std::shared_ptr ndesc; + std::shared_ptr cluster_desc; // remote eth transfer setup static constexpr std::uint32_t NUM_ETH_CORES_FOR_NON_MMIO_TRANSFERS = 6; diff --git a/device/api/umd/device/pci_device.hpp b/device/api/umd/device/pci_device.hpp index 5966b5cb..e2e9400a 100644 --- a/device/api/umd/device/pci_device.hpp +++ b/device/api/umd/device/pci_device.hpp @@ -66,7 +66,6 @@ using tt::umd::semver_t; class PCIDevice { const std::string device_path; // Path to character device: /dev/tenstorrent/N const int pci_device_num; // N in /dev/tenstorrent/N - const int logical_id; // Unique identifier for each device in entire network topology const int pci_device_file_desc; // Character device file descriptor const PciDeviceInfo info; // PCI device info const int numa_node; // -1 if non-NUMA @@ -93,9 +92,8 @@ class PCIDevice { * sysfs, and maps device memory region(s) into the process address space. * * @param pci_device_number N in /dev/tenstorrent/N - * @param logical_device_id unique identifier for this device in the network topology */ - PCIDevice(int pci_device_number, int logical_device_id = 0); + PCIDevice(int pci_device_number); /** * PCIDevice destructor. 
@@ -129,13 +127,6 @@ class PCIDevice { */ int get_device_num() const { return pci_device_num; } - /** - * @return unique integer for each device in entire network topology - * TODO: target for removal; upper layers shouldn't to pass this in here. It - * is unused by this class. - */ - int get_logical_id() const { return logical_id; } - /** * @return PCI device id */ @@ -179,18 +170,18 @@ class PCIDevice { tt_xy_pair end, std::uint64_t address, bool multicast, - std::unordered_map> &harvested_coord_translation, + std::unordered_map &harvested_coord_translation, std::uint64_t ordering); dynamic_tlb set_dynamic_tlb( unsigned int tlb_index, tt_xy_pair target, std::uint64_t address, - std::unordered_map> &harvested_coord_translation, + std::unordered_map &harvested_coord_translation, std::uint64_t ordering = tt::umd::tlb_data::Relaxed); dynamic_tlb set_dynamic_tlb_broadcast( unsigned int tlb_index, std::uint64_t address, - std::unordered_map> &harvested_coord_translation, + std::unordered_map &harvested_coord_translation, tt_xy_pair start, tt_xy_pair end, std::uint64_t ordering = tt::umd::tlb_data::Relaxed); diff --git a/device/api/umd/device/tt_cluster_descriptor.h b/device/api/umd/device/tt_cluster_descriptor.h index 82618671..8fea01f0 100644 --- a/device/api/umd/device/tt_cluster_descriptor.h +++ b/device/api/umd/device/tt_cluster_descriptor.h @@ -35,6 +35,8 @@ enum BoardType : uint32_t { class tt_ClusterDescriptor { private: + tt_ClusterDescriptor() = default; + int get_ethernet_link_coord_distance(const eth_coord_t &location_a, const eth_coord_t &location_b) const; protected: @@ -77,9 +79,6 @@ class tt_ClusterDescriptor { void fill_chips_grouped_by_closest_mmio(); public: - tt_ClusterDescriptor() = default; - tt_ClusterDescriptor(const tt_ClusterDescriptor &) = default; - /* * Returns the pairs of channels that are connected where the first entry in the pair corresponds to the argument * ordering when calling the function An empty result implies that the two chips 
do not share any direct connection @@ -96,6 +95,7 @@ class tt_ClusterDescriptor { // get_cluster_descriptor_file_path will create ethernet map in the background. static std::string get_cluster_descriptor_file_path(); static std::unique_ptr create_from_yaml(const std::string &cluster_descriptor_file_path); + static std::unique_ptr create(); // This function is used to create mock cluster descriptor yaml files, for example for simulation. static std::unique_ptr create_mock_cluster( diff --git a/device/api/umd/device/tt_simulation_device.h b/device/api/umd/device/tt_simulation_device.h index 9b4778aa..3f1bd1f4 100644 --- a/device/api/umd/device/tt_simulation_device.h +++ b/device/api/umd/device/tt_simulation_device.h @@ -74,5 +74,5 @@ class tt_SimulationDevice : public tt_device { std::set target_devices_in_cluster = {}; std::set target_remote_chips = {}; tt::ARCH arch_name; - std::shared_ptr ndesc; + std::shared_ptr cluster_descriptor; }; diff --git a/device/cluster.cpp b/device/cluster.cpp index 4ab2a888..49596738 100644 --- a/device/cluster.cpp +++ b/device/cluster.cpp @@ -250,7 +250,7 @@ void Cluster::create_device( // Just use PCI interface id from physical_device_id given by cluster desc mmio map. For GS, already virtualized to // use available devices. 
- auto logical_to_physical_device_id_map = ndesc->get_chips_with_mmio(); + auto logical_to_physical_device_id_map = cluster_desc->get_chips_with_mmio(); log_assert( target_mmio_device_ids.size() > 0, "Must provide set of target_mmio_device_ids to Cluster constructor now."); @@ -268,8 +268,7 @@ void Cluster::create_device( "Opening TT_PCI_INTERFACE_ID {} for netlist target_device_id: {}", pci_interface_id, logical_device_id); - m_pci_device_map.insert( - {logical_device_id, std::make_unique(pci_interface_id, logical_device_id)}); + m_pci_device_map.insert({logical_device_id, std::make_unique(pci_interface_id)}); } auto dev = m_pci_device_map.at(logical_device_id).get(); @@ -325,7 +324,7 @@ void Cluster::create_device( for (const chip_id_t& chip : target_devices_in_cluster) { // Initialize identity mapping for Non-MMIO chips as well - if (!ndesc->is_chip_mmio_capable(chip)) { + if (!cluster_desc->is_chip_mmio_capable(chip)) { harvested_coord_translation.insert({chip, create_harvested_coord_translation(arch_name, true)}); flush_non_mmio_per_chip[chip] = false; } @@ -359,10 +358,10 @@ void Cluster::construct_cluster( std::unordered_set target_mmio_device_ids; for (auto& d : target_devices_in_cluster) { log_assert( - ndesc->get_all_chips().find(d) != ndesc->get_all_chips().end(), + cluster_desc->get_all_chips().find(d) != cluster_desc->get_all_chips().end(), "Target device {} not present in current cluster!", d); - if (ndesc->is_chip_mmio_capable(d)) { + if (cluster_desc->is_chip_mmio_capable(d)) { target_mmio_device_ids.insert(d); } else { target_remote_chips.insert(d); @@ -391,8 +390,8 @@ void Cluster::construct_cluster( } if (arch_name == tt::ARCH::WORMHOLE_B0) { - const auto& harvesting_masks = ndesc->get_harvesting_info(); - const auto& noc_translation_enabled = ndesc->get_noc_translation_table_en(); + const auto& harvesting_masks = cluster_desc->get_harvesting_info(); + const auto& noc_translation_enabled = cluster_desc->get_noc_translation_table_en(); 
translation_tables_en = false; for (auto& masks : harvesting_masks) { @@ -526,13 +525,14 @@ Cluster::Cluster( bool perform_harvesting, std::unordered_map simulated_harvesting_masks) : tt_device() { + cluster_desc = tt_ClusterDescriptor::create(); + // TODO: this should be fetched through ClusterDescriptor auto available_device_ids = detect_available_device_ids(); m_num_pci_devices = available_device_ids.size(); int physical_device_id = available_device_ids[0]; - // TODO: remove logical_device_id - PCIDevice pci_device(physical_device_id, 0); + PCIDevice pci_device(physical_device_id); tt::ARCH device_arch = pci_device.get_arch(); std::string sdesc_path = tt_SocDescriptor::get_soc_descriptor_path(device_arch); @@ -550,11 +550,8 @@ Cluster::Cluster( log_debug(LogSiliconDriver, "Passed target devices: {}", target_devices); } - std::string ndesc_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path(); - ndesc = tt_ClusterDescriptor::create_from_yaml(ndesc_path); - std::set target_devices; - for (const chip_id_t& d : ndesc->get_all_chips()) { + for (const chip_id_t& d : cluster_desc->get_all_chips()) { target_devices.insert(d); } target_devices_in_cluster = target_devices; @@ -576,13 +573,14 @@ Cluster::Cluster( bool perform_harvesting, std::unordered_map simulated_harvesting_masks) : tt_device() { + cluster_desc = tt_ClusterDescriptor::create(); + // TODO: this should be fetched through ClusterDescriptor auto available_device_ids = detect_available_device_ids(); m_num_pci_devices = available_device_ids.size(); int physical_device_id = available_device_ids[0]; - // TODO: remove logical_device_id - PCIDevice pci_device(physical_device_id, 0); + PCIDevice pci_device(physical_device_id); tt::ARCH device_arch = pci_device.get_arch(); std::string sdesc_path = tt_SocDescriptor::get_soc_descriptor_path(device_arch); @@ -600,9 +598,6 @@ Cluster::Cluster( log_debug(LogSiliconDriver, "Passed target devices: {}", target_devices); } - std::string ndesc_path = 
tt_ClusterDescriptor::get_cluster_descriptor_file_path(); - ndesc = tt_ClusterDescriptor::create_from_yaml(ndesc_path); - target_devices_in_cluster = target_devices; construct_cluster( @@ -616,7 +611,6 @@ Cluster::Cluster( Cluster::Cluster( const std::string& sdesc_path, - const std::string& ndesc_path, const std::set& target_devices, const uint32_t& num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs, @@ -624,6 +618,8 @@ Cluster::Cluster( bool perform_harvesting, std::unordered_map simulated_harvesting_masks) : tt_device() { + cluster_desc = tt_ClusterDescriptor::create(); + // TODO: this should be fetched through ClusterDescriptor auto available_device_ids = detect_available_device_ids(); m_num_pci_devices = available_device_ids.size(); @@ -642,13 +638,6 @@ Cluster::Cluster( log_debug(LogSiliconDriver, "Passed target devices: {}", target_devices); } - std::string cluster_descriptor_path = ndesc_path; - if (cluster_descriptor_path == "") { - cluster_descriptor_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path(); - } - - ndesc = tt_ClusterDescriptor::create_from_yaml(cluster_descriptor_path); - construct_cluster( sdesc_path, num_host_mem_ch_per_mmio_device, @@ -955,7 +944,6 @@ void Cluster::broadcast_pcie_tensix_risc_reset(chip_id_t chip_id, const TensixSo PCIDevice* device = get_pci_device(chip_id); auto valid = soft_resets & ALL_TENSIX_SOFT_RESET; - auto logical_id = device->get_logical_id(); log_debug( LogSiliconDriver, @@ -968,11 +956,11 @@ void Cluster::broadcast_pcie_tensix_risc_reset(chip_id_t chip_id, const TensixSo auto [soft_reset_reg, _] = device->set_dynamic_tlb_broadcast( architecture_implementation->get_reg_tlb(), architecture_implementation->get_tensix_soft_reset_addr(), - harvested_coord_translation, + harvested_coord_translation.at(chip_id), tt_xy_pair(0, 0), tt_xy_pair( architecture_implementation->get_grid_size_x() - 1, - architecture_implementation->get_grid_size_y() - 1 - num_rows_harvested.at(logical_id)), + 
architecture_implementation->get_grid_size_y() - 1 - num_rows_harvested.at(chip_id)), TLB_DATA::Posted); device->write_regs(soft_reset_reg, 1, &valid); tt_driver_atomics::sfence(); @@ -1003,7 +991,7 @@ void Cluster::deassert_risc_reset_at_core(tt_cxy_pair core, const TensixSoftRese get_soc_descriptor(target_device).ethernet_cores.end(), core) != get_soc_descriptor(target_device).ethernet_cores.end(), "Cannot deassert reset on a non-tensix or harvested core"); - bool target_is_mmio_capable = ndesc->is_chip_mmio_capable(target_device); + bool target_is_mmio_capable = cluster_desc->is_chip_mmio_capable(target_device); if (target_is_mmio_capable) { log_assert( m_pci_device_map.find(target_device) != m_pci_device_map.end(), @@ -1027,7 +1015,7 @@ void Cluster::assert_risc_reset_at_core(tt_cxy_pair core) { get_soc_descriptor(target_device).ethernet_cores.end(), core) != get_soc_descriptor(target_device).ethernet_cores.end(), "Cannot assert reset on a non-tensix or harvested core"); - bool target_is_mmio_capable = ndesc->is_chip_mmio_capable(target_device); + bool target_is_mmio_capable = cluster_desc->is_chip_mmio_capable(target_device); if (target_is_mmio_capable) { log_assert( m_pci_device_map.find(target_device) != m_pci_device_map.end(), @@ -1047,14 +1035,14 @@ void Cluster::cleanup_shared_host_state() { } } -std::unordered_set Cluster::get_all_chips_in_cluster() { return ndesc->get_all_chips(); } +std::unordered_set Cluster::get_all_chips_in_cluster() { return cluster_desc->get_all_chips(); } int Cluster::get_number_of_chips_in_cluster() { // Returns the number of chips seen in the network descriptor - return ndesc->get_all_chips().size(); + return cluster_desc->get_all_chips().size(); } -tt_ClusterDescriptor* Cluster::get_cluster_description() { return ndesc.get(); } +tt_ClusterDescriptor* Cluster::get_cluster_description() { return cluster_desc.get(); } // Can be used before instantiating a silicon device int Cluster::detect_number_of_chips() { @@ -1085,7 +1073,7 
@@ std::function Cluster::get_fast_pcie_s } tt::Writer Cluster::get_static_tlb_writer(tt_cxy_pair target) { - if (!ndesc->is_chip_mmio_capable(target.chip)) { + if (!cluster_desc->is_chip_mmio_capable(target.chip)) { throw std::runtime_error(fmt::format("Target not in MMIO chip: {}", target.str())); } @@ -1154,7 +1142,11 @@ void Cluster::write_device_memory( while (size_in_bytes > 0) { auto [mapped_address, tlb_size] = dev->set_dynamic_tlb( - tlb_index, target, address, harvested_coord_translation, dynamic_tlb_ordering_modes.at(fallback_tlb)); + tlb_index, + target, + address, + harvested_coord_translation.at(target.chip), + dynamic_tlb_ordering_modes.at(fallback_tlb)); uint32_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size); dev->write_block(mapped_address, transfer_size, buffer_addr); @@ -1205,7 +1197,11 @@ void Cluster::read_device_memory( log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index); while (size_in_bytes > 0) { auto [mapped_address, tlb_size] = dev->set_dynamic_tlb( - tlb_index, target, address, harvested_coord_translation, dynamic_tlb_ordering_modes.at(fallback_tlb)); + tlb_index, + target, + address, + harvested_coord_translation.at(target.chip), + dynamic_tlb_ordering_modes.at(fallback_tlb)); uint32_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size); dev->read_block(mapped_address, transfer_size, buffer_addr); @@ -1325,7 +1321,7 @@ int Cluster::get_clock(int logical_device_id) { } uint32_t clock; - auto mmio_capable_chip_logical = ndesc->get_closest_mmio_capable_chip(logical_device_id); + auto mmio_capable_chip_logical = cluster_desc->get_closest_mmio_capable_chip(logical_device_id); PCIDevice* pci_device = get_pci_device(mmio_capable_chip_logical); auto exit_code = arc_msg( logical_device_id, @@ -1356,7 +1352,7 @@ Cluster::~Cluster() { cleanup_shared_host_state(); m_pci_device_map.clear(); - ndesc.reset(); + cluster_desc.reset(); soc_descriptor_per_chip.clear(); dynamic_tlb_config.clear(); tlb_config_map.clear(); 
@@ -1381,7 +1377,7 @@ void Cluster::configure_tlb( ordering == TLB_DATA::Strict || ordering == TLB_DATA::Posted || ordering == TLB_DATA::Relaxed, "Invalid ordering specified in Cluster::configure_tlb"); PCIDevice* pci_device = get_pci_device(logical_device_id); - pci_device->set_dynamic_tlb(tlb_index, core, address, harvested_coord_translation, ordering); + pci_device->set_dynamic_tlb(tlb_index, core, address, harvested_coord_translation.at(logical_device_id), ordering); auto tlb_size = std::get<1>(pci_device->get_architecture_implementation()->describe_tlb(tlb_index).value()); if (tlb_config_map.find(logical_device_id) == tlb_config_map.end()) { tlb_config_map.insert({logical_device_id, {}}); @@ -1437,15 +1433,15 @@ void Cluster::init_pcie_iatus() { } int Cluster::test_setup_interface() { + int ret_val = 0; + int logical_device_id = m_pci_device_map.begin()->first; + PCIDevice* dev = m_pci_device_map.at(logical_device_id).get(); if (arch_name == tt::ARCH::GRAYSKULL) { - int ret_val = 0; - PCIDevice* dev = m_pci_device_map.begin()->second.get(); - uint32_t mapped_reg = dev->set_dynamic_tlb( dev->get_architecture_implementation()->get_reg_tlb(), tt_xy_pair(0, 0), 0xffb20108, - harvested_coord_translation) + harvested_coord_translation.at(logical_device_id)) .bar_offset; uint32_t regval = 0; @@ -1453,14 +1449,11 @@ int Cluster::test_setup_interface() { ret_val = (regval != 0xffffffff && ((regval & 0x1) == 1)) ? 
0 : 1; return ret_val; } else if (arch_name == tt::ARCH::WORMHOLE_B0) { - int ret_val = 0; - PCIDevice* dev = m_pci_device_map.begin()->second.get(); - uint32_t mapped_reg = dev->set_dynamic_tlb( dev->get_architecture_implementation()->get_reg_tlb(), tt_xy_pair(1, 0), 0xffb20108, - harvested_coord_translation) + harvested_coord_translation.at(logical_device_id)) .bar_offset; uint32_t regval = 0; @@ -1474,7 +1467,7 @@ int Cluster::test_setup_interface() { // uint32_t mapped_reg = dev->set_dynamic_tlb(m_pci_device_map.begin()->second, // dev->get_architecture_implementation()->get_reg_tlb(), tt_xy_pair(1, 0), 0xffb20108, - // harvested_coord_translation).bar_offset; + // harvested_coord_translation.at(logical_device_id)).bar_offset; // uint32_t regval = 0; // read_regs(dev, mapped_reg, 1, &regval); @@ -1709,7 +1702,7 @@ uint32_t Cluster::get_harvested_rows(int logical_device_id) { if (harv_override) { harv = std::stoul(harv_override, nullptr, 16); } else { - auto mmio_capable_chip_logical = ndesc->get_closest_mmio_capable_chip(logical_device_id); + auto mmio_capable_chip_logical = cluster_desc->get_closest_mmio_capable_chip(logical_device_id); PCIDevice* pci_device = get_pci_device(mmio_capable_chip_logical); int harvesting_msg_code = arc_msg( logical_device_id, @@ -1864,9 +1857,9 @@ void Cluster::write_to_non_mmio_device( if (broadcast) { mmio_capable_chip_logical = core.chip; } else { - mmio_capable_chip_logical = ndesc->get_closest_mmio_capable_chip(core.chip); + mmio_capable_chip_logical = cluster_desc->get_closest_mmio_capable_chip(core.chip); } - flush_non_mmio_per_chip[ndesc->get_closest_mmio_capable_chip(core.chip)] = true; + flush_non_mmio_per_chip[cluster_desc->get_closest_mmio_capable_chip(core.chip)] = true; if (non_mmio_transfer_cores_customized) { log_assert( @@ -1877,7 +1870,7 @@ void Cluster::write_to_non_mmio_device( using data_word_t = uint32_t; constexpr int DATA_WORD_SIZE = sizeof(data_word_t); constexpr int BROADCAST_HEADER_SIZE =
sizeof(data_word_t) * 8; // Broadcast header is 8 words - const auto target_chip = ndesc->get_chip_locations().at(core.chip); + const auto target_chip = cluster_desc->get_chip_locations().at(core.chip); std::string write_tlb = "LARGE_WRITE_TLB"; std::string read_tlb = "LARGE_READ_TLB"; @@ -2110,8 +2103,8 @@ void Cluster::read_from_non_mmio_device(void* mem_ptr, tt_cxy_pair core, uint64_ std::string empty_tlb = ""; translate_to_noc_table_coords(core.chip, core.y, core.x); - const auto& mmio_capable_chip_logical = ndesc->get_closest_mmio_capable_chip(core.chip); - const eth_coord_t target_chip = ndesc->get_chip_locations().at(core.chip); + const auto& mmio_capable_chip_logical = cluster_desc->get_closest_mmio_capable_chip(core.chip); + const eth_coord_t target_chip = cluster_desc->get_chip_locations().at(core.chip); std::vector erisc_command; std::vector erisc_q_rptr; @@ -2386,12 +2379,12 @@ void Cluster::wait_for_non_mmio_flush(const chip_id_t chip_id) { log_assert(arch_name != tt::ARCH::BLACKHOLE, "Non-MMIO flush not supported in Blackhole"); std::string read_tlb = "LARGE_READ_TLB"; - if (!this->ndesc->is_chip_remote(chip_id)) { + if (!this->cluster_desc->is_chip_remote(chip_id)) { log_debug(LogSiliconDriver, "Chip {} is not a remote chip, skipping wait_for_non_mmio_flush", chip_id); return; } - chip_id_t mmio_connected_chip = ndesc->get_closest_mmio_capable_chip(chip_id); + chip_id_t mmio_connected_chip = cluster_desc->get_closest_mmio_capable_chip(chip_id); wait_for_connected_non_mmio_flush(mmio_connected_chip); } @@ -2463,8 +2456,8 @@ std::unordered_map>>& Cluster::get_ether for (const auto& chip : target_devices_in_cluster) { if (chips_to_exclude.find(chip) == chips_to_exclude.end()) { // Get shelf local physical chip id included in broadcast - chip_id_t physical_chip_id = ndesc->get_shelf_local_physical_chip_coords(chip); - eth_coord_t eth_coords = ndesc->get_chip_locations().at(chip); + chip_id_t physical_chip_id = 
cluster_desc->get_shelf_local_physical_chip_coords(chip); + eth_coord_t eth_coords = cluster_desc->get_chip_locations().at(chip); // Rack word to be set in header uint32_t rack_word = eth_coords.rack >> 2; // Rack byte to be set in header @@ -2478,7 +2471,7 @@ std::unordered_map>>& Cluster::get_ether if (eth_coords.rack == 0 && eth_coords.shelf == 0) { // Shelf 0 + Rack 0: Either an MMIO chip or a remote chip potentially connected to host through its // own MMIO counterpart. - closest_mmio_chip = ndesc->get_closest_mmio_capable_chip(chip); + closest_mmio_chip = cluster_desc->get_closest_mmio_capable_chip(chip); } else { // All other shelves: Group these under the same/first MMIO chip, since all MMIO chips are // connected. @@ -2564,7 +2557,12 @@ void Cluster::pcie_broadcast_write( const scoped_lock lock(*get_mutex(fallback_tlb, pci_device->get_device_num())); while (size_in_bytes > 0) { auto [mapped_address, tlb_size] = pci_device->set_dynamic_tlb_broadcast( - tlb_index, addr, harvested_coord_translation, start, end, dynamic_tlb_ordering_modes.at(fallback_tlb)); + tlb_index, + addr, + harvested_coord_translation.at(chip), + start, + end, + dynamic_tlb_ordering_modes.at(fallback_tlb)); uint64_t transfer_size = std::min((uint64_t)size_in_bytes, tlb_size); pci_device->write_block(mapped_address, transfer_size, buffer_addr); @@ -2934,7 +2932,7 @@ void Cluster::insert_host_to_device_barrier( void Cluster::init_membars() { for (const auto& chip : target_devices_in_cluster) { - if (ndesc->is_chip_mmio_capable(chip)) { + if (cluster_desc->is_chip_mmio_capable(chip)) { set_membar_flag( chip, workers_per_chip.at(chip), @@ -2951,7 +2949,7 @@ void Cluster::init_membars() { void Cluster::l1_membar( const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set& cores) { - if (ndesc->is_chip_mmio_capable(chip)) { + if (cluster_desc->is_chip_mmio_capable(chip)) { const auto& all_workers = workers_per_chip.at(chip); const auto& all_eth = eth_cores; if 
(cores.size()) { @@ -2983,7 +2981,7 @@ void Cluster::l1_membar( void Cluster::dram_membar( const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set& cores) { - if (ndesc->is_chip_mmio_capable(chip)) { + if (cluster_desc->is_chip_mmio_capable(chip)) { if (cores.size()) { for (const auto& core : cores) { log_assert( @@ -3001,7 +2999,7 @@ void Cluster::dram_membar( void Cluster::dram_membar( const chip_id_t chip, const std::string& fallback_tlb, const std::unordered_set& channels) { - if (ndesc->is_chip_mmio_capable(chip)) { + if (cluster_desc->is_chip_mmio_capable(chip)) { if (channels.size()) { std::unordered_set dram_cores_to_sync = {}; for (const auto& chan : channels) { @@ -3020,7 +3018,7 @@ void Cluster::dram_membar( void Cluster::write_to_device( const void* mem_ptr, uint32_t size, tt_cxy_pair core, uint64_t addr, const std::string& fallback_tlb) { - bool target_is_mmio_capable = ndesc->is_chip_mmio_capable(core.chip); + bool target_is_mmio_capable = cluster_desc->is_chip_mmio_capable(core.chip); if (target_is_mmio_capable) { if (fallback_tlb == "REG_TLB") { write_mmio_device_register(mem_ptr, core, addr, size, fallback_tlb); @@ -3045,7 +3043,7 @@ void Cluster::read_mmio_device_register( log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index); auto [mapped_address, tlb_size] = - pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation, TLB_DATA::Strict); + pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation.at(core.chip), TLB_DATA::Strict); // Align block to 4bytes if needed. 
auto aligned_buf = tt_4_byte_aligned_buffer(mem_ptr, size); pci_device->read_regs(mapped_address, aligned_buf.block_size / sizeof(std::uint32_t), aligned_buf.local_storage); @@ -3065,7 +3063,7 @@ void Cluster::write_mmio_device_register( log_debug(LogSiliconDriver, " dynamic tlb_index: {}", tlb_index); auto [mapped_address, tlb_size] = - pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation, TLB_DATA::Strict); + pci_device->set_dynamic_tlb(tlb_index, core, addr, harvested_coord_translation.at(core.chip), TLB_DATA::Strict); // Align block to 4bytes if needed. auto aligned_buf = tt_4_byte_aligned_buffer(mem_ptr, size); if (aligned_buf.input_size != aligned_buf.block_size) { @@ -3077,7 +3075,7 @@ void Cluster::write_mmio_device_register( void Cluster::read_from_device( void* mem_ptr, tt_cxy_pair core, uint64_t addr, uint32_t size, const std::string& fallback_tlb) { - bool target_is_mmio_capable = ndesc->is_chip_mmio_capable(core.chip); + bool target_is_mmio_capable = cluster_desc->is_chip_mmio_capable(core.chip); if (target_is_mmio_capable) { if (fallback_tlb == "REG_TLB") { read_mmio_device_register(mem_ptr, core, addr, size, fallback_tlb); @@ -3105,7 +3103,7 @@ int Cluster::arc_msg( uint32_t* return_3, uint32_t* return_4) { log_assert(arch_name != tt::ARCH::BLACKHOLE, "ARC messages not supported in Blackhole"); - if (ndesc->is_chip_mmio_capable(logical_device_id)) { + if (cluster_desc->is_chip_mmio_capable(logical_device_id)) { return pcie_arc_msg(logical_device_id, msg_code, wait_for_done, arg0, arg1, timeout, return_3, return_4); } else { return remote_arc_msg(logical_device_id, msg_code, wait_for_done, arg0, arg1, timeout, return_3, return_4); @@ -3128,7 +3126,7 @@ void Cluster::send_remote_tensix_risc_reset_to_core( } int Cluster::set_remote_power_state(const chip_id_t& chip, tt_DevicePowerState device_state) { - auto mmio_capable_chip_logical = ndesc->get_closest_mmio_capable_chip(chip); + auto mmio_capable_chip_logical = 
cluster_desc->get_closest_mmio_capable_chip(chip); return remote_arc_msg( chip, get_power_state_arc_msg(mmio_capable_chip_logical, device_state), true, 0, 0, 1, NULL, NULL); } @@ -3185,7 +3183,7 @@ void Cluster::set_power_state(tt_DevicePowerState device_state) { // MT Initial BH - ARC messages not supported in Blackhole if (arch_name != tt::ARCH::BLACKHOLE) { for (auto& chip : target_devices_in_cluster) { - if (ndesc->is_chip_mmio_capable(chip)) { + if (cluster_desc->is_chip_mmio_capable(chip)) { set_pcie_power_state(device_state); } else { int exit_code = set_remote_power_state(chip, device_state); @@ -3202,7 +3200,7 @@ void Cluster::enable_ethernet_queue(int timeout) { switch (arch) { case tt::ARCH::WORMHOLE_B0: { - if (ndesc->is_chip_mmio_capable(chip)) { + if (cluster_desc->is_chip_mmio_capable(chip)) { enable_local_ethernet_queue(chip, timeout); } else { enable_remote_ethernet_queue(chip, timeout); @@ -3237,10 +3235,10 @@ void Cluster::deassert_resets_and_set_power_state() { 0, 0); } - if (ndesc != nullptr) { + if (cluster_desc != nullptr) { for (const chip_id_t& chip : target_devices_in_cluster) { - if (!ndesc->is_chip_mmio_capable(chip)) { - auto mmio_capable_chip_logical = ndesc->get_closest_mmio_capable_chip(chip); + if (!cluster_desc->is_chip_mmio_capable(chip)) { + auto mmio_capable_chip_logical = cluster_desc->get_closest_mmio_capable_chip(chip); auto pci_device = get_pci_device(mmio_capable_chip_logical); remote_arc_msg( chip, diff --git a/device/mockup/tt_mockup_device.hpp b/device/mockup/tt_mockup_device.hpp index 15107ebc..e1023986 100644 --- a/device/mockup/tt_mockup_device.hpp +++ b/device/mockup/tt_mockup_device.hpp @@ -115,5 +115,5 @@ class tt_MockupDevice : public tt_device { std::set target_devices_in_cluster = {}; std::set target_remote_chips = {}; tt::ARCH arch_name; - std::shared_ptr ndesc; + std::shared_ptr cluster_descriptor; }; diff --git a/device/pcie/pci_device.cpp b/device/pcie/pci_device.cpp index 5afb33fa..c49a9d78 100644 --- 
a/device/pcie/pci_device.cpp +++ b/device/pcie/pci_device.cpp @@ -258,10 +258,9 @@ tt::ARCH PciDeviceInfo::get_arch() const { return infos; } -PCIDevice::PCIDevice(int pci_device_number, int logical_device_id) : +PCIDevice::PCIDevice(int pci_device_number) : device_path(fmt::format("/dev/tenstorrent/{}", pci_device_number)), pci_device_num(pci_device_number), - logical_id(logical_device_id), pci_device_file_desc(open(device_path.c_str(), O_RDWR | O_CLOEXEC)), info(read_device_info(pci_device_file_desc)), numa_node(read_sysfs(info, "numa_node", -1)), // default to -1 if not found @@ -602,7 +601,7 @@ dynamic_tlb PCIDevice::set_dynamic_tlb( tt_xy_pair end, std::uint64_t address, bool multicast, - std::unordered_map> &harvested_coord_translation, + std::unordered_map &harvested_coord_translation, std::uint64_t ordering) { auto architecture_implementation = get_architecture_implementation(); if (multicast) { @@ -624,8 +623,8 @@ dynamic_tlb PCIDevice::set_dynamic_tlb( tt::umd::tlb_configuration tlb_config = architecture_implementation->get_tlb_configuration(tlb_index); std::uint32_t TLB_CFG_REG_SIZE_BYTES = architecture_implementation->get_tlb_cfg_reg_size_bytes(); - auto translated_start_coords = harvested_coord_translation.at(logical_id).at(start); - auto translated_end_coords = harvested_coord_translation.at(logical_id).at(end); + auto translated_start_coords = harvested_coord_translation.at(start); + auto translated_end_coords = harvested_coord_translation.at(end); uint32_t tlb_address = address / tlb_config.size; uint32_t local_address = address % tlb_config.size; uint64_t tlb_base = tlb_config.base + (tlb_config.size * tlb_config.index_offset); @@ -665,7 +664,7 @@ dynamic_tlb PCIDevice::set_dynamic_tlb( unsigned int tlb_index, tt_xy_pair target, std::uint64_t address, - std::unordered_map> &harvested_coord_translation, + std::unordered_map &harvested_coord_translation, std::uint64_t ordering) { return set_dynamic_tlb(tlb_index, tt_xy_pair(0, 0), target, address, 
false, harvested_coord_translation, ordering); } @@ -673,7 +672,7 @@ dynamic_tlb PCIDevice::set_dynamic_tlb( dynamic_tlb PCIDevice::set_dynamic_tlb_broadcast( unsigned int tlb_index, std::uint64_t address, - std::unordered_map> &harvested_coord_translation, + std::unordered_map &harvested_coord_translation, tt_xy_pair start, tt_xy_pair end, std::uint64_t ordering) { @@ -688,7 +687,6 @@ tt::umd::architecture_implementation *PCIDevice::get_architecture_implementation bool PCIDevice::init_hugepage(uint32_t num_host_mem_channels) { const size_t hugepage_size = HUGEPAGE_REGION_SIZE; - // Convert from logical (device_id in netlist) to physical device_id (in case of virtualization) auto physical_device_id = get_device_num(); std::string hugepage_dir = find_hugepage_dir(hugepage_size); diff --git a/device/simulation/deprecated/tt_emulation_device.cpp b/device/simulation/deprecated/tt_emulation_device.cpp index 8f096e64..4d633c28 100644 --- a/device/simulation/deprecated/tt_emulation_device.cpp +++ b/device/simulation/deprecated/tt_emulation_device.cpp @@ -14,14 +14,14 @@ tt_emulation_device::tt_emulation_device(const std::string& sdesc_path) : tt_device(sdesc_path) { soc_descriptor_per_chip.emplace(0, tt_SocDescriptor(sdesc_path)); - ndesc = tt_ClusterDescriptor::create_mock_cluster({0}); + cluster_descriptor = tt_ClusterDescriptor::create_mock_cluster({0}); tt_zebu_wrapper_inst = new tt_emu_zemi3_wrapper(); log_info(tt::LogEmulationDriver, "Created Emulation Device "); } tt_emulation_device::~tt_emulation_device() { - ndesc.reset(); + cluster_descriptor.reset(); delete tt_zebu_wrapper_inst; log_info(tt::LogEmulationDriver, "Destroyed Emulation Device "); } @@ -190,7 +190,7 @@ void tt_emulation_device::translate_to_noc_table_coords(chip_id_t device_id, std return; } -tt_ClusterDescriptor* tt_emulation_device::get_cluster_description() { return ndesc.get(); } +tt_ClusterDescriptor* tt_emulation_device::get_cluster_description() { return cluster_descriptor.get(); } std::set 
tt_emulation_device::get_target_mmio_device_ids() { log_error("LogEmulationDriver: get_target_mmio_device_ids not implemented"); diff --git a/device/simulation/deprecated/tt_emulation_device.h b/device/simulation/deprecated/tt_emulation_device.h index 8c411d07..7452041b 100644 --- a/device/simulation/deprecated/tt_emulation_device.h +++ b/device/simulation/deprecated/tt_emulation_device.h @@ -93,7 +93,7 @@ class tt_emulation_device : public tt_device { private: tt_device_l1_address_params l1_address_params; - std::shared_ptr ndesc; + std::shared_ptr cluster_descriptor; tt_device_dram_address_params dram_address_params; // zebu wrapper, provides interface to zebu emulator device through axi and command transactors diff --git a/device/simulation/deprecated/tt_emulation_stub.cpp b/device/simulation/deprecated/tt_emulation_stub.cpp index bdd97b27..88d861eb 100644 --- a/device/simulation/deprecated/tt_emulation_stub.cpp +++ b/device/simulation/deprecated/tt_emulation_stub.cpp @@ -92,7 +92,7 @@ void tt_emulation_device::translate_to_noc_table_coords(chip_id_t device_id, std return; } -tt_ClusterDescriptor* tt_emulation_device::get_cluster_description() { return ndesc.get(); } +tt_ClusterDescriptor* tt_emulation_device::get_cluster_description() { return cluster_descriptor.get(); } std::set tt_emulation_device::get_target_mmio_device_ids() { return {}; } diff --git a/device/simulation/deprecated/tt_versim_device.cpp b/device/simulation/deprecated/tt_versim_device.cpp index acbfe13e..cdf10bb5 100644 --- a/device/simulation/deprecated/tt_versim_device.cpp +++ b/device/simulation/deprecated/tt_versim_device.cpp @@ -68,9 +68,9 @@ void translate_soc_descriptor_to_ca_soc(CA::Soc& soc, const tt_SocDescriptor soc tt_VersimDevice::tt_VersimDevice(const std::string& sdesc_path, const std::string& ndesc_path) : tt_device(sdesc_path) { soc_descriptor_per_chip.emplace(0, tt_SocDescriptor(sdesc_path)); if (ndesc_path == "") { - ndesc = tt_ClusterDescriptor::create_mock_cluster({0}); + 
cluster_descriptor = tt_ClusterDescriptor::create_mock_cluster({0}); } else { - ndesc = tt_ClusterDescriptor::create_from_yaml(ndesc_path); + cluster_descriptor = tt_ClusterDescriptor::create_from_yaml(ndesc_path); } } @@ -78,7 +78,7 @@ std::unordered_map& tt_VersimDevice::get_virtual_so return soc_descriptor_per_chip; } -tt_ClusterDescriptor* tt_VersimDevice::get_cluster_description() { return ndesc.get(); } +tt_ClusterDescriptor* tt_VersimDevice::get_cluster_description() { return cluster_descriptor.get(); } void tt_VersimDevice::start_device(const tt_device_params& device_params) { bool no_checkers = true; @@ -143,7 +143,7 @@ void tt_VersimDevice::start( std::cout << "Versim Device: Done start " << std::endl; } -tt_VersimDevice::~tt_VersimDevice() { ndesc.reset(); } +tt_VersimDevice::~tt_VersimDevice() { cluster_descriptor.reset(); } // bool tt_VersimDevice::run() { // std::cout << "Versim Device: Run " << std::endl; diff --git a/device/simulation/deprecated/tt_versim_device.h b/device/simulation/deprecated/tt_versim_device.h index 2c71f1be..6e1cb995 100644 --- a/device/simulation/deprecated/tt_versim_device.h +++ b/device/simulation/deprecated/tt_versim_device.h @@ -118,6 +118,6 @@ class tt_VersimDevice : public tt_device { tt_device_l1_address_params l1_address_params; tt_device_dram_address_params dram_address_params; versim::VersimSimulator* versim; - std::shared_ptr ndesc; + std::shared_ptr cluster_descriptor; void* p_ca_soc_manager; }; diff --git a/device/simulation/deprecated/tt_versim_stub.cpp b/device/simulation/deprecated/tt_versim_stub.cpp index c80e0bdd..ef307028 100644 --- a/device/simulation/deprecated/tt_versim_stub.cpp +++ b/device/simulation/deprecated/tt_versim_stub.cpp @@ -140,4 +140,4 @@ std::uint32_t tt_VersimDevice::get_host_channel_size(std::uint32_t device_id, st std::map tt_VersimDevice::get_clocks() { return std::map(); } -tt_ClusterDescriptor* tt_VersimDevice::get_cluster_description() { return ndesc.get(); } +tt_ClusterDescriptor* 
tt_VersimDevice::get_cluster_description() { return cluster_descriptor.get(); } diff --git a/device/tt_cluster_descriptor.cpp b/device/tt_cluster_descriptor.cpp index 1dde5383..dc9b0b42 100644 --- a/device/tt_cluster_descriptor.cpp +++ b/device/tt_cluster_descriptor.cpp @@ -437,6 +437,10 @@ std::unique_ptr tt_ClusterDescriptor::create_from_yaml( return desc; } +std::unique_ptr tt_ClusterDescriptor::create() { + return tt_ClusterDescriptor::create_from_yaml(tt_ClusterDescriptor::get_cluster_descriptor_file_path()); +} + std::unique_ptr tt_ClusterDescriptor::create_mock_cluster( const std::vector &logical_device_ids, tt::ARCH arch) { std::unique_ptr desc = std::unique_ptr(new tt_ClusterDescriptor()); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9d3aceae..631733b5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -58,8 +58,8 @@ if(MASTER_PROJECT) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/microbenchmark) endif() add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/api) -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/pcie) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/misc) +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/pcie) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/simulation) if($ENV{ARCH_NAME} STREQUAL "wormhole_b0") add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/wormhole) diff --git a/tests/api/test_cluster_descriptor.cpp b/tests/api/test_cluster_descriptor.cpp index 5545a1eb..97be7ab1 100644 --- a/tests/api/test_cluster_descriptor.cpp +++ b/tests/api/test_cluster_descriptor.cpp @@ -16,13 +16,6 @@ // TODO: Needed for detect_arch, remove when it is part of cluster descriptor. #include "umd/device/cluster.h" -inline std::unique_ptr get_cluster_desc() { - // TODO: remove getting manually cluster descriptor from yaml. 
- std::string yaml_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path(); - - return tt_ClusterDescriptor::create_from_yaml(yaml_path); -} - TEST(ApiClusterDescriptorTest, DetectArch) { // TODO: This should be part of cluster descriptor. It is currently used like this from tt_metal. tt::ARCH arch = detect_arch(); @@ -43,7 +36,7 @@ TEST(ApiClusterDescriptorTest, DetectArch) { } TEST(ApiClusterDescriptorTest, BasicFunctionality) { - std::unique_ptr cluster_desc = get_cluster_desc(); + std::unique_ptr cluster_desc = tt_ClusterDescriptor::create(); if (cluster_desc == nullptr) { GTEST_SKIP() << "No chips present on the system. Skipping test."; diff --git a/tests/blackhole/test_silicon_driver_bh.cpp b/tests/blackhole/test_silicon_driver_bh.cpp index 735bad0d..8dd64dac 100644 --- a/tests/blackhole/test_silicon_driver_bh.cpp +++ b/tests/blackhole/test_silicon_driver_bh.cpp @@ -72,8 +72,7 @@ std::int32_t get_static_tlb_index(tt_xy_pair target) { std::set get_target_devices() { std::set target_devices; - std::unique_ptr cluster_desc_uniq = - tt_ClusterDescriptor::create_from_yaml(tt_ClusterDescriptor::get_cluster_descriptor_file_path()); + std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create(); for (int i = 0; i < cluster_desc_uniq->get_number_of_chips(); i++) { target_devices.insert(i); } @@ -88,7 +87,6 @@ TEST(SiliconDriverBH, CreateDestroy) { for (int i = 0; i < 50; i++) { Cluster device = Cluster( test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml"), - tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_host_mem_ch_per_mmio_device, false, @@ -107,7 +105,7 @@ TEST(SiliconDriverBH, CreateDestroy) { // { // std::unique_ptr cluster_desc_uniq = -// tt_ClusterDescriptor::create_from_yaml(tt_ClusterDescriptor::get_cluster_descriptor_file_path()); +// tt_ClusterDescriptor::create(); // if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { // GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped 
because it can only be run on a two chip nebula // system"; @@ -117,7 +115,6 @@ TEST(SiliconDriverBH, CreateDestroy) { // uint32_t num_host_mem_ch_per_mmio_device = 1; // Cluster device = Cluster( // "./tests/soc_descs/wormhole_b0_8x10.yaml", -// tt_ClusterDescriptor::get_cluster_descriptor_file_path(), // target_devices, // num_host_mem_ch_per_mmio_device, // false, @@ -143,7 +140,7 @@ TEST(SiliconDriverBH, CreateDestroy) { // std::unordered_map simulated_harvesting_masks = {{0, 30}, {1, 60}}; // { // std::unique_ptr cluster_desc_uniq = -// tt_ClusterDescriptor::create_from_yaml(tt_ClusterDescriptor::get_cluster_descriptor_file_path()); +// tt_ClusterDescriptor::create(); // if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { // GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula // system"; @@ -154,7 +151,6 @@ TEST(SiliconDriverBH, CreateDestroy) { // // Initialize the driver with a 1x1 descriptor and explictly do not perform harvesting // Cluster device = Cluster( // "./tests/soc_descs/wormhole_b0_1x1.yaml", -// tt_ClusterDescriptor::get_cluster_descriptor_file_path(), // target_devices, // num_host_mem_ch_per_mmio_device, // false, @@ -177,7 +173,7 @@ TEST(SiliconDriverBH, CreateDestroy) { // std::unordered_map simulated_harvesting_masks = {{0, 30}, {1, 60}}; // { // std::unique_ptr cluster_desc_uniq = -// tt_ClusterDescriptor::create_from_yaml(tt_ClusterDescriptor::get_cluster_descriptor_file_path()); +// tt_ClusterDescriptor::create(); // if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { // GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula // system"; @@ -188,7 +184,6 @@ TEST(SiliconDriverBH, CreateDestroy) { // Cluster device = Cluster( // "./tests/soc_descs/wormhole_b0_8x10.yaml", -// tt_ClusterDescriptor::get_cluster_descriptor_file_path(), // target_devices, // num_host_mem_ch_per_mmio_device, // false, diff --git 
a/tests/galaxy/test_umd_concurrent_threads.cpp b/tests/galaxy/test_umd_concurrent_threads.cpp index e668160f..f7f6a8d5 100644 --- a/tests/galaxy/test_umd_concurrent_threads.cpp +++ b/tests/galaxy/test_umd_concurrent_threads.cpp @@ -24,8 +24,7 @@ static const std::string SOC_DESC_PATH = "tests/soc_descs/wormhole_b0_8x10.yaml" TEST(GalaxyConcurrentThreads, WriteToAllChipsL1) { // Galaxy Setup - std::string cluster_desc_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path(); - std::shared_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(cluster_desc_path); + std::shared_ptr cluster_desc = tt_ClusterDescriptor::create(); std::set target_devices_th1 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; std::set target_devices_th2 = {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; std::set all_devices = {}; @@ -50,13 +49,8 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsL1) { uint32_t num_host_mem_ch_per_mmio_device = 1; - Cluster device = Cluster( - test_utils::GetAbsPath(SOC_DESC_PATH), - cluster_desc_path, - all_devices, - num_host_mem_ch_per_mmio_device, - false, - true); + Cluster device = + Cluster(test_utils::GetAbsPath(SOC_DESC_PATH), all_devices, num_host_mem_ch_per_mmio_device, false, true); const auto sdesc_per_chip = device.get_virtual_soc_descriptors(); tt::umd::test::utils::set_params_for_remote_txn(device); @@ -127,8 +121,7 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsL1) { TEST(GalaxyConcurrentThreads, WriteToAllChipsDram) { // Galaxy Setup - std::string cluster_desc_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path(); - std::shared_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(cluster_desc_path); + std::shared_ptr cluster_desc = tt_ClusterDescriptor::create(); std::set target_devices_th1 = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; std::set target_devices_th2 = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; std::set all_devices = {}; @@ -153,13 
+146,8 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsDram) { uint32_t num_host_mem_ch_per_mmio_device = 1; - Cluster device = Cluster( - test_utils::GetAbsPath(SOC_DESC_PATH), - cluster_desc_path, - all_devices, - num_host_mem_ch_per_mmio_device, - false, - true); + Cluster device = + Cluster(test_utils::GetAbsPath(SOC_DESC_PATH), all_devices, num_host_mem_ch_per_mmio_device, false, true); const auto sdesc_per_chip = device.get_virtual_soc_descriptors(); tt::umd::test::utils::set_params_for_remote_txn(device); @@ -233,8 +221,7 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsDram) { TEST(GalaxyConcurrentThreads, PushInputsWhileSignalingCluster) { // Galaxy Setup - std::string cluster_desc_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path(); - std::shared_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(cluster_desc_path); + std::shared_ptr cluster_desc = tt_ClusterDescriptor::create(); std::set target_devices = {0, 1, 2, 3, 4, 5, 6, 7, 8}; for (const auto& chip : target_devices) { // Verify that selected chips are in the cluster @@ -245,13 +232,8 @@ TEST(GalaxyConcurrentThreads, PushInputsWhileSignalingCluster) { uint32_t num_host_mem_ch_per_mmio_device = 1; - Cluster device = Cluster( - test_utils::GetAbsPath(SOC_DESC_PATH), - cluster_desc_path, - target_devices, - num_host_mem_ch_per_mmio_device, - false, - true); + Cluster device = + Cluster(test_utils::GetAbsPath(SOC_DESC_PATH), target_devices, num_host_mem_ch_per_mmio_device, false, true); const auto sdesc_per_chip = device.get_virtual_soc_descriptors(); tt::umd::test::utils::set_params_for_remote_txn(device); diff --git a/tests/galaxy/test_umd_remote_api.cpp b/tests/galaxy/test_umd_remote_api.cpp index 366ea05d..91e5d71a 100644 --- a/tests/galaxy/test_umd_remote_api.cpp +++ b/tests/galaxy/test_umd_remote_api.cpp @@ -21,8 +21,7 @@ static const std::string SOC_DESC_PATH = "tests/soc_descs/wormhole_b0_8x10.yaml" void run_remote_read_write_test(uint32_t vector_size, bool dram_write) { // 
Galaxy Setup - std::string cluster_desc_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path(); - std::shared_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(cluster_desc_path); + std::shared_ptr cluster_desc = tt_ClusterDescriptor::create(); std::set target_devices = {}; for (const auto& chip : cluster_desc->get_all_chips()) { target_devices.insert(chip); @@ -30,13 +29,8 @@ void run_remote_read_write_test(uint32_t vector_size, bool dram_write) { uint32_t num_host_mem_ch_per_mmio_device = 1; - Cluster device = Cluster( - test_utils::GetAbsPath(SOC_DESC_PATH), - cluster_desc_path, - target_devices, - num_host_mem_ch_per_mmio_device, - false, - true); + Cluster device = + Cluster(test_utils::GetAbsPath(SOC_DESC_PATH), target_devices, num_host_mem_ch_per_mmio_device, false, true); const auto sdesc_per_chip = device.get_virtual_soc_descriptors(); tt::umd::test::utils::set_params_for_remote_txn(device); @@ -136,8 +130,7 @@ TEST(GalaxyBasicReadWrite, LargeRemoteDramBlockReadWrite) { run_remote_read_writ void run_data_mover_test( uint32_t vector_size, tt_multichip_core_addr sender_core, tt_multichip_core_addr receiver_core) { // Galaxy Setup - std::string cluster_desc_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path(); - std::shared_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(cluster_desc_path); + std::shared_ptr cluster_desc = tt_ClusterDescriptor::create(); std::set target_devices = {}; for (const auto& chip : cluster_desc->get_all_chips()) { target_devices.insert(chip); @@ -154,13 +147,8 @@ void run_data_mover_test( uint32_t num_host_mem_ch_per_mmio_device = 1; - Cluster device = Cluster( - test_utils::GetAbsPath(SOC_DESC_PATH), - cluster_desc_path, - target_devices, - num_host_mem_ch_per_mmio_device, - false, - true); + Cluster device = + Cluster(test_utils::GetAbsPath(SOC_DESC_PATH), target_devices, num_host_mem_ch_per_mmio_device, false, true); tt::umd::test::utils::set_params_for_remote_txn(device); @@ -259,8 +247,7 @@ 
TEST(GalaxyDataMovement, TwoChipMoveData4) { void run_data_broadcast_test( uint32_t vector_size, tt_multichip_core_addr sender_core, std::vector receiver_cores) { // Galaxy Setup - std::string cluster_desc_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path(); - std::shared_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(cluster_desc_path); + std::shared_ptr cluster_desc = tt_ClusterDescriptor::create(); std::set target_devices = {}; for (const auto& chip : cluster_desc->get_all_chips()) { target_devices.insert(chip); @@ -279,13 +266,8 @@ void run_data_broadcast_test( uint32_t num_host_mem_ch_per_mmio_device = 1; - Cluster device = Cluster( - test_utils::GetAbsPath(SOC_DESC_PATH), - cluster_desc_path, - target_devices, - num_host_mem_ch_per_mmio_device, - false, - true); + Cluster device = + Cluster(test_utils::GetAbsPath(SOC_DESC_PATH), target_devices, num_host_mem_ch_per_mmio_device, false, true); tt::umd::test::utils::set_params_for_remote_txn(device); diff --git a/tests/galaxy/test_umd_remote_api_stability.cpp b/tests/galaxy/test_umd_remote_api_stability.cpp index 86416e4d..0204f245 100644 --- a/tests/galaxy/test_umd_remote_api_stability.cpp +++ b/tests/galaxy/test_umd_remote_api_stability.cpp @@ -33,8 +33,7 @@ class WormholeGalaxyStabilityTestFixture : public WormholeTestFixture { static uint32_t scale_number_of_tests; static void SetUpTestSuite() { - std::unique_ptr cluster_desc = - tt_ClusterDescriptor::create_from_yaml(tt_ClusterDescriptor::get_cluster_descriptor_file_path()); + std::unique_ptr cluster_desc = tt_ClusterDescriptor::create(); detected_num_chips = cluster_desc->get_number_of_chips(); if (detected_num_chips < EXPECTED_MIN_CHIPS) { skip_tests = true; diff --git a/tests/grayskull/test_silicon_driver.cpp b/tests/grayskull/test_silicon_driver.cpp index 04af85bb..8954ae56 100644 --- a/tests/grayskull/test_silicon_driver.cpp +++ b/tests/grayskull/test_silicon_driver.cpp @@ -71,7 +71,6 @@ TEST(SiliconDriverGS, CustomSocDesc) { // 
Initialize the driver with a 1x1 descriptor and explictly do not perform harvesting Cluster device = Cluster( test_utils::GetAbsPath("./tests/soc_descs/grayskull_1x1_arch.yaml"), - tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_host_mem_ch_per_mmio_device, false, @@ -517,7 +516,6 @@ TEST(SiliconDriverGS, MultiThreadedMemBar) { // this tests takes ~5 mins to run TEST(SiliconDriverGS, SysmemTestWithPcie) { Cluster cluster( test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml"), - "", // test_utils::GetClusterDescYAML(), {0}, 1, // one "host memory channel", currently a 1G huge page false, // skip driver allocs - no (don't skip) diff --git a/tests/misc/CMakeLists.txt b/tests/misc/CMakeLists.txt index fbde42df..4f34dd5b 100644 --- a/tests/misc/CMakeLists.txt +++ b/tests/misc/CMakeLists.txt @@ -1,7 +1,15 @@ -set(UMD_MISC_TESTS_SRCS test_semver.cpp) +set(UMD_MISC_TESTS_SRCS + test_semver.cpp + test_logger.cpp +) add_executable(umd_misc_tests ${UMD_MISC_TESTS_SRCS}) -target_link_libraries(umd_misc_tests PRIVATE test_common) +target_link_libraries( + umd_misc_tests + PRIVATE + test_common + spdlog::spdlog_header_only +) set_target_properties( umd_misc_tests PROPERTIES diff --git a/tests/misc/test_logger.cpp b/tests/misc/test_logger.cpp new file mode 100644 index 00000000..1ed54ac2 --- /dev/null +++ b/tests/misc/test_logger.cpp @@ -0,0 +1,225 @@ +/* + * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include +#include +#include + +#include "common/logger_.hpp" +#include "common/timestamp.hpp" + +using namespace tt::umd::util; + +class LoggerTest : public ::testing::Test { +protected: + std::filesystem::path temp_dir; + std::filesystem::path log_file; + + void SetUp() override { + // A bit of a hack - logger is only intended to be initialized once per + // process, but we need to reset it for each test. 
+ tt::umd::logger::detail::is_initialized.store(false); + + std::string tmpl = (std::filesystem::temp_directory_path() / "logger_test_XXXXXX").string(); + int fd = mkstemp(tmpl.data()); + if (fd == -1) { + throw std::runtime_error("Failed to create temporary file"); + } + close(fd); + log_file = tmpl; + temp_dir = log_file.parent_path(); + } + + void TearDown() override { + std::filesystem::remove_all(log_file); + spdlog::shutdown(); + } + + // Helper to read entire file content + std::string read_log_file() { + std::ifstream file(log_file); + std::stringstream buf; + buf << file.rdbuf(); + return buf.str(); + } +}; + +TEST_F(LoggerTest, BasicLogging) { + // Initialize logger with our test configuration + tt::umd::logger::Options options; + options.log_to_stderr = true; + options.filename = log_file.string(); + options.pattern = "%v"; // Simple pattern for easier testing + tt::umd::logger::initialize(options); + + // Write some test messages + UMD_INFO("Test message 1"); + UMD_INFO("Test message 2"); + UMD_INFO("Test message 4"); + UMD_INFO("Test message 3"); + + // Force flush by destroying the logger + spdlog::drop_all(); + + auto log_content = read_log_file(); + + // Verify log content + EXPECT_TRUE(log_content.find("Test message 1") != std::string::npos); + EXPECT_TRUE(log_content.find("Test message 2") != std::string::npos); + EXPECT_TRUE(log_content.find("Test message 3") != std::string::npos); + EXPECT_TRUE(log_content.find("Test message 4") != std::string::npos); +} + +TEST_F(LoggerTest, LogLevels) { + tt::umd::logger::Options options; + options.log_to_stderr = true; + options.filename = log_file.string(); + options.pattern = "%v"; + options.log_level = spdlog::level::info; // Set to INFO level + tt::umd::logger::initialize(options); + + UMD_DEBUG("Debug message"); // Shouldn't appear + UMD_INFO("Info message"); // Should appear + UMD_ERROR("Error message"); // Should appear + + spdlog::drop_all(); + + auto log_content = read_log_file(); + + 
EXPECT_EQ(log_content.find("Debug message"), std::string::npos); + EXPECT_TRUE(log_content.find("Info message") != std::string::npos); + EXPECT_TRUE(log_content.find("Error message") != std::string::npos); +} + +TEST_F(LoggerTest, FormatPatterns) { + tt::umd::logger::Options options; + options.log_to_stderr = false; + options.filename = log_file.string(); + options.pattern = "[%l] %v"; // Level and message + tt::umd::logger::initialize(options); + + UMD_INFO("Test message"); + + spdlog::drop_all(); + + auto log_content = read_log_file(); + + EXPECT_TRUE(log_content.find("[info] Test message") != std::string::npos); +} + +TEST_F(LoggerTest, MultipleInitialization) { + tt::umd::logger::Options options; + options.log_to_stderr = false; + options.filename = log_file.string(); + options.pattern = "%v"; + + // Initialize multiple times - should use first initialization only + tt::umd::logger::initialize(options); + + UMD_INFO("First message"); + + options.pattern = "DIFFERENT: %v"; + tt::umd::logger::initialize(options); // Should be ignored + + UMD_INFO("Second message"); + + spdlog::drop_all(); + + auto log_content = read_log_file(); + + EXPECT_TRUE(log_content.find("First message") != std::string::npos); + EXPECT_TRUE(log_content.find("Second message") != std::string::npos); + EXPECT_EQ(log_content.find("DIFFERENT:"), std::string::npos); +} + +/** + * The next few tests aren't really unit tests - just a mechanism to understand + * the performance of the logger. A log message that isn't printed (i.e. the + * log level suppresses it) is a single-digit nanosecond penalty in a release + * build on EPYC 7713 -- so pretty cheap. 
+ */ +TEST_F(LoggerTest, DiskPerformance) { + const size_t num_messages = 10; + tt::umd::logger::Options options; + options.log_to_stderr = false; + options.filename = log_file.string(); + options.log_level = spdlog::level::info; + tt::umd::logger::initialize(options); + + // Actually logged + { + Timestamp ts; + for (size_t i = 0; i < num_messages; i++) { + UMD_INFO("Test message"); + } + std::cout << ts.to_string() << " for " << num_messages << " messages to disk" << std::endl; + } + + // Not logged - should be faster + { + Timestamp ts; + for (size_t i = 0; i < num_messages; i++) { + UMD_TRACE("Shouldn't be logged"); + } + std::cout << ts.to_string() << " for " << num_messages << " messages below level threshold" << std::endl; + } +} + +TEST_F(LoggerTest, StderrPerformance) { + const size_t num_messages = 10; + tt::umd::logger::Options options; + options.log_to_stderr = true; + options.filename = ""; + options.log_level = spdlog::level::info; + tt::umd::logger::initialize(options); + + // Actually logged + { + Timestamp ts; + for (size_t i = 0; i < num_messages; i++) { + UMD_INFO("Test message"); + } + std::cout << ts.to_string() << " for " << num_messages << " messages to stderr" << std::endl; + } + + // Not logged - should be faster + { + Timestamp ts; + for (size_t i = 0; i < num_messages; i++) { + UMD_TRACE("Shouldn't be logged"); + } + std::cout << ts.to_string() << " for " << num_messages << " messages below level threshold" << std::endl; + } +} + +TEST_F(LoggerTest, StderrAndDiskPerformance) { + const size_t num_messages = 10; + tt::umd::logger::Options options; + options.log_to_stderr = true; + options.filename = log_file.string(); + options.log_level = spdlog::level::info; + tt::umd::logger::initialize(options); + + // Actually logged + { + Timestamp ts; + for (size_t i = 0; i < num_messages; i++) { + UMD_INFO("Test message"); + } + std::cout << ts.to_string() << " for " << num_messages << " messages to disk & stderr" << std::endl; + } + + // Not logged 
- should be faster + { + Timestamp ts; + for (size_t i = 0; i < num_messages; i++) { + UMD_TRACE("Shouldn't be logged"); + } + std::cout << ts.to_string() << " for " << num_messages << " messages below level threshold" << std::endl; + } +} diff --git a/tests/wormhole/test_silicon_driver_wh.cpp b/tests/wormhole/test_silicon_driver_wh.cpp index 8ad98b5a..130eae8c 100644 --- a/tests/wormhole/test_silicon_driver_wh.cpp +++ b/tests/wormhole/test_silicon_driver_wh.cpp @@ -73,8 +73,7 @@ std::int32_t get_static_tlb_index(tt_xy_pair target) { std::set get_target_devices() { std::set target_devices; - std::unique_ptr cluster_desc_uniq = - tt_ClusterDescriptor::create_from_yaml(tt_ClusterDescriptor::get_cluster_descriptor_file_path()); + std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create(); for (int i = 0; i < cluster_desc_uniq->get_number_of_chips(); i++) { target_devices.insert(i); } @@ -89,7 +88,6 @@ TEST(SiliconDriverWH, CreateDestroy) { for (int i = 0; i < 50; i++) { Cluster device = Cluster( test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_1x1.yaml"), - tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_host_mem_ch_per_mmio_device, false, @@ -131,7 +129,6 @@ TEST(SiliconDriverWH, CustomSocDesc) { // Initialize the driver with a 1x1 descriptor and explictly do not perform harvesting Cluster device = Cluster( test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_1x1.yaml"), - tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_host_mem_ch_per_mmio_device, false, @@ -875,7 +872,6 @@ TEST(SiliconDriverWH, RandomSysmemTestWithPcie) { Cluster cluster( test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), - tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_channels, false, // skip driver allocs - no (don't skip) @@ -945,7 +941,6 @@ TEST(SiliconDriverWH, LargeAddressTlb) { Cluster cluster( test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), - 
tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_channels, false, // skip driver allocs - no (don't skip) diff --git a/tests/wormhole/test_umd_remote_api_stability.cpp b/tests/wormhole/test_umd_remote_api_stability.cpp index 26978a2b..ae41d137 100644 --- a/tests/wormhole/test_umd_remote_api_stability.cpp +++ b/tests/wormhole/test_umd_remote_api_stability.cpp @@ -33,8 +33,7 @@ class WormholeNebulaX2TestFixture : public WormholeTestFixture { static uint32_t scale_number_of_tests; static void SetUpTestSuite() { - std::unique_ptr cluster_desc = - tt_ClusterDescriptor::create_from_yaml(tt_ClusterDescriptor::get_cluster_descriptor_file_path()); + std::unique_ptr cluster_desc = tt_ClusterDescriptor::create(); detected_num_chips = cluster_desc->get_number_of_chips(); if (detected_num_chips != EXPECTED_NUM_CHIPS) { skip_tests = true;