Skip to content

Commit

Permalink
Simplify Cluster constructors (#277)
Browse files Browse the repository at this point in the history
Work towards removing parameters from default Cluster constructor
- Remove target devices
- Remove cluster descriptor
- Remove soc descriptor
- Add constructor with only target devices
  • Loading branch information
pjanevskiTT authored Nov 18, 2024
1 parent c49cbfb commit 852999c
Show file tree
Hide file tree
Showing 16 changed files with 322 additions and 186 deletions.
31 changes: 31 additions & 0 deletions common/utils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include <filesystem>
#include <iostream>
#include <string>

namespace tt::umd::utils {

/**
 * Resolve a path given relative to the UMD root directory into a full path string.
 *
 * The UMD root is taken to be the grandparent directory of this header as reported by __FILE__.
 *
 * The function is declared inline because it is defined in a header: without inline, every
 * translation unit that includes this file would emit its own external definition and the
 * program would fail to link with a multiple-definition error.
 *
 * @param path Path relative to the UMD root.
 * @return The string form of (UMD root / path). The resulting file is not checked for existence.
 * @throws std::filesystem::filesystem_error if __FILE__ is a relative path and the derived root
 *         directory cannot be canonicalized from the current working directory.
 */
inline std::string get_abs_path(std::string path) {
    // Note that __FILE__ might be resolved at compile time to an absolute or relative address, depending on the
    // compiler.
    const std::filesystem::path current_file_path = std::filesystem::path(__FILE__);
    // Two levels up from this header: <root>/<dir>/<this file> -> <root>.
    const std::filesystem::path root_as_compiled = current_file_path.parent_path().parent_path();

    std::filesystem::path umd_root;
    if (current_file_path.is_absolute()) {
        umd_root = root_as_compiled;
    } else {
        // Re-express the compile-time relative root against "../" and resolve it on the filesystem.
        const std::filesystem::path umd_root_relative = std::filesystem::relative(root_as_compiled, "../");
        umd_root = std::filesystem::canonical(umd_root_relative);
    }

    return (umd_root / path).string();
}

} // namespace tt::umd::utils
127 changes: 99 additions & 28 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
#include <dirent.h>
#include <errno.h>

#include "tt_arch_types.h"
#include "tt_cluster_descriptor.h"
#include "yaml-cpp/yaml.h"
#include "common/logger.hpp"

Expand Down Expand Up @@ -103,7 +105,7 @@ void size_buffer_to_capacity(std::vector<T> &data_buf, std::size_t size_in_bytes

// TODO: To be removed when tt_device is removed

tt_device::tt_device(const std::string& sdesc_path) : soc_descriptor_per_chip({}) {
tt_device::tt_device() : soc_descriptor_per_chip({}) {
}

tt_device::~tt_device() {
Expand Down Expand Up @@ -300,30 +302,12 @@ std::unordered_map<chip_id_t, uint32_t> Cluster::get_harvesting_masks_for_soc_de
return default_harvesting_masks;
}

Cluster::Cluster(const std::string &sdesc_path, const std::string &ndesc_path, const std::set<chip_id_t> &target_devices,
const uint32_t &num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs,
const bool clean_system_resources, bool perform_harvesting, std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks) : tt_device(sdesc_path) {
std::unordered_set<chip_id_t> target_mmio_device_ids;
target_devices_in_cluster = target_devices;
arch_name = tt_SocDescriptor(sdesc_path).arch;
perform_harvesting_on_sdesc = perform_harvesting;

auto available_device_ids = detect_available_device_ids();
m_num_pci_devices = available_device_ids.size();

if (!skip_driver_allocs) {
log_info(LogSiliconDriver, "Detected {} PCI device{} : {}", m_num_pci_devices, (m_num_pci_devices > 1) ? "s":"", available_device_ids);
log_debug(LogSiliconDriver, "Passed target devices: {}", target_devices);
}
void Cluster::construct_cluster(const std::string& sdesc_path, const uint32_t &num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs,
const bool clean_system_resources, bool perform_harvesting, std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks) {

std::string cluster_descriptor_path = ndesc_path;
if (cluster_descriptor_path == "") {
cluster_descriptor_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path();
}

ndesc = tt_ClusterDescriptor::create_from_yaml(cluster_descriptor_path);

for (auto &d: target_devices){
std::unordered_set<chip_id_t> target_mmio_device_ids;
for (auto &d: target_devices_in_cluster){
log_assert(ndesc->get_all_chips().find(d) != ndesc->get_all_chips().end(), "Target device {} not present in current cluster!", d);
if (ndesc->is_chip_mmio_capable(d)){
target_mmio_device_ids.insert(d);
}
Expand Down Expand Up @@ -357,7 +341,7 @@ Cluster::Cluster(const std::string &sdesc_path, const std::string &ndesc_path, c

translation_tables_en = false;
for(auto& masks : harvesting_masks) {
if(target_devices.find(masks.first) != target_devices.end()) {
if(target_devices_in_cluster.find(masks.first) != target_devices_in_cluster.end()) {
harvested_rows_per_target[masks.first] = get_harvested_noc_rows(masks.second);
noc_translation_enabled_for_chip[masks.first] = noc_translation_enabled.at(masks.first);
num_rows_harvested.insert({masks.first, std::bitset<32>(masks.second).count()});
Expand Down Expand Up @@ -386,7 +370,7 @@ Cluster::Cluster(const std::string &sdesc_path, const std::string &ndesc_path, c
}
else if(arch_name == tt::ARCH::BLACKHOLE) {
// Default harvesting info for Blackhole, describing no harvesting
for(auto chip_id = target_devices.begin(); chip_id != target_devices.end(); chip_id++){
for(auto chip_id = target_devices_in_cluster.begin(); chip_id != target_devices_in_cluster.end(); chip_id++){
harvested_rows_per_target[*chip_id] = 0; //get_harvested_noc_rows_for_chip(*chip_id);
num_rows_harvested.insert({*chip_id, 0}); // Only set for broadcast TLB to get RISCS out of reset. We want all rows to have a reset signal sent.
if(harvested_rows_per_target[*chip_id]) {
Expand All @@ -396,7 +380,7 @@ Cluster::Cluster(const std::string &sdesc_path, const std::string &ndesc_path, c
}
else if(arch_name == tt::ARCH::GRAYSKULL) {
// Multichip harvesting is supported for GS.
for(auto chip_id = target_devices.begin(); chip_id != target_devices.end(); chip_id++){
for(auto chip_id = target_devices_in_cluster.begin(); chip_id != target_devices_in_cluster.end(); chip_id++){
harvested_rows_per_target[*chip_id] = get_harvested_noc_rows_for_chip(*chip_id);
num_rows_harvested.insert({*chip_id, 0}); // Only set for broadcast TLB to get RISCS out of reset. We want all rows to have a reset signal sent.
if(harvested_rows_per_target[*chip_id]) {
Expand All @@ -407,7 +391,7 @@ Cluster::Cluster(const std::string &sdesc_path, const std::string &ndesc_path, c

if(simulated_harvesting_masks.size()) {
performed_harvesting = true;
for (auto device_id = target_devices.begin(); device_id != target_devices.end(); device_id++) {
for (auto device_id = target_devices_in_cluster.begin(); device_id != target_devices_in_cluster.end(); device_id++) {
log_assert(simulated_harvesting_masks.find(*device_id) != simulated_harvesting_masks.end(), "Could not find harvesting mask for device_id {}", *device_id);
if(arch_name == tt::ARCH::GRAYSKULL) {
if ((simulated_harvesting_masks.at(*device_id) & harvested_rows_per_target[*device_id]) != harvested_rows_per_target[*device_id]) {
Expand Down Expand Up @@ -456,7 +440,94 @@ Cluster::Cluster(const std::string &sdesc_path, const std::string &ndesc_path, c

// Default initialize noc_params based on detected arch
noc_params = architecture_implementation->get_noc_params();
}

// Constructs a Cluster without explicit descriptor paths or target devices:
//  - the SOC descriptor is selected from the architecture of the first detected PCI device,
//  - the cluster descriptor is loaded from tt_ClusterDescriptor::get_cluster_descriptor_file_path(),
//  - every chip reported by the cluster descriptor becomes a target device.
// The remaining setup is shared with the other constructors via construct_cluster().
Cluster::Cluster(const uint32_t &num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs,
                 const bool clean_system_resources, bool perform_harvesting, std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks) : tt_device() {
    // TODO: this should be fetched through ClusterDescriptor
    auto available_device_ids = detect_available_device_ids();
    m_num_pci_devices = available_device_ids.size();

    // The architecture is read from the first detected device, so indexing [0] is only valid
    // when at least one device was found.
    log_assert(m_num_pci_devices > 0, "Detected {} PCI devices, cannot determine device architecture!", m_num_pci_devices);

    int physical_device_id = available_device_ids[0];
    // TODO: remove logical_device_id
    PCIDevice pci_device (physical_device_id, 0);
    tt::ARCH device_arch = pci_device.get_arch();

    std::string sdesc_path = tt_SocDescriptor::get_soc_descriptor_path(device_arch);

    arch_name = tt_SocDescriptor(sdesc_path).arch;
    perform_harvesting_on_sdesc = perform_harvesting;

    if (!skip_driver_allocs) {
        log_info(LogSiliconDriver, "Detected {} PCI device{} : {}", m_num_pci_devices, (m_num_pci_devices > 1) ? "s":"", available_device_ids);
    }

    std::string ndesc_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path();
    ndesc = tt_ClusterDescriptor::create_from_yaml(ndesc_path);

    // No target list was passed in: target every chip the cluster descriptor reports.
    std::set<chip_id_t> target_devices;
    for(const chip_id_t &d : ndesc->get_all_chips()) {
        target_devices.insert(d);
    }
    target_devices_in_cluster = target_devices;

    // Logged only after the set has been populated; previously this referenced target_devices
    // before it was declared.
    if (!skip_driver_allocs) {
        log_debug(LogSiliconDriver, "Passed target devices: {}", target_devices);
    }

    construct_cluster(sdesc_path, num_host_mem_ch_per_mmio_device, skip_driver_allocs, clean_system_resources, perform_harvesting, simulated_harvesting_masks);
}

// Constructs a Cluster for an explicit set of target devices:
//  - the SOC descriptor is selected from the architecture of the first detected PCI device,
//  - the cluster descriptor is loaded from tt_ClusterDescriptor::get_cluster_descriptor_file_path().
// The remaining setup is shared with the other constructors via construct_cluster().
Cluster::Cluster(const std::set<chip_id_t> &target_devices, const uint32_t &num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs,
                 const bool clean_system_resources, bool perform_harvesting, std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks) : tt_device() {
    // TODO: this should be fetched through ClusterDescriptor
    auto available_device_ids = detect_available_device_ids();
    m_num_pci_devices = available_device_ids.size();

    // The architecture is read from the first detected device, so indexing [0] is only valid
    // when at least one device was found.
    log_assert(m_num_pci_devices > 0, "Detected {} PCI devices, cannot determine device architecture!", m_num_pci_devices);

    int physical_device_id = available_device_ids[0];
    // TODO: remove logical_device_id
    PCIDevice pci_device (physical_device_id, 0);
    tt::ARCH device_arch = pci_device.get_arch();

    std::string sdesc_path = tt_SocDescriptor::get_soc_descriptor_path(device_arch);

    arch_name = tt_SocDescriptor(sdesc_path).arch;
    perform_harvesting_on_sdesc = perform_harvesting;

    if (!skip_driver_allocs) {
        log_info(LogSiliconDriver, "Detected {} PCI device{} : {}", m_num_pci_devices, (m_num_pci_devices > 1) ? "s":"", available_device_ids);
        log_debug(LogSiliconDriver, "Passed target devices: {}", target_devices);
    }

    std::string ndesc_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path();
    ndesc = tt_ClusterDescriptor::create_from_yaml(ndesc_path);

    target_devices_in_cluster = target_devices;

    construct_cluster(sdesc_path, num_host_mem_ch_per_mmio_device, skip_driver_allocs, clean_system_resources, perform_harvesting, simulated_harvesting_masks);
}

// Constructs a Cluster from an explicit SOC descriptor path, an optional cluster descriptor path
// and an explicit set of target devices. An empty ndesc_path means the cluster descriptor is
// loaded from tt_ClusterDescriptor::get_cluster_descriptor_file_path() instead.
Cluster::Cluster(const std::string &sdesc_path, const std::string &ndesc_path, const std::set<chip_id_t> &target_devices,
                 const uint32_t &num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs,
                 const bool clean_system_resources, bool perform_harvesting, std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks) : tt_device() {
    // TODO: this should be fetched through ClusterDescriptor
    auto detected_ids = detect_available_device_ids();
    m_num_pci_devices = detected_ids.size();

    // Record the caller's choices before any descriptor is loaded.
    target_devices_in_cluster = target_devices;
    arch_name = tt_SocDescriptor(sdesc_path).arch;
    perform_harvesting_on_sdesc = perform_harvesting;

    const bool report_devices = !skip_driver_allocs;
    if (report_devices) {
        log_info(LogSiliconDriver, "Detected {} PCI device{} : {}", m_num_pci_devices, (m_num_pci_devices > 1) ? "s":"", detected_ids);
        log_debug(LogSiliconDriver, "Passed target devices: {}", target_devices);
    }

    // Use the supplied cluster descriptor when there is one, otherwise the default descriptor file.
    std::string cluster_descriptor_path;
    if (ndesc_path == "") {
        cluster_descriptor_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path();
    } else {
        cluster_descriptor_path = ndesc_path;
    }
    ndesc = tt_ClusterDescriptor::create_from_yaml(cluster_descriptor_path);

    // Everything else is common to all constructors.
    construct_cluster(sdesc_path, num_host_mem_ch_per_mmio_device, skip_driver_allocs, clean_system_resources, perform_harvesting, simulated_harvesting_masks);
}

void Cluster::configure_active_ethernet_cores_for_mmio_device(chip_id_t mmio_chip, const std::unordered_set<tt_xy_pair>& active_eth_cores_per_chip) {
Expand Down
37 changes: 33 additions & 4 deletions device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ struct tt_device_params {
class tt_device
{
public:
tt_device(const std::string& sdesc_path);
tt_device();
virtual ~tt_device();
// Setup/Teardown Functions
/**
Expand Down Expand Up @@ -606,10 +606,10 @@ namespace tt::umd {
*/
class Cluster: public tt_device
{
public:
public:
// Constructor
/**
* Silicon Driver constructor.
* Cluster constructor.
*
* @param sdesc_path SOC descriptor specifying single chip.
* @param ndesc_path Network Descriptor specifying the network topology of the system.
Expand All @@ -624,6 +624,32 @@ class Cluster: public tt_device
const uint32_t &num_host_mem_ch_per_mmio_device = 1, const bool skip_driver_allocs = false,
const bool clean_system_resources = false, bool perform_harvesting = true, std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks = {});

/**
 * Cluster constructor that takes neither descriptor paths nor a target device list.
 * It is a step towards removing all parameters from the Cluster constructor
 * (Cluster was previously named tt_SiliconDevice).
 *
 * The SOC descriptor is selected from the architecture of the first detected PCI device, the
 * cluster descriptor is loaded from tt_ClusterDescriptor::get_cluster_descriptor_file_path(),
 * and every chip in that cluster descriptor is targeted.
 *
 * @param num_host_mem_ch_per_mmio_device Requested number of host channels (hugepages).
 * @param skip_driver_allocs When true, the constructor does not log the detected PCI devices;
 *        the flag is also forwarded to construct_cluster. NOTE(review): further effects are
 *        not visible here - confirm against create_device.
 * @param clean_system_resources Specifies if host state from previous runs needs to be cleaned up.
 * @param perform_harvesting Allow the driver to modify the SOC descriptors per chip.
 * @param simulated_harvesting_masks Harvesting masks keyed by chip id. When non-empty, it must
 *        contain an entry for every target device.
 */
Cluster(const uint32_t &num_host_mem_ch_per_mmio_device = 1, const bool skip_driver_allocs = false,
const bool clean_system_resources = false, bool perform_harvesting = true, std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks = {});

/**
 * Cluster constructor. This constructor should be used to target specific devices in a cluster.
 *
 * The SOC descriptor is selected from the architecture of the first detected PCI device and the
 * cluster descriptor is loaded from tt_ClusterDescriptor::get_cluster_descriptor_file_path().
 *
 * @param target_devices Devices to target. Each one must be present in the cluster descriptor.
 * @param num_host_mem_ch_per_mmio_device Requested number of host channels (hugepages).
 * @param skip_driver_allocs When true, the constructor does not log the detected PCI devices
 *        and the passed target devices; the flag is also forwarded to construct_cluster.
 *        NOTE(review): further effects are not visible here - confirm against create_device.
 * @param clean_system_resources Specifies if host state from previous runs needs to be cleaned up.
 * @param perform_harvesting Allow the driver to modify the SOC descriptors per chip.
 * @param simulated_harvesting_masks Harvesting masks keyed by chip id. When non-empty, it must
 *        contain an entry for every target device.
 */
Cluster(const std::set<chip_id_t> &target_devices, const uint32_t &num_host_mem_ch_per_mmio_device = 1, const bool skip_driver_allocs = false,
const bool clean_system_resources = false, bool perform_harvesting = true, std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks = {});

//Setup/Teardown Functions
virtual std::unordered_map<chip_id_t, tt_SocDescriptor>& get_virtual_soc_descriptors();
virtual void set_device_l1_address_params(const tt_device_l1_address_params& l1_address_params_);
Expand Down Expand Up @@ -710,7 +736,7 @@ class Cluster: public tt_device
// Destructor
virtual ~Cluster ();

private:
private:
// Helper functions
// Startup + teardown
void create_device(const std::unordered_set<chip_id_t> &target_mmio_device_ids, const uint32_t &num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs, const bool clean_system_resources);
Expand Down Expand Up @@ -771,6 +797,9 @@ class Cluster: public tt_device
// This functions has to be called for local chip, and then it will wait for all connected remote chips to flush.
void wait_for_connected_non_mmio_flush(chip_id_t chip_id);

// Common initialization shared by all Cluster constructors; each constructor calls this last.
// The constructors set ndesc, target_devices_in_cluster and arch_name before calling, and this
// function reads them. It takes the SOC descriptor path plus the constructor arguments that
// are forwarded unchanged.
void construct_cluster(const std::string& sdesc_path, const uint32_t &num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs,
const bool clean_system_resources, bool perform_harvesting, std::unordered_map<chip_id_t, uint32_t> simulated_harvesting_masks);

// State variables
tt_device_dram_address_params dram_address_params;
tt_device_l1_address_params l1_address_params;
Expand Down
2 changes: 1 addition & 1 deletion device/mockup/tt_mockup_device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

class tt_MockupDevice : public tt_device {
public:
tt_MockupDevice(const std::string& sdesc_path) : tt_device(sdesc_path) {
tt_MockupDevice(const std::string& sdesc_path) : tt_device() {
soc_descriptor_per_chip.emplace(0, tt_SocDescriptor(sdesc_path));
std::set<chip_id_t> target_devices = {0};
}
Expand Down
2 changes: 1 addition & 1 deletion device/simulation/tt_simulation_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ void print_flatbuffer(const DeviceRequestResponse *buf){
std::cout << std::endl;
}

tt_SimulationDevice::tt_SimulationDevice(const std::string &sdesc_path) : tt_device(sdesc_path){
tt_SimulationDevice::tt_SimulationDevice(const std::string &sdesc_path) : tt_device(){
log_info(tt::LogEmulationDriver, "Instantiating simulation device");
soc_descriptor_per_chip.emplace(0, tt_SocDescriptor(sdesc_path));
std::set<chip_id_t> target_devices = {0};
Expand Down
6 changes: 3 additions & 3 deletions device/tt_cluster_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ enum BoardType : uint32_t {

class tt_ClusterDescriptor {

private:
private:
int get_ethernet_link_coord_distance(const eth_coord_t &location_a, const eth_coord_t &location_b) const;

protected:
protected:

std::unordered_map<chip_id_t, std::unordered_map<ethernet_channel_t, std::tuple<chip_id_t, ethernet_channel_t> > > ethernet_connections;
std::unordered_map<chip_id_t, eth_coord_t> chip_locations;
Expand Down Expand Up @@ -73,7 +73,7 @@ class tt_ClusterDescriptor {

void fill_chips_grouped_by_closest_mmio();

public:
public:
tt_ClusterDescriptor() = default;
tt_ClusterDescriptor(const tt_ClusterDescriptor&) = default;

Expand Down
32 changes: 32 additions & 0 deletions device/tt_device.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0


#ifdef TT_DEBUG_LOGGING
#define DEBUG_LOG(str) do { std::cout << str << std::endl; } while( false )
#else
#define DEBUG_LOG(str) ((void)0)
#endif

#include "tt_device.h"
#include "device/tt_cluster_descriptor_types.h"
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <unordered_map>
#include "yaml-cpp/yaml.h"

////////
// Device base
////////
// Base device constructor: takes no arguments and starts with an empty per-chip SOC descriptor
// container; derived classes fill it in.
tt_device::tt_device()
    : soc_descriptor_per_chip({}) {}

// Base device destructor: nothing to release beyond the members' own destructors.
tt_device::~tt_device() = default;

// Returns the SOC descriptor stored for chip_id.
// The lookup uses .at(), so an unknown chip_id is reported by the container rather than
// silently inserting an entry (presumably std::out_of_range - confirm the container type).
const tt_SocDescriptor& tt_device::get_soc_descriptor(chip_id_t chip_id) const {
    const auto& descriptor_for_chip = soc_descriptor_per_chip.at(chip_id);
    return descriptor_for_chip;
}
18 changes: 18 additions & 0 deletions device/tt_soc_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include "yaml-cpp/yaml.h"
#include "tt_soc_descriptor.h"

#include "common/utils.hpp"

#include <assert.h>
#include <fstream>
#include <iostream>
Expand Down Expand Up @@ -273,6 +275,22 @@ bool tt_SocDescriptor::is_ethernet_core(const tt_xy_pair &core) const {
return this->ethernet_core_channel_map.find(core) != ethernet_core_channel_map.end();
}

// Maps an architecture to the path of its default SOC descriptor YAML, resolved through
// tt::umd::utils::get_abs_path. Throws std::runtime_error for any architecture other than
// GRAYSKULL, WORMHOLE_B0 or BLACKHOLE.
std::string tt_SocDescriptor::get_soc_descriptor_path(tt::ARCH arch) {
    // TODO: these paths need to be changed to point to soc descriptors outside of tests directory.
    const char* descriptor_yaml = nullptr;
    switch (arch) {
        case tt::ARCH::GRAYSKULL:
            descriptor_yaml = "tests/soc_descs/grayskull_10x12.yaml";
            break;
        case tt::ARCH::WORMHOLE_B0:
            descriptor_yaml = "tests/soc_descs/wormhole_b0_8x10.yaml";
            break;
        case tt::ARCH::BLACKHOLE:
            descriptor_yaml = "tests/soc_descs/blackhole_140_arch_no_eth.yaml";
            break;
        default:
            throw std::runtime_error("Invalid architecture");
    }
    return tt::umd::utils::get_abs_path(descriptor_yaml);
}

std::ostream &operator<<(std::ostream &out, const tt::ARCH &arch_name) {
if (arch_name == tt::ARCH::Invalid) {
out << "none";
Expand Down
Loading

0 comments on commit 852999c

Please sign in to comment.