Skip to content

Commit

Permalink
TLB Manager (#420)
Browse files Browse the repository at this point in the history
### Issue
Related to #417 

### Description
Introduces the TLBManager class. This class should hold the maps between
TLB indices and cores, and should hand out TLB indices. This might not
be completely true after this PR, which was more focused on getting
things out of the cluster.cpp file, though without moving too much at once.

### List of the changes
- Created TLBManager class which lives inside TTDevice
- Added get_tlb_manager to TTDevice
- Moved the dynamic_tlb_config and dynamic_tlb_ordering_modes maps.
- Moved configure_tlb main logic with maps that hold these mapped tlbs
- Moved all related functions from cluster.cpp
- These moved maps/functions are still used throughout cluster.cpp, but
this will be refactored further, so that the TLBManager itself performs the
checks and hands out the writer.

### Testing
Existing CI tests should cover this class.
Will try to add additional TLBManager focused tests.

### API Changes
There are no API changes in this PR.
  • Loading branch information
broskoTT authored Dec 26, 2024
1 parent b090032 commit 087bbe2
Show file tree
Hide file tree
Showing 16 changed files with 356 additions and 144 deletions.
1 change: 1 addition & 0 deletions device/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ target_sources(
chip/local_chip.cpp
chip/mock_chip.cpp
chip/remote_chip.cpp
tt_device/tlb_manager.cpp
cluster.cpp
coordinate_manager.cpp
cpuset_lib.cpp
Expand Down
6 changes: 6 additions & 0 deletions device/api/umd/device/architecture_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ class architecture_implementation {
virtual const std::vector<uint32_t>& get_t6_x_locations() const = 0;
virtual const std::vector<uint32_t>& get_t6_y_locations() const = 0;

// TLB related. Move other functions here as well.
// One getter per TLB size class (1M, 2M, 16M, 4G). Each returns a pair ordered as
// {base, count}. Implementations may return {0, 0} for a size class; presumably that
// means the architecture has no TLBs of that size - TODO confirm with callers.
virtual std::pair<uint32_t, uint32_t> get_tlb_1m_base_and_count() const = 0;
virtual std::pair<uint32_t, uint32_t> get_tlb_2m_base_and_count() const = 0;
virtual std::pair<uint32_t, uint32_t> get_tlb_16m_base_and_count() const = 0;
virtual std::pair<uint32_t, uint32_t> get_tlb_4g_base_and_count() const = 0;

virtual std::tuple<xy_pair, xy_pair> multicast_workaround(xy_pair start, xy_pair end) const = 0;
virtual tlb_configuration get_tlb_configuration(uint32_t tlb_index) const = 0;
virtual std::pair<std::uint64_t, std::uint64_t> get_tlb_data(
Expand Down
12 changes: 12 additions & 0 deletions device/api/umd/device/blackhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,18 @@ class blackhole_implementation : public architecture_implementation {

const std::vector<uint32_t>& get_t6_y_locations() const override { return blackhole::T6_Y_LOCATIONS; }

// Blackhole reports {0, 0} for the 1M size class.
// NOTE(review): presumably because Blackhole has no 1M TLBs - confirm.
std::pair<uint32_t, uint32_t> get_tlb_1m_base_and_count() const override { return {0, 0}; }

// Returns {base, count} for the 2M size class from the blackhole TLB constants.
std::pair<uint32_t, uint32_t> get_tlb_2m_base_and_count() const override {
return {blackhole::TLB_BASE_2M, blackhole::TLB_COUNT_2M};
}

// Blackhole reports {0, 0} for the 16M size class.
// NOTE(review): presumably because Blackhole has no 16M TLBs - confirm.
std::pair<uint32_t, uint32_t> get_tlb_16m_base_and_count() const override { return {0, 0}; }

// Returns {base, count} for the 4G size class from the blackhole TLB constants.
std::pair<uint32_t, uint32_t> get_tlb_4g_base_and_count() const override {
return {blackhole::TLB_BASE_4G, blackhole::TLB_COUNT_4G};
}

std::tuple<xy_pair, xy_pair> multicast_workaround(xy_pair start, xy_pair end) const override;
tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override;
std::pair<std::uint64_t, std::uint64_t> get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override;
Expand Down
1 change: 1 addition & 0 deletions device/api/umd/device/chip/local_chip.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "umd/device/chip/chip.h"

namespace tt::umd {

class LocalChip : public Chip {
public:
LocalChip(tt_SocDescriptor soc_descriptor, int pci_device_id);
Expand Down
12 changes: 1 addition & 11 deletions device/api/umd/device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,7 @@ class Cluster : public tt_device {
// TODO: This should be accessible through public API, probably to be moved to tt_device.
PCIDevice* get_pci_device(int device_id) const;
TTDevice* get_tt_device(chip_id_t device_id) const;
TLBManager* get_tlb_manager(chip_id_t device_id) const;
const tt_SocDescriptor& get_soc_descriptor(chip_id_t chip_id) const;

// Existing API we want to remove. UMD is transitioning to use CoreCoord instead of tt_xy_pair.
Expand Down Expand Up @@ -836,15 +837,6 @@ class Cluster : public tt_device {
uint32_t* return_3 = nullptr,
uint32_t* return_4 = nullptr);

// TODO: These will be moved to a dedicated class for TLB management
bool address_in_tlb_space(
uint64_t address, uint32_t size_in_bytes, int32_t tlb_index, uint64_t tlb_size, uint32_t chip);
bool is_tlb_mapped(tt_cxy_pair target);
bool is_tlb_mapped(tt_cxy_pair target, uint64_t address, uint32_t size_in_bytes);
// Note that these maps holds only entries for local PCIe chips.
std::map<chip_id_t, std::unordered_map<int32_t, uint64_t>> tlb_config_map = {};
std::unordered_map<chip_id_t, std::unordered_map<tt_xy_pair, std::int32_t>> map_core_to_tlb_per_chip = {};

std::shared_ptr<boost::interprocess::named_mutex> get_mutex(const std::string& tlb_name, int logical_device_id);
virtual uint32_t get_harvested_noc_rows_for_chip(
int logical_device_id); // Returns one-hot encoded harvesting mask for PCIe mapped chips
Expand Down Expand Up @@ -914,8 +906,6 @@ class Cluster : public tt_device {
std::unordered_set<tt_xy_pair> eth_cores = {};
std::unordered_set<tt_xy_pair> dram_cores = {};

std::unordered_map<std::string, std::int32_t> dynamic_tlb_config = {};
std::unordered_map<std::string, uint64_t> dynamic_tlb_ordering_modes = {};
std::map<std::set<chip_id_t>, std::unordered_map<chip_id_t, std::vector<std::vector<int>>>> bcast_header_cache = {};
bool perform_harvesting_on_sdesc = false;
bool use_ethernet_ordered_writes = true;
Expand Down
14 changes: 14 additions & 0 deletions device/api/umd/device/grayskull_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,20 @@ class grayskull_implementation : public architecture_implementation {

const std::vector<uint32_t>& get_t6_y_locations() const override { return grayskull::T6_Y_LOCATIONS; }

// Returns {base, count} for the 1M size class from the grayskull TLB constants.
std::pair<uint32_t, uint32_t> get_tlb_1m_base_and_count() const override {
return {grayskull::TLB_BASE_1M, grayskull::TLB_COUNT_1M};
}

// Returns {base, count} for the 2M size class from the grayskull TLB constants.
std::pair<uint32_t, uint32_t> get_tlb_2m_base_and_count() const override {
return {grayskull::TLB_BASE_2M, grayskull::TLB_COUNT_2M};
}

// Returns {base, count} for the 16M size class from the grayskull TLB constants.
std::pair<uint32_t, uint32_t> get_tlb_16m_base_and_count() const override {
return {grayskull::TLB_BASE_16M, grayskull::TLB_COUNT_16M};
}

// Grayskull reports {0, 0} for the 4G size class.
// NOTE(review): presumably because Grayskull has no 4G TLBs - confirm.
std::pair<uint32_t, uint32_t> get_tlb_4g_base_and_count() const override { return {0, 0}; }

std::tuple<xy_pair, xy_pair> multicast_workaround(xy_pair start, xy_pair end) const override;
tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override;
std::pair<std::uint64_t, std::uint64_t> get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override;
Expand Down
51 changes: 51 additions & 0 deletions device/api/umd/device/tt_device/tlb_manager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once

#include <unordered_map>

#include "umd/device/tt_xy_pair.h"
#include "umd/device/types/tlb.h"

namespace tt {
class Writer;
}

namespace tt::umd {

class TTDevice;

// Per-device TLB bookkeeping: holds the maps between TLB indices and cores and the
// named dynamic TLB settings, and exposes the TLB helpers that were moved out of
// cluster.cpp. Method definitions live outside this header.
class TLBManager {
public:
// Stores the given TTDevice pointer in tt_device_.
// NOTE(review): presumably this is the TTDevice that owns the manager and must
// outlive it - confirm against the TTDevice constructor.
TLBManager(TTDevice* tt_device);

// TODO: Think about proper API which doesn't accept two cores.
// core should be in VIRTUAL coords, and translated_core should be in TRANSLATED coords.
// NOTE(review): tlb_index / address / ordering presumably select the TLB to program,
// the device address it should map, and the ordering mode - confirm in the definition.
void configure_tlb(
tt_xy_pair core, tt_xy_pair translated_core, int32_t tlb_index, uint64_t address, uint64_t ordering);

// Register, under a fallback TLB name (e.g. "LARGE_READ_TLB", "REG_TLB"), the TLB index
// and the ordering value for that dynamic TLB.
// NOTE(review): presumably backed by dynamic_tlb_config_ and dynamic_tlb_ordering_modes_ - confirm.
void set_dynamic_tlb_config(std::string fallback_tlb_name, int32_t tlb_index);
void set_dynamic_tlb_config_ordering(std::string fallback_tlb_name, uint64_t ordering);

// Queries about configured TLBs. The two is_tlb_mapped overloads differ only in whether
// an address range (address, size_in_bytes) is also supplied.
// NOTE(review): exact semantics (e.g. how tlb_size bounds the range) are defined in the
// .cpp and are not visible here.
bool address_in_tlb_space(uint64_t address, uint32_t size_in_bytes, int32_t tlb_index, uint64_t tlb_size);
bool is_tlb_mapped(tt_xy_pair core);
bool is_tlb_mapped(tt_xy_pair core, uint64_t address, uint32_t size_in_bytes);

// Per-core accessors: a tt::Writer and the tlb_configuration for the given core.
// NOTE(review): presumably both require that the core already has a TLB configured - confirm.
tt::Writer get_static_tlb_writer(tt_xy_pair core);
tlb_configuration get_tlb_configuration(tt_xy_pair core);

// TODO: the following members will be moved to private once enough stuff is moved out of cluster.
// Keyed by int32_t with a uint64_t value.
// NOTE(review): presumably TLB index -> configured address - confirm.
std::unordered_map<int32_t, uint64_t> tlb_config_map_;
// Core -> int32_t value; going by the name, the TLB index assigned to that core.
std::unordered_map<tt_xy_pair, std::int32_t> map_core_to_tlb_;

// Keyed by fallback TLB name: the TLB index and the ordering mode value, respectively.
std::unordered_map<std::string, std::int32_t> dynamic_tlb_config_;
std::unordered_map<std::string, uint64_t> dynamic_tlb_ordering_modes_;

private:
// Device pointer passed to the constructor; held as a raw pointer.
TTDevice* tt_device_;
};

} // namespace tt::umd
7 changes: 7 additions & 0 deletions device/api/umd/device/tt_device/tt_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "umd/device/architecture_implementation.h"
#include "umd/device/pci_device.hpp"
#include "umd/device/tt_device/tlb_manager.h"

// TODO: Should be moved to blackhole_architecture_implementation.h
// See /vendor_ip/synopsys/052021/bh_pcie_ctl_gen5/export/configuration/DWC_pcie_ctl.h
Expand All @@ -28,6 +29,8 @@ struct dynamic_tlb {

namespace tt::umd {

class TLBManager;

class TTDevice {
public:
/**
Expand All @@ -39,6 +42,9 @@ class TTDevice {

architecture_implementation *get_architecture_implementation();
PCIDevice *get_pci_device();
TLBManager *get_tlb_manager();

tt::ARCH get_arch();

void detect_hang_read(uint32_t data_read = c_hang_read_value);

Expand Down Expand Up @@ -114,6 +120,7 @@ class TTDevice {
protected:
std::unique_ptr<PCIDevice> pci_device_;
std::unique_ptr<architecture_implementation> architecture_impl_;
std::unique_ptr<TLBManager> tlb_manager_;
tt::ARCH arch;

bool is_hardware_hung();
Expand Down
2 changes: 1 addition & 1 deletion device/api/umd/device/tt_io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class Cluster;
* It is the caller's responsibility to manage the lifetime of Writer objects.
*/
class Writer {
friend class tt::umd::Cluster;
friend class tt::umd::TLBManager;

public:
/**
Expand Down
14 changes: 14 additions & 0 deletions device/api/umd/device/wormhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,20 @@ class wormhole_implementation : public architecture_implementation {

const std::vector<uint32_t>& get_t6_y_locations() const override { return wormhole::T6_Y_LOCATIONS; }

// Returns {base, count} for the 1M size class from the wormhole TLB constants.
std::pair<uint32_t, uint32_t> get_tlb_1m_base_and_count() const override {
return {wormhole::TLB_BASE_1M, wormhole::TLB_COUNT_1M};
}

// Returns {base, count} for the 2M size class from the wormhole TLB constants.
std::pair<uint32_t, uint32_t> get_tlb_2m_base_and_count() const override {
return {wormhole::TLB_BASE_2M, wormhole::TLB_COUNT_2M};
}

// Returns {base, count} for the 16M size class from the wormhole TLB constants.
std::pair<uint32_t, uint32_t> get_tlb_16m_base_and_count() const override {
return {wormhole::TLB_BASE_16M, wormhole::TLB_COUNT_16M};
}

// Wormhole reports {0, 0} for the 4G size class.
// NOTE(review): presumably because Wormhole has no 4G TLBs - confirm.
std::pair<uint32_t, uint32_t> get_tlb_4g_base_and_count() const override { return {0, 0}; }

std::tuple<xy_pair, xy_pair> multicast_workaround(xy_pair start, xy_pair end) const override;
tlb_configuration get_tlb_configuration(uint32_t tlb_index) const override;
std::pair<std::uint64_t, std::uint64_t> get_tlb_data(std::uint32_t tlb_index, const tlb_data& data) const override;
Expand Down
14 changes: 13 additions & 1 deletion device/chip/local_chip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,26 @@

#include "umd/device/chip/local_chip.h"

#include "umd/device/tt_device/tlb_manager.h"
#include "umd/device/tt_device/tt_device.h"

namespace tt::umd {

// Builds a chip around the TTDevice created for pci_device_id, then registers the four
// default dynamic TLB names with that device's TLBManager.
LocalChip::LocalChip(tt_SocDescriptor soc_descriptor, int pci_device_id) :
Chip(soc_descriptor), tt_device_(TTDevice::create(pci_device_id)) {}
Chip(soc_descriptor), tt_device_(TTDevice::create(pci_device_id)) {
auto tlb_manager = tt_device_->get_tlb_manager();
// Setup default dynamic tlbs: each name is bound to the TLB index reported by the
// architecture implementation for that purpose.
tlb_manager->set_dynamic_tlb_config(
"LARGE_READ_TLB", tt_device_->get_architecture_implementation()->get_mem_large_read_tlb());
tlb_manager->set_dynamic_tlb_config(
"LARGE_WRITE_TLB", tt_device_->get_architecture_implementation()->get_mem_large_write_tlb());
tlb_manager->set_dynamic_tlb_config("REG_TLB", tt_device_->get_architecture_implementation()->get_reg_tlb());
tlb_manager->set_dynamic_tlb_config(
"SMALL_READ_WRITE_TLB", tt_device_->get_architecture_implementation()->get_small_read_write_tlb());
}

// Hands back the raw pointer obtained from tt_device_.get().
TTDevice* LocalChip::get_tt_device() {
    TTDevice* const device = tt_device_.get();
    return device;
}

// A LocalChip always reports itself as MMIO capable.
bool LocalChip::is_mmio_capable() const {
    constexpr bool mmio_capable = true;
    return mmio_capable;
}

} // namespace tt::umd
Loading

0 comments on commit 087bbe2

Please sign in to comment.