Skip to content

Commit

Permalink
Merge branch 'main' into brosko/gcc
Browse files Browse the repository at this point in the history
  • Loading branch information
broskoTT authored Dec 12, 2024
2 parents 3d61038 + bf740bd commit 4398a70
Show file tree
Hide file tree
Showing 24 changed files with 889 additions and 646 deletions.
6 changes: 2 additions & 4 deletions device/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
set(POSITION_INDEPENDENT_CODE ON)

set(FBS_FILE ${PROJECT_SOURCE_DIR}/device/simulation/tt_simulation_device.fbs)
get_filename_component(FBS_FILE_NAME ${FBS_FILE} NAME)
get_filename_component(FBS_FILE_NAME ${FBS_FILE} NAME_WLE)
set(FBS_GENERATED_HEADER "${CMAKE_CURRENT_BINARY_DIR}/${FBS_FILE_NAME}_generated.h")
add_custom_command(
OUTPUT
${FBS_GENERATED_HEADER}
COMMAND
flatc
ARGS
--cpp -o "${CMAKE_CURRENT_BINARY_DIR}/" ${FBS_FILE}
flatc --cpp -o "${CMAKE_CURRENT_BINARY_DIR}/" ${FBS_FILE}
DEPENDS
flatc
${FBS_FILE}
Expand Down
8 changes: 4 additions & 4 deletions device/api/umd/device/blackhole_coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ class BlackholeCoordinateManager : public CoordinateManager {
void translate_dram_coords() override;
void translate_tensix_coords() override;

void fill_tensix_logical_to_translated() override;
void fill_eth_logical_to_translated() override;
void fill_pcie_logical_to_translated() override;
void fill_dram_logical_to_translated() override;
void fill_tensix_physical_translated_mapping() override;
void fill_eth_physical_translated_mapping() override;
void fill_pcie_physical_translated_mapping() override;
void fill_dram_physical_translated_mapping() override;

private:
void map_column_of_dram_banks(const size_t start_bank, const size_t end_bank, const size_t x_coord);
Expand Down
107 changes: 32 additions & 75 deletions device/api/umd/device/coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,6 @@

class CoordinateManager {
public:
CoordinateManager(
const tt_xy_pair& tensix_grid_size,
const std::vector<tt_xy_pair>& tensix_cores,
const size_t tensix_harvesting_mask,
const tt_xy_pair& dram_grid_size,
const std::vector<tt_xy_pair>& dram_cores,
const size_t dram_harvesting_mask,
const tt_xy_pair& eth_grid_size,
const std::vector<tt_xy_pair>& eth_cores,
const tt_xy_pair& arc_grid_size,
const std::vector<tt_xy_pair>& arc_cores,
const tt_xy_pair& pcie_grid_size,
const std::vector<tt_xy_pair>& pcie_cores);

static std::shared_ptr<CoordinateManager> create_coordinate_manager(
tt::ARCH arch,
const tt_xy_pair& tensix_grid_size,
Expand Down Expand Up @@ -59,112 +45,81 @@ class CoordinateManager {
virtual ~CoordinateManager() = default;

private:
tt::umd::CoreCoord to_physical(const tt::umd::CoreCoord core_coord);
tt::umd::CoreCoord to_logical(const tt::umd::CoreCoord core_coord);
tt::umd::CoreCoord to_virtual(const tt::umd::CoreCoord core_coord);
tt::umd::CoreCoord to_translated(const tt::umd::CoreCoord core_coord);

static void assert_create_coordinate_manager(
const tt::ARCH arch, const size_t tensix_harvesting_mask, const size_t dram_harvesting_mask);

protected:
CoordinateManager(
const tt_xy_pair& tensix_grid_size,
const std::vector<tt_xy_pair>& tensix_cores,
const size_t tensix_harvesting_mask,
const tt_xy_pair& dram_grid_size,
const std::vector<tt_xy_pair>& dram_cores,
const size_t dram_harvesting_mask,
const tt_xy_pair& eth_grid_size,
const std::vector<tt_xy_pair>& eth_cores,
const tt_xy_pair& arc_grid_size,
const std::vector<tt_xy_pair>& arc_cores,
const tt_xy_pair& pcie_grid_size,
const std::vector<tt_xy_pair>& pcie_cores);

void initialize();

virtual void translate_tensix_coords();
virtual void translate_dram_coords();
virtual void translate_eth_coords();
virtual void translate_arc_coords();
virtual void translate_pcie_coords();

void identity_map_physical_cores();
void add_core_translation(const tt::umd::CoreCoord& core_coord, const tt_xy_pair& physical_pair);

/*
* Fills the physical to translated mapping for the tensix cores.
* By default, translated coordinates are the same as physical coordinates.
* Derived coordinate managers that need to implement different mapping
* should override this method. Wormhole and Blackhole coordinate managers
* override this method to implement different mapping.
*/
virtual void fill_tensix_logical_to_translated();
virtual void fill_tensix_physical_translated_mapping();

/*
* Fills the logical to translated mapping for the ethernet cores.
* Fills the physical to translated mapping for the ethernet cores.
* By default, translated coordinates are the same as physical coordinates.
* Derived coordinate managers that need to implement different mapping
* should override this method. Wormhole and Blackhole coordinate managers
* override this method to implement different mapping.
*/
virtual void fill_eth_logical_to_translated();
virtual void fill_eth_physical_translated_mapping();

/*
* Fills the logical to translated mapping for the DRAM cores.
* Fills the physical to translated mapping for the DRAM cores.
* By default, translated coordinates are the same as physical coordinates.
* Derived coordinate managers that need to implement different mapping
* should override this method. Blackhole coordinate manager overrides
* this method to implement different mapping.
*/
virtual void fill_dram_logical_to_translated();
virtual void fill_dram_physical_translated_mapping();

/*
* Fills the logical to translated mapping for the PCIE cores.
* Fills the physical to translated mapping for the PCIE cores.
* By default, translated coordinates are the same as physical coordinates.
* Derived coordinate managers that need to implement different mapping
* should override this method. Blackhole coordinate manager overrides
* this method to implement different mapping.
*/
virtual void fill_pcie_logical_to_translated();
virtual void fill_pcie_physical_translated_mapping();

/*
* Fills the logical to translated mapping for the ARC cores.
* Fills the physical to translated mapping for the ARC cores.
* By default, translated coordinates are the same as physical coordinates.
* Derived coordinate managers that need to implement different mapping
* should override this method.
*/
virtual void fill_arc_logical_to_translated();

std::map<tt_xy_pair, tt::umd::CoreCoord> tensix_logical_to_translated;
std::map<tt_xy_pair, tt::umd::CoreCoord> tensix_logical_to_virtual;
std::map<tt_xy_pair, tt::umd::CoreCoord> tensix_logical_to_physical;

std::map<tt_xy_pair, tt::umd::CoreCoord> tensix_physical_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> tensix_virtual_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> tensix_translated_to_logical;
virtual void fill_arc_physical_translated_mapping();

std::map<tt_xy_pair, tt::umd::CoreCoord> dram_logical_to_translated;
std::map<tt_xy_pair, tt::umd::CoreCoord> dram_logical_to_virtual;
std::map<tt_xy_pair, tt::umd::CoreCoord> dram_logical_to_physical;

std::map<tt_xy_pair, tt::umd::CoreCoord> dram_physical_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> dram_virtual_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> dram_translated_to_logical;

std::map<tt_xy_pair, tt::umd::CoreCoord> eth_logical_to_translated;
std::map<tt_xy_pair, tt::umd::CoreCoord> eth_logical_to_virtual;
std::map<tt_xy_pair, tt::umd::CoreCoord> eth_logical_to_physical;

std::map<tt_xy_pair, tt::umd::CoreCoord> eth_physical_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> eth_virtual_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> eth_translated_to_logical;

std::map<tt_xy_pair, tt::umd::CoreCoord> arc_logical_to_translated;
std::map<tt_xy_pair, tt::umd::CoreCoord> arc_logical_to_virtual;
std::map<tt_xy_pair, tt::umd::CoreCoord> arc_logical_to_physical;

std::map<tt_xy_pair, tt::umd::CoreCoord> arc_physical_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> arc_virtual_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> arc_translated_to_logical;

std::map<tt_xy_pair, tt::umd::CoreCoord> pcie_logical_to_translated;
std::map<tt_xy_pair, tt::umd::CoreCoord> pcie_logical_to_virtual;
std::map<tt_xy_pair, tt::umd::CoreCoord> pcie_logical_to_physical;

std::map<tt_xy_pair, tt::umd::CoreCoord> pcie_physical_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> pcie_virtual_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> pcie_translated_to_logical;

std::map<tt_xy_pair, tt::umd::CoreCoord>& get_logical_to_translated(CoreType core_type);
std::map<tt_xy_pair, tt::umd::CoreCoord>& get_logical_to_virtual(CoreType core_type);
std::map<tt_xy_pair, tt::umd::CoreCoord>& get_logical_to_physical(CoreType core_type);

std::map<tt_xy_pair, tt::umd::CoreCoord>& get_physical_to_logical(CoreType core_type);
std::map<tt_xy_pair, tt::umd::CoreCoord>& get_virtual_to_logical(CoreType core_type);
std::map<tt_xy_pair, tt::umd::CoreCoord>& get_translated_to_logical(CoreType core_type);
std::map<tt::umd::CoreCoord, tt_xy_pair> to_physical_map;
std::map<std::pair<tt_xy_pair, CoordSystem>, tt::umd::CoreCoord> from_physical_map;

const tt_xy_pair tensix_grid_size;
const std::vector<tt_xy_pair>& tensix_cores;
Expand All @@ -183,3 +138,5 @@ class CoordinateManager {
const tt_xy_pair pcie_grid_size;
const std::vector<tt_xy_pair>& pcie_cores;
};

// friend
2 changes: 1 addition & 1 deletion device/api/umd/device/grayskull_coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@ class GrayskullCoordinateManager : public CoordinateManager {
const std::vector<tt_xy_pair>& pcie_cores);

protected:
void fill_eth_logical_to_translated() override;
void fill_eth_physical_translated_mapping() override;
};
4 changes: 0 additions & 4 deletions device/api/umd/device/hugepage.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@ namespace tt::umd {
// Get number of 1GB host hugepages installed.
uint32_t get_num_hugepages();

// Dynamically figure out how many host memory channels (based on hugepages installed) for each device, based on arch.
uint32_t get_available_num_host_mem_channels(
const uint32_t num_channels_per_device_target, const uint16_t device_id, const uint16_t revision_id);

// Looks for hugetlbfs inside /proc/mounts matching desired pagesize (typically 1G)
std::string find_hugepage_dir(std::size_t pagesize);

Expand Down
4 changes: 2 additions & 2 deletions device/api/umd/device/pci_device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,8 @@ class PCIDevice {
*/
bool init_iommu(size_t size);

int get_num_host_mem_channels() const;
hugepage_mapping get_hugepage_mapping(int channel) const;
size_t get_num_host_mem_channels() const;
hugepage_mapping get_hugepage_mapping(size_t channel) const;

/**
* Map a buffer for DMA access by the device.
Expand Down
22 changes: 22 additions & 0 deletions device/api/umd/device/tt_core_coordinates.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,28 @@ struct CoreCoord : public tt_xy_pair {
return this->x == other.x && this->y == other.y && this->core_type == other.core_type &&
this->coord_system == other.coord_system;
}

// Ordering used when CoreCoord is a key in an ordered container.
// Fields are compared lexicographically in this priority:
// x, then y, then core_type, then coord_system. The first field that
// differs decides the result.
bool operator<(const CoreCoord& o) const {
    if (x != o.x) {
        return x < o.x;
    }
    if (y != o.y) {
        return y < o.y;
    }
    if (core_type != o.core_type) {
        return core_type < o.core_type;
    }
    // All earlier fields are equal; the coordinate system breaks the tie.
    return coord_system < o.coord_system;
}
};

} // namespace tt::umd
8 changes: 8 additions & 0 deletions device/api/umd/device/tt_device/blackhole_tt_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,20 @@

#pragma once

#include <set>

#include "umd/device/tt_device/tt_device.h"

namespace tt::umd {
// Blackhole-specific TTDevice. Declares its own destructor and an override
// of the base class's virtual configure_iatu_region().
class BlackholeTTDevice : public TTDevice {
public:
// Takes ownership of the PCIDevice handle passed in.
BlackholeTTDevice(std::unique_ptr<PCIDevice> pci_device);
~BlackholeTTDevice();

// Blackhole override of TTDevice::configure_iatu_region; parameters have
// the same meaning as in the base-class declaration.
void configure_iatu_region(size_t region, uint64_t base, uint64_t target, size_t size) override;

private:
// NOTE(review): by its name, the offset of the ATU register block inside
// BAR2 on Blackhole -- confirm against the implementation file.
static constexpr uint64_t ATU_OFFSET_IN_BH_BAR2 = 0x1200;
// NOTE(review): presumably the iATU region indices configured through
// configure_iatu_region(), possibly kept for cleanup in the destructor --
// the implementation is not visible here; confirm.
std::set<size_t> iatu_regions_;
};
} // namespace tt::umd
29 changes: 29 additions & 0 deletions device/api/umd/device/tt_device/tt_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,35 @@ class TTDevice {
tt_xy_pair end,
std::uint64_t ordering = tt::umd::tlb_data::Relaxed);

/**
* Configures a PCIe Address Translation Unit (iATU) region.
*
* Device software expects to be able to access memory that is shared with
* the host using the following NOC addresses at the PCIe core:
* - GS: 0x0
* - WH: 0x8_0000_0000
* - BH: 0x1000_0000_0000_0000
* Without iATU configuration, these map to host PA 0x0.
*
* While modern hardware supports IOMMU with flexible IOVA mapping, we must
* maintain the iATU configuration to satisfy software that has hard-coded
* the above NOC addresses rather than using driver-provided IOVAs.
*
* This interface is only intended to be used for configuring sysmem with
* either 1GB hugepages or a compatible scheme.
*
* @param region iATU region index (0-15)
* @param base region * (1 << 30)
* @param target DMA address (PA or IOVA) to map to
* @param size size of the mapping window; must be (1 << 30)
*
* NOTE: Programming the iATU from userspace is architecturally incorrect:
* - iATU should be managed by KMD to ensure proper cleanup on process exit
* - Multiple processes can corrupt each other's iATU configurations
* We should fix this!
*/
virtual void configure_iatu_region(size_t region, uint64_t base, uint64_t target, size_t size);

protected:
std::unique_ptr<PCIDevice> pci_device_;
std::unique_ptr<architecture_implementation> architecture_impl_;
Expand Down
4 changes: 2 additions & 2 deletions device/api/umd/device/wormhole_coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,6 @@ class WormholeCoordinateManager : public CoordinateManager {
const std::vector<tt_xy_pair>& pcie_cores);

protected:
void fill_tensix_logical_to_translated() override;
void fill_eth_logical_to_translated() override;
void fill_tensix_physical_translated_mapping() override;
void fill_eth_physical_translated_mapping() override;
};
Loading

0 comments on commit 4398a70

Please sign in to comment.