Skip to content

Commit

Permalink
Implement CoreCoord struct and translation
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT committed Nov 28, 2024
1 parent aea0eea commit 821fd97
Show file tree
Hide file tree
Showing 30 changed files with 1,964 additions and 980 deletions.
1 change: 1 addition & 0 deletions device/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ target_sources(
coordinate_manager.cpp
cpuset_lib.cpp
grayskull/grayskull_implementation.cpp
grayskull/grayskull_coordinate_manager.cpp
hugepage.cpp
pcie/pci_device.cpp
simulation/tt_simulation_device.cpp
Expand Down
1 change: 1 addition & 0 deletions device/api/umd/device/architecture_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "umd/device/tlb.h"
#include "umd/device/tt_arch_types.h"
#include "umd/device/xy_pair.h"
#include "umd/device/tt_xy_pair.h"

struct tt_driver_host_address_params;
struct tt_driver_eth_interface_params;
Expand Down
60 changes: 60 additions & 0 deletions device/api/umd/device/blackhole_coordinate_manager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include "umd/device/coordinate_manager.h"

/**
 * Coordinate manager specialization for the Blackhole architecture.
 *
 * The constructor forwards every grid size, core list and harvesting mask to CoordinateManager
 * unchanged and then runs the harvesting / translation steps itself.
 */
class BlackholeCoordinateManager : public CoordinateManager {
public:
    /**
     * @param tensix_grid_size       forwarded to CoordinateManager.
     * @param tensix_cores           forwarded to CoordinateManager.
     * @param tensix_harvesting_mask forwarded to CoordinateManager and passed to tensix_harvesting().
     * @param dram_grid_size         forwarded to CoordinateManager.
     * @param dram_cores             forwarded to CoordinateManager.
     * @param dram_harvesting_mask   forwarded to CoordinateManager and passed to dram_harvesting().
     * @param eth_grid_size          forwarded to CoordinateManager.
     * @param eth_cores              forwarded to CoordinateManager.
     * @param arc_grid_size          forwarded to CoordinateManager.
     * @param arc_cores              forwarded to CoordinateManager.
     * @param pcie_grid_size         forwarded to CoordinateManager.
     * @param pcie_cores             forwarded to CoordinateManager.
     */
    BlackholeCoordinateManager(
        const tt_xy_pair& tensix_grid_size,
        const std::vector<tt_xy_pair>& tensix_cores,
        const size_t tensix_harvesting_mask,
        const tt_xy_pair& dram_grid_size,
        const std::vector<tt_xy_pair>& dram_cores,
        const size_t dram_harvesting_mask,
        const tt_xy_pair& eth_grid_size,
        const std::vector<tt_xy_pair>& eth_cores,
        const tt_xy_pair& arc_grid_size,
        const std::vector<tt_xy_pair>& arc_cores,
        const tt_xy_pair& pcie_grid_size,
        const std::vector<tt_xy_pair>& pcie_cores) :
        CoordinateManager(
            tensix_grid_size,
            tensix_cores,
            tensix_harvesting_mask,
            dram_grid_size,
            dram_cores,
            dram_harvesting_mask,
            eth_grid_size,
            eth_cores,
            arc_grid_size,
            arc_cores,
            pcie_grid_size,
            pcie_cores) {
        // Invoked from the derived constructor so that virtual dispatch reaches the overrides declared
        // in this class; the same calls made from the CoordinateManager constructor would not.
        this->tensix_harvesting(tensix_harvesting_mask);
        this->dram_harvesting(dram_harvesting_mask);
        this->translate_eth_coords();
        this->translate_arc_coords();
        this->translate_pcie_coords();
    }

protected:
    // Blackhole-specific replacements for the base-class harvesting steps.
    void dram_harvesting(const size_t dram_harvesting_mask) override;
    void tensix_harvesting(const size_t tensix_harvesting_mask) override;

    // Blackhole-specific replacements for the base-class logical -> translated fill steps.
    void fill_tensix_logical_to_translated() override;
    void fill_eth_logical_to_translated() override;
    void fill_pcie_logical_to_translated() override;

private:
    // constexpr static data members are implicitly inline since C++17, so these constants can be
    // odr-used (e.g. bound to a const reference) without a separate out-of-class definition.
    // NOTE(review): presumably the first translated x / y assigned to ETH cores - confirm in the .cpp.
    static constexpr size_t eth_translated_coordinate_start_x = 20;
    static constexpr size_t eth_translated_coordinate_start_y = 25;

    // NOTE(review): presumably the first translated x / y assigned to PCIe cores - confirm in the .cpp.
    static constexpr size_t pcie_translated_coordinate_start_x = 19;
    static constexpr size_t pcie_translated_coordinate_start_y = 24;
};
39 changes: 35 additions & 4 deletions device/api/umd/device/blackhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,43 @@ enum class arc_message_type {
};

// DEVICE_DATA
static constexpr std::array<xy_pair, 24> DRAM_LOCATIONS = {
// Tensix grid size and the coordinates of every Tensix core. The list holds 140 entries:
// 14 distinct x values (1-7, 10-16) for each of 10 distinct y values (2-11), matching {14, 10}.
// NOTE(review): these are std::vector objects defined `static` in a header, so (assuming namespace
// scope) every translation unit that includes it builds its own copy at start-up.
const static tt_xy_pair TENSIX_GRID_SIZE = {14, 10};
const static std::vector<tt_xy_pair> TENSIX_CORES = {{
{1, 2}, {2, 2}, {3, 2}, {4, 2}, {5, 2}, {6, 2}, {7, 2}, {10, 2}, {11, 2}, {12, 2}, {13, 2}, {14, 2},
{15, 2}, {16, 2}, {1, 3}, {2, 3}, {3, 3}, {4, 3}, {5, 3}, {6, 3}, {7, 3}, {10, 3}, {11, 3}, {12, 3},
{13, 3}, {14, 3}, {15, 3}, {16, 3}, {1, 4}, {2, 4}, {3, 4}, {4, 4}, {5, 4}, {6, 4}, {7, 4}, {10, 4},
{11, 4}, {12, 4}, {13, 4}, {14, 4}, {15, 4}, {16, 4}, {1, 5}, {2, 5}, {3, 5}, {4, 5}, {5, 5}, {6, 5},
{7, 5}, {10, 5}, {11, 5}, {12, 5}, {13, 5}, {14, 5}, {15, 5}, {16, 5}, {1, 6}, {2, 6}, {3, 6}, {4, 6},
{5, 6}, {6, 6}, {7, 6}, {10, 6}, {11, 6}, {12, 6}, {13, 6}, {14, 6}, {15, 6}, {16, 6}, {1, 7}, {2, 7},
{3, 7}, {4, 7}, {5, 7}, {6, 7}, {7, 7}, {10, 7}, {11, 7}, {12, 7}, {13, 7}, {14, 7}, {15, 7}, {16, 7},
{1, 8}, {2, 8}, {3, 8}, {4, 8}, {5, 8}, {6, 8}, {7, 8}, {10, 8}, {11, 8}, {12, 8}, {13, 8}, {14, 8},
{15, 8}, {16, 8}, {1, 9}, {2, 9}, {3, 9}, {4, 9}, {5, 9}, {6, 9}, {7, 9}, {10, 9}, {11, 9}, {12, 9},
{13, 9}, {14, 9}, {15, 9}, {16, 9}, {1, 10}, {2, 10}, {3, 10}, {4, 10}, {5, 10}, {6, 10}, {7, 10}, {10, 10},
{11, 10}, {12, 10}, {13, 10}, {14, 10}, {15, 10}, {16, 10}, {1, 11}, {2, 11}, {3, 11}, {4, 11}, {5, 11}, {6, 11},
{7, 11}, {10, 11}, {11, 11}, {12, 11}, {13, 11}, {14, 11}, {15, 11}, {16, 11},
}};

// DRAM grid size is {NUM_DRAM_BANKS, NUM_NOC_PORTS_PER_DRAM_BANK} = {8, 3}; DRAM_CORES lists
// 24 coordinates, all at x = 0 or x = 9.
const std::size_t NUM_DRAM_BANKS = 8;
const std::size_t NUM_NOC_PORTS_PER_DRAM_BANK = 3;
static const tt_xy_pair DRAM_GRID_SIZE = {NUM_DRAM_BANKS, NUM_NOC_PORTS_PER_DRAM_BANK};
static const std::vector<tt_xy_pair> DRAM_CORES = {
{{0, 0}, {0, 1}, {0, 11}, {0, 2}, {0, 10}, {0, 3}, {0, 9}, {0, 4}, {0, 8}, {0, 5}, {0, 7}, {0, 6},
{9, 0}, {9, 1}, {9, 11}, {9, 2}, {9, 10}, {9, 3}, {9, 9}, {9, 4}, {9, 8}, {9, 5}, {9, 7}, {9, 6}}};

static constexpr std::array<xy_pair, 1> ARC_LOCATIONS = {{{8, 0}}};
static constexpr std::array<xy_pair, 1> PCI_LOCATIONS = {{{11, 0}}};
static constexpr std::array<xy_pair, 14> ETH_LOCATIONS = {
// TODO: DRAM locations should be deleted. We keep it for compatibility with
// the existing code in clients which rely on DRAM_LOCATIONS.
static const std::vector<tt_xy_pair> DRAM_LOCATIONS = DRAM_CORES;

// ARC and PCIe each have a single core on a {1, 1} grid. The *_LOCATIONS vectors are copies of
// the corresponding *_CORES vectors.
static const tt_xy_pair ARC_GRID_SIZE = {1, 1};
static const std::vector<tt_xy_pair> ARC_CORES = {{8, 0}};
static const std::vector<tt_xy_pair> ARC_LOCATIONS = ARC_CORES;

static const tt_xy_pair PCIE_GRID_SIZE = {1, 1};
static const std::vector<tt_xy_pair> PCIE_CORES = {{{11, 0}}};
static const std::vector<tt_xy_pair> PCI_LOCATIONS = PCIE_CORES;

// ETH grid size is {14, 1}; every ETH core listed below has y = 1.
static const tt_xy_pair ETH_GRID_SIZE = {14, 1};
static const std::vector<tt_xy_pair> ETH_CORES = {
{{1, 1},
{2, 1},
{3, 1},
Expand All @@ -79,6 +109,7 @@ static constexpr std::array<xy_pair, 14> ETH_LOCATIONS = {
{14, 1},
{15, 1},
{16, 1}}};
static const std::vector<tt_xy_pair> ETH_LOCATIONS = ETH_CORES;
// Return to std::array instead of std::vector once we get std::span support in C++20
// The x and y values below are the same ones that appear in TENSIX_CORES.
static const std::vector<uint32_t> T6_X_LOCATIONS = {1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16};
static const std::vector<uint32_t> T6_Y_LOCATIONS = {2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
Expand Down
5 changes: 5 additions & 0 deletions device/api/umd/device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,11 @@ class Cluster : public tt_device {
// TODO: This should be accessible through public API, probably to be moved to tt_device.
PCIDevice* get_pci_device(int device_id) const;

// Core coordinates functions (currently commented out):
//   virtual void write_to_device(
//       const void* mem_ptr, uint32_t size_in_bytes, chip_id_t chip, CoreCoord core_coord, uint64_t addr);
//   virtual void read_from_device(
//       void* mem_ptr, uint32_t size_in_bytes, chip_id_t chip, CoreCoord core_coord, uint64_t addr);

// Destructor
virtual ~Cluster();

Expand Down
163 changes: 119 additions & 44 deletions device/api/umd/device/coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,74 +6,149 @@

#pragma once

#include <iostream>
#include <map>
#include <set>
#include <vector>

#include "umd/device/tt_arch_types.h"
#include "umd/device/tt_xy_pair.h"
#include "umd/device/tt_core_coordinates.h"

// Base class that stores, for each core type (tensix, dram, eth, arc, pcie), a grid size and a core
// list, plus maps between logical coordinates and the physical / virtual / translated systems.
// NOTE(review): this listing appears to interleave declarations removed and added by the commit
// (e.g. both worker_* and tensix_* members, two copy constructors, two destructors) - confirm
// against the final header before relying on it.
class CoordinateManager {
public:
// Stores the supplied grid sizes, core lists and harvesting masks. The constructor body is empty,
// so none of the coordinate maps are populated here.
CoordinateManager(
const tt_xy_pair& worker_grid_size, const std::vector<tt_xy_pair>& workers, std::size_t harvesting_mask) :
worker_grid_size(worker_grid_size), workers(workers), harvesting_mask(harvesting_mask) {}
const tt_xy_pair& tensix_grid_size,
const std::vector<tt_xy_pair>& tensix_cores,
const size_t tensix_harvesting_mask,
const tt_xy_pair& dram_grid_size,
const std::vector<tt_xy_pair>& dram_cores,
const size_t dram_harvesting_mask,
const tt_xy_pair& eth_grid_size,
const std::vector<tt_xy_pair>& eth_cores,
const tt_xy_pair& arc_grid_size,
const std::vector<tt_xy_pair>& arc_cores,
const tt_xy_pair& pcie_grid_size,
const std::vector<tt_xy_pair>& pcie_cores) :
tensix_grid_size(tensix_grid_size),
tensix_cores(tensix_cores),
tensix_harvesting_mask(tensix_harvesting_mask),
dram_grid_size(dram_grid_size),
dram_cores(dram_cores),
dram_harvesting_mask(dram_harvesting_mask),
eth_grid_size(eth_grid_size),
eth_cores(eth_cores),
arc_grid_size(arc_grid_size),
arc_cores(arc_cores),
pcie_grid_size(pcie_grid_size),
pcie_cores(pcie_cores) {}

// Factory overloads returning a shared_ptr<CoordinateManager>; presumably the concrete type is
// chosen from `arch` - confirm in the .cpp.
// NOTE(review): std::shared_ptr is used here but <memory> is not among the includes visible above
// this class - presumably pulled in transitively; confirm.
static std::shared_ptr<CoordinateManager> get_coordinate_manager(
tt::ARCH arch,
const tt_xy_pair& tensix_grid_size,
const std::vector<tt_xy_pair>& tensix_cores,
const size_t tensix_harvesting_mask,
const tt_xy_pair& dram_grid_size,
const std::vector<tt_xy_pair>& dram_cores,
const size_t dram_harvesting_mask,
const tt_xy_pair& eth_grid_size,
const std::vector<tt_xy_pair>& eth_cores,
const tt_xy_pair& arc_grid_size,
const std::vector<tt_xy_pair>& arc_cores,
const tt_xy_pair& pcie_grid_size,
const std::vector<tt_xy_pair>& pcie_cores);

// Overload taking only the architecture and the two harvesting masks (both default to 0).
static std::shared_ptr<CoordinateManager> get_coordinate_manager(
tt::ARCH arch, const size_t tensix_harvesting_mask = 0, const size_t dram_harvesting_mask = 0);

virtual void perform_harvesting(std::size_t harvesting_mask);
// NOTE(review): the parameter is a non-const reference, so a const CoordinateManager cannot be
// copied through this constructor.
CoordinateManager(CoordinateManager& other) = default;

virtual tt_physical_coords to_physical_coords(tt_logical_coords logical_coords);
virtual tt_translated_coords to_translated_coords(tt_logical_coords logical_coords);
virtual tt_virtual_coords to_virtual_coords(tt_logical_coords logical_coords);
// Presumably converts core_coord into the coordinate system named by coord_system - confirm in the .cpp.
tt::umd::CoreCoord to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system);

virtual tt_logical_coords to_logical_coords(tt_physical_coords physical_coords);
virtual tt_virtual_coords to_virtual_coords(tt_physical_coords physical_coords);
virtual tt_translated_coords to_translated_coords(tt_physical_coords physical_coords);
virtual ~CoordinateManager() = default;

virtual tt_logical_coords to_logical_coords(tt_virtual_coords virtual_coords);
virtual tt_physical_coords to_physical_coords(tt_virtual_coords virtual_coords);
virtual tt_translated_coords to_translated_coords(tt_virtual_coords virtual_coords);
private:
tt::umd::CoreCoord to_physical(const tt::umd::CoreCoord core_coord);
tt::umd::CoreCoord to_logical(const tt::umd::CoreCoord core_coord);
tt::umd::CoreCoord to_virtual(const tt::umd::CoreCoord core_coord);
tt::umd::CoreCoord to_translated(const tt::umd::CoreCoord core_coord);

virtual tt_logical_coords to_logical_coords(tt_translated_coords translated_coords);
virtual tt_physical_coords to_physical_coords(tt_translated_coords translated_coords);
virtual tt_virtual_coords to_virtual_coords(tt_translated_coords translated_coords);
protected:
// Virtual steps that subclasses may override.
virtual void tensix_harvesting(const size_t harvesting_mask);
virtual void dram_harvesting(const size_t dram_harvesting_mask);
virtual void translate_eth_coords();
virtual void translate_arc_coords();
virtual void translate_pcie_coords();

static std::unique_ptr<CoordinateManager> get_coordinate_manager(
tt::ARCH arch,
const tt_xy_pair& worker_grid_size,
const std::vector<tt_xy_pair>& workers,
std::size_t harvesting_mask);
void clear_tensix_harvesting_structures();
void clear_dram_harvesting_structures();

CoordinateManager(CoordinateManager& other) = default;
virtual void fill_tensix_logical_to_translated();
virtual void fill_eth_logical_to_translated();
virtual void fill_pcie_logical_to_translated();

virtual ~CoordinateManager() {}
// Lookup tables keyed by tt_xy_pair: for each core type, one map per direction between logical
// coordinates and each of the translated / virtual / physical systems.
std::map<tt_xy_pair, tt::umd::CoreCoord> tensix_logical_to_translated;
std::map<tt_xy_pair, tt::umd::CoreCoord> tensix_logical_to_virtual;
std::map<tt_xy_pair, tt::umd::CoreCoord> tensix_logical_to_physical;

protected:
virtual void clear_harvesting_structures();
std::map<tt_xy_pair, tt::umd::CoreCoord> tensix_physical_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> tensix_virtual_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> tensix_translated_to_logical;

std::map<tt_xy_pair, tt::umd::CoreCoord> dram_logical_to_translated;
std::map<tt_xy_pair, tt::umd::CoreCoord> dram_logical_to_virtual;
std::map<tt_xy_pair, tt::umd::CoreCoord> dram_logical_to_physical;

std::map<tt_xy_pair, tt::umd::CoreCoord> dram_physical_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> dram_virtual_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> dram_translated_to_logical;

std::map<tt_xy_pair, tt::umd::CoreCoord> eth_logical_to_translated;
std::map<tt_xy_pair, tt::umd::CoreCoord> eth_logical_to_virtual;
std::map<tt_xy_pair, tt::umd::CoreCoord> eth_logical_to_physical;

std::map<tt_xy_pair, tt::umd::CoreCoord> eth_physical_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> eth_virtual_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> eth_translated_to_logical;

std::map<tt_xy_pair, tt::umd::CoreCoord> arc_logical_to_translated;
std::map<tt_xy_pair, tt::umd::CoreCoord> arc_logical_to_virtual;
std::map<tt_xy_pair, tt::umd::CoreCoord> arc_logical_to_physical;

std::map<tt_xy_pair, tt::umd::CoreCoord> arc_physical_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> arc_virtual_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> arc_translated_to_logical;

std::map<tt_xy_pair, tt::umd::CoreCoord> pcie_logical_to_translated;
std::map<tt_xy_pair, tt::umd::CoreCoord> pcie_logical_to_virtual;
std::map<tt_xy_pair, tt::umd::CoreCoord> pcie_logical_to_physical;

std::map<tt_xy_pair, tt::umd::CoreCoord> pcie_physical_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> pcie_virtual_to_logical;
std::map<tt_xy_pair, tt::umd::CoreCoord> pcie_translated_to_logical;

virtual std::set<std::size_t> get_x_coordinates_to_harvest(std::size_t harvesting_mask);
virtual std::set<std::size_t> get_y_coordinates_to_harvest(std::size_t harvesting_mask);
// Accessors returning a mutable reference to a map; presumably the one belonging to core_type.
std::map<tt_xy_pair, tt::umd::CoreCoord>& get_logical_to_translated(CoreType core_type);
std::map<tt_xy_pair, tt::umd::CoreCoord>& get_logical_to_virtual(CoreType core_type);
std::map<tt_xy_pair, tt::umd::CoreCoord>& get_logical_to_physical(CoreType core_type);

virtual void fill_logical_to_physical_mapping(
const std::set<size_t>& x_to_harvest,
const std::set<size_t>& y_to_harvest,
const std::set<size_t>& physical_x_unharvested,
const std::set<size_t>& physical_y_unharvested);
virtual void fill_logical_to_virtual_mapping(
const std::set<size_t>& physical_x_unharvested, const std::set<size_t>& physical_y_unharvested);
std::map<tt_xy_pair, tt::umd::CoreCoord>& get_physical_to_logical(CoreType core_type);
std::map<tt_xy_pair, tt::umd::CoreCoord>& get_virtual_to_logical(CoreType core_type);
std::map<tt_xy_pair, tt::umd::CoreCoord>& get_translated_to_logical(CoreType core_type);

std::map<std::size_t, std::size_t> physical_y_to_logical_y;
std::map<std::size_t, std::size_t> physical_x_to_logical_x;
// NOTE(review): every *_cores member below is a reference bound to the matching constructor
// argument, so the caller-supplied vectors must outlive this object. The grid sizes are copies.
const tt_xy_pair tensix_grid_size;
const std::vector<tt_xy_pair>& tensix_cores;
size_t tensix_harvesting_mask;

std::vector<std::size_t> logical_y_to_physical_y;
std::vector<std::size_t> logical_x_to_physical_x;
const tt_xy_pair dram_grid_size;
const std::vector<tt_xy_pair>& dram_cores;
size_t dram_harvesting_mask;

std::vector<std::size_t> logical_y_to_virtual_y;
std::vector<std::size_t> logical_x_to_virtual_x;
const tt_xy_pair eth_grid_size;
const std::vector<tt_xy_pair>& eth_cores;

std::map<std::size_t, std::size_t> virtual_y_to_logical_y;
std::map<std::size_t, std::size_t> virtual_x_to_logical_x;
const tt_xy_pair arc_grid_size;
const std::vector<tt_xy_pair>& arc_cores;

const tt_xy_pair worker_grid_size;
const std::vector<tt_xy_pair>& workers;
const std::size_t harvesting_mask;
const tt_xy_pair pcie_grid_size;
const std::vector<tt_xy_pair>& pcie_cores;
};
49 changes: 49 additions & 0 deletions device/api/umd/device/grayskull_coordinate_manager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include "umd/device/coordinate_manager.h"

class GrayskullCoordinateManager : public CoordinateManager {
public:
GrayskullCoordinateManager(
const tt_xy_pair& tensix_grid_size,
const std::vector<tt_xy_pair>& tensix_cores,
const size_t tensix_harvesting_mask,
const tt_xy_pair& dram_grid_size,
const std::vector<tt_xy_pair>& dram_cores,
const size_t dram_harvesting_mask,
const tt_xy_pair& eth_grid_size,
const std::vector<tt_xy_pair>& eth_cores,
const tt_xy_pair& arc_grid_size,
const std::vector<tt_xy_pair>& arc_cores,
const tt_xy_pair& pcie_grid_size,
const std::vector<tt_xy_pair>& pcie_cores) :
CoordinateManager(
tensix_grid_size,
tensix_cores,
tensix_harvesting_mask,
dram_grid_size,
dram_cores,
dram_harvesting_mask,
eth_grid_size,
eth_cores,
arc_grid_size,
arc_cores,
pcie_grid_size,
pcie_cores) {
this->tensix_harvesting(tensix_harvesting_mask);
this->dram_harvesting(dram_harvesting_mask);
this->translate_eth_coords();
this->translate_arc_coords();
this->translate_pcie_coords();
}

protected:
void fill_tensix_logical_to_translated() override;
void fill_eth_logical_to_translated() override;
};
Loading

0 comments on commit 821fd97

Please sign in to comment.