Skip to content

Commit

Permalink
Implement CoreCoord struct and translation
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT committed Nov 26, 2024
1 parent 71f8fd6 commit a179222
Show file tree
Hide file tree
Showing 24 changed files with 1,868 additions and 385 deletions.
1 change: 1 addition & 0 deletions device/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ set(UMD_DEVICE_SRCS
coordinate_manager.cpp
blackhole/blackhole_coordinate_manager.cpp
wormhole/wormhole_coordinate_manager.cpp
grayskull/grayskull_coordinate_manager.cpp
pcie/pci_device.cpp
hugepage.cpp
)
Expand Down
97 changes: 82 additions & 15 deletions device/blackhole/blackhole_coordinate_manager.cpp
Original file line number Diff line number Diff line change
@@ -1,29 +1,96 @@
/*
* SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc.
* SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "blackhole_coordinate_manager.h"
#include <iostream>
void BlackholeCoordinateManager::tensix_harvesting(const std::size_t tensix_harvesting_mask) {
CoordinateManager::tensix_harvesting_mask = tensix_harvesting_mask;
CoordinateManager::clear_tensix_harvesting_structures();

std::size_t num_harvested_x = __builtin_popcount(tensix_harvesting_mask);
std::size_t grid_size_x = CoordinateManager::tensix_grid_size.x;
std::size_t grid_size_y = CoordinateManager::tensix_grid_size.y;

std::set<std::size_t> BlackholeCoordinateManager::get_x_coordinates_to_harvest(std::size_t harvesting_mask) {
std::set<std::size_t> x_to_harvest;
std::size_t logical_x = 0;
while (harvesting_mask > 0) {
if (harvesting_mask & 1) {
x_to_harvest.insert(logical_x);
for (std::size_t x = 0; x < grid_size_x; x++) {
if (!(tensix_harvesting_mask & (1 << x))) {
for (std::size_t y = 0; y < grid_size_y; y++) {
const tt_xy_pair& tensix_core = CoordinateManager::tensix_cores[x + y * grid_size_x];
tensix_logical_to_physical[{logical_x, y}] = CoreCoord(tensix_core.x, tensix_core.y, CoreType::TENSIX, CoordSystem::PHYSICAL);
tensix_physical_to_logical[tensix_core] = CoreCoord(logical_x, y, CoreType::TENSIX, CoordSystem::LOGICAL);
}
logical_x++;
}
}

for (std::size_t x = 0; x < grid_size_x - num_harvested_x; x++) {
for (std::size_t y = 0; y < grid_size_y; y++) {
const tt_xy_pair& tensix_core = CoordinateManager::tensix_cores[x + y * grid_size_x];
tensix_logical_to_virtual[{x, y}] = CoreCoord(tensix_core.x, tensix_core.y, CoreType::TENSIX, CoordSystem::VIRTUAL);
tensix_virtual_to_logical[tensix_core] = CoreCoord(x, y, CoreType::TENSIX, CoordSystem::LOGICAL);
}
logical_x++;
harvesting_mask >>= 1;
}
return x_to_harvest;

BlackholeCoordinateManager::fill_tensix_logical_to_translated();
}

tt_translated_coords BlackholeCoordinateManager::to_translated_coords(tt_logical_coords logical_coords) {
tt_virtual_coords virtual_coords = to_virtual_coords(logical_coords);
return tt_translated_coords(virtual_coords.x, virtual_coords.y);
void BlackholeCoordinateManager::fill_tensix_logical_to_translated() {
const std::size_t num_harvested_x = __builtin_popcount(CoordinateManager::tensix_harvesting_mask);
const std::size_t grid_size_x = CoordinateManager::tensix_grid_size.x;
const std::size_t grid_size_y = CoordinateManager::tensix_grid_size.y;

for (std::size_t x = 0; x < grid_size_x - num_harvested_x; x++) {
for (std::size_t y = 0; y < grid_size_y; y++) {
const CoreCoord virtual_coord = CoordinateManager::tensix_logical_to_virtual[{x, y}];
const std::size_t translated_x = virtual_coord.x;
const std::size_t translated_y = virtual_coord.y;
CoordinateManager::tensix_logical_to_translated[{x, y}] = CoreCoord(translated_x, translated_y, CoreType::TENSIX, CoordSystem::TRANSLATED);
CoordinateManager::tensix_translated_to_logical[{translated_x, translated_y}] = CoreCoord(x, y, CoreType::TENSIX, CoordSystem::LOGICAL);
}
}
}

/**
 * Rebuilds the DRAM coordinate maps for the given harvesting mask.
 *
 * Bit `x` of `dram_harvesting_mask` marks DRAM bank `x` as harvested. The mask
 * is stored, the existing DRAM harvesting structures are cleared, and then:
 *   - logical <-> physical: unharvested banks are numbered consecutively
 *     (logical x), skipping harvested ones, and mapped to their own cores;
 *   - logical <-> virtual: logical bank `x` is mapped to the cores of bank `x`
 *     in `dram_cores`, i.e. the first N banks where N is the number of
 *     unharvested banks.
 *
 * `dram_cores` is indexed as `bank * dram_grid_size.y + port`.
 *
 * The number of logical banks is taken from the banks actually found
 * unharvested inside the grid. This keeps both map pairs the same size and
 * avoids the unsigned wrap-around (and resulting out-of-range reads) that
 * `grid_width - popcount(mask)` produces when the mask has more bits set than
 * there are banks. The mask bit test uses a std::size_t shift rather than an
 * int-typed `1 << x`.
 *
 * @param dram_harvesting_mask Bitmask of harvested DRAM banks.
 */
void BlackholeCoordinateManager::dram_harvesting(const std::size_t dram_harvesting_mask) {
    CoordinateManager::dram_harvesting_mask = dram_harvesting_mask;
    CoordinateManager::clear_dram_harvesting_structures();

    const std::size_t num_banks = dram_grid_size.x;
    const std::size_t ports_per_bank = dram_grid_size.y;

    // Logical <-> physical: walk every bank, assigning the next logical x to
    // each bank whose mask bit is clear.
    std::size_t logical_x = 0;
    for (std::size_t x = 0; x < num_banks; x++) {
        const bool is_harvested = ((dram_harvesting_mask >> x) & std::size_t{1}) != 0;
        if (is_harvested) {
            continue;
        }
        for (std::size_t y = 0; y < ports_per_bank; y++) {
            const tt_xy_pair& dram_core = CoordinateManager::dram_cores[x * ports_per_bank + y];
            CoordinateManager::dram_logical_to_physical[{logical_x, y}] = CoreCoord(dram_core.x, dram_core.y, CoreType::DRAM, CoordSystem::PHYSICAL);
            CoordinateManager::dram_physical_to_logical[dram_core] = CoreCoord(logical_x, y, CoreType::DRAM, CoordSystem::LOGICAL);
        }
        logical_x++;
    }

    // Logical <-> virtual: the virtual grid is the leading block of banks,
    // one per unharvested bank counted above (never more than num_banks).
    const std::size_t num_unharvested_banks = logical_x;
    for (std::size_t x = 0; x < num_unharvested_banks; x++) {
        for (std::size_t y = 0; y < ports_per_bank; y++) {
            const tt_xy_pair& dram_core = CoordinateManager::dram_cores[x * ports_per_bank + y];
            CoordinateManager::dram_logical_to_virtual[{x, y}] = CoreCoord(dram_core.x, dram_core.y, CoreType::DRAM, CoordSystem::VIRTUAL);
            CoordinateManager::dram_virtual_to_logical[dram_core] = CoreCoord(x, y, CoreType::DRAM, CoordSystem::LOGICAL);
        }
    }
}

// Populates the ethernet LOGICAL <-> TRANSLATED maps.
//
// Each logical ethernet coordinate (x, y) within eth_grid_size is mapped to
// the translated coordinate obtained by adding the fixed
// eth_translated_coordinate_start_x / _start_y offsets, and the reverse entry
// is recorded as well.
void BlackholeCoordinateManager::fill_eth_logical_to_translated() {
    for (size_t col = 0; col < CoordinateManager::eth_grid_size.x; ++col) {
        const size_t shifted_x = col + eth_translated_coordinate_start_x;

        for (size_t row = 0; row < CoordinateManager::eth_grid_size.y; ++row) {
            const size_t shifted_y = row + eth_translated_coordinate_start_y;

            // Forward direction: logical -> translated.
            CoordinateManager::eth_logical_to_translated[{col, row}] =
                CoreCoord(shifted_x, shifted_y, CoreType::ETH, CoordSystem::TRANSLATED);

            // Reverse direction: translated -> logical.
            CoordinateManager::eth_translated_to_logical[{shifted_x, shifted_y}] =
                CoreCoord(col, row, CoreType::ETH, CoordSystem::LOGICAL);
        }
    }
}

tt_logical_coords BlackholeCoordinateManager::to_logical_coords(tt_translated_coords translated_coords) {
tt_virtual_coords virtual_coords = tt_virtual_coords(translated_coords.x, translated_coords.y);
return CoordinateManager::to_logical_coords(virtual_coords);
// Populates the PCIe LOGICAL <-> TRANSLATED maps.
//
// Only logical core (0, 0) is registered; it is paired with the fixed
// translated coordinate (pcie_translated_coordinate_start_x,
// pcie_translated_coordinate_start_y).
void BlackholeCoordinateManager::fill_pcie_logical_to_translated() {
    const size_t fixed_x = pcie_translated_coordinate_start_x;
    const size_t fixed_y = pcie_translated_coordinate_start_y;

    // Forward direction: logical (0, 0) -> translated.
    CoordinateManager::pcie_logical_to_translated[{0, 0}] =
        CoreCoord(fixed_x, fixed_y, CoreType::PCIE, CoordSystem::TRANSLATED);

    // Reverse direction: translated -> logical (0, 0).
    CoordinateManager::pcie_translated_to_logical[{fixed_x, fixed_y}] =
        CoreCoord(0, 0, CoreType::PCIE, CoordSystem::LOGICAL);
}
41 changes: 35 additions & 6 deletions device/blackhole/blackhole_coordinate_manager.h
Original file line number Diff line number Diff line change
@@ -1,23 +1,52 @@
/*
* SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc.
* SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include "blackhole_implementation.h"
#include "device/coordinate_manager.h"
#include "device/blackhole/blackhole_implementation.h"

class BlackholeCoordinateManager : public CoordinateManager {
public:
BlackholeCoordinateManager(
const tt_xy_pair& worker_grid_size, const std::vector<tt_xy_pair>& workers, std::size_t harvesting_mask) :
CoordinateManager(worker_grid_size, workers, harvesting_mask) {}
const tt_xy_pair& tensix_grid_size, const std::vector<tt_xy_pair>& tensix_cores, const std::size_t tensix_harvesting_mask,
const tt_xy_pair& dram_grid_size, const std::vector<tt_xy_pair>& dram_cores, const std::size_t dram_harvesting_mask,
const tt_xy_pair& eth_grid_size, const std::vector<tt_xy_pair>& eth_cores,
const tt_xy_pair& arc_grid_size, const std::vector<tt_xy_pair>& arc_cores,
const tt_xy_pair& pcie_grid_size, const std::vector<tt_xy_pair>& pcie_cores)
: CoordinateManager(
tensix_grid_size, tensix_cores, tensix_harvesting_mask,
dram_grid_size, dram_cores, dram_harvesting_mask,
eth_grid_size, eth_cores,
arc_grid_size, arc_cores,
pcie_grid_size, pcie_cores)
{
this->tensix_harvesting(tensix_harvesting_mask);
this->dram_harvesting(dram_harvesting_mask);
this->translate_eth_coords();
this->translate_arc_coords();
this->translate_pcie_coords();
}

tt_translated_coords to_translated_coords(tt_logical_coords logical_coords) override;
void dram_harvesting(const std::size_t dram_harvesting_mask) override;

tt_logical_coords to_logical_coords(tt_translated_coords translated_coords) override;
void tensix_harvesting(const std::size_t tensix_harvesting_mask) override;

protected:
std::set<std::size_t> get_x_coordinates_to_harvest(std::size_t harvesting_mask) override;
void fill_tensix_logical_to_translated() override;

void fill_eth_logical_to_translated() override;

void fill_pcie_logical_to_translated() override;

private:
static const std::size_t eth_translated_coordinate_start_x = 20;
static const std::size_t eth_translated_coordinate_start_y = 25;

static const size_t pcie_translated_coordinate_start_x = 19;
static const size_t pcie_translated_coordinate_start_y = 24;
};
54 changes: 47 additions & 7 deletions device/blackhole/blackhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

#include "device/architecture_implementation.h"
#include "device/tlb.h"
#include "device/tt_xy_pair.h"
#include <stdexcept>

namespace tt::umd {

Expand Down Expand Up @@ -58,13 +60,50 @@ enum class arc_message_type {
};

// DEVICE_DATA
static constexpr std::array<xy_pair, 24> DRAM_LOCATIONS = {
{{0, 0}, {0, 1}, {0, 11}, {0, 2}, {0, 10}, {0, 3}, {0, 9}, {0, 4}, {0, 8}, {0, 5}, {0, 7}, {0, 6},
{9, 0}, {9, 1}, {9, 11}, {9, 2}, {9, 10}, {9, 3}, {9, 9}, {9, 4}, {9, 8}, {9, 5}, {9, 7}, {9, 6}}};

static constexpr std::array<xy_pair, 1> ARC_LOCATIONS = {{{8, 0}}};
static constexpr std::array<xy_pair, 1> PCI_LOCATIONS = {{{11, 0}}};
static constexpr std::array<xy_pair, 14> ETH_LOCATIONS = {
// Tensix grid dimensions as {x, y}: 14 columns by 10 rows, matching the
// 140 entries listed in TENSIX_CORES below.
const static tt_xy_pair TENSIX_GRID_SIZE = {14, 10};
// Tensix core coordinates, listed row by row: each source line holds the 14
// cores that share one y value (y = 2..11), in increasing x. The x values run
// 1..7 and 10..16; x = 8 and x = 9 do not appear in this table.
// NOTE(review): consumers appear to index this as x + y * grid_size.x (see
// BlackholeCoordinateManager::tensix_harvesting) - confirm before reordering.
const static std::vector<tt_xy_pair> TENSIX_CORES = {{
{1, 2}, {2, 2}, {3, 2}, {4, 2}, {5, 2}, {6, 2}, {7, 2}, {10, 2}, {11, 2}, {12, 2}, {13, 2}, {14, 2}, {15, 2}, {16, 2},
{1, 3}, {2, 3}, {3, 3}, {4, 3}, {5, 3}, {6, 3}, {7, 3}, {10, 3}, {11, 3}, {12, 3}, {13, 3}, {14, 3}, {15, 3}, {16, 3},
{1, 4}, {2, 4}, {3, 4}, {4, 4}, {5, 4}, {6, 4}, {7, 4}, {10, 4}, {11, 4}, {12, 4}, {13, 4}, {14, 4}, {15, 4}, {16, 4},
{1, 5}, {2, 5}, {3, 5}, {4, 5}, {5, 5}, {6, 5}, {7, 5}, {10, 5}, {11, 5}, {12, 5}, {13, 5}, {14, 5}, {15, 5}, {16, 5},
{1, 6}, {2, 6}, {3, 6}, {4, 6}, {5, 6}, {6, 6}, {7, 6}, {10, 6}, {11, 6}, {12, 6}, {13, 6}, {14, 6}, {15, 6}, {16, 6},
{1, 7}, {2, 7}, {3, 7}, {4, 7}, {5, 7}, {6, 7}, {7, 7}, {10, 7}, {11, 7}, {12, 7}, {13, 7}, {14, 7}, {15, 7}, {16, 7},
{1, 8}, {2, 8}, {3, 8}, {4, 8}, {5, 8}, {6, 8}, {7, 8}, {10, 8}, {11, 8}, {12, 8}, {13, 8}, {14, 8}, {15, 8}, {16, 8},
{1, 9}, {2, 9}, {3, 9}, {4, 9}, {5, 9}, {6, 9}, {7, 9}, {10, 9}, {11, 9}, {12, 9}, {13, 9}, {14, 9}, {15, 9}, {16, 9},
{1, 10}, {2, 10}, {3, 10}, {4, 10}, {5, 10}, {6, 10}, {7, 10}, {10, 10}, {11, 10}, {12, 10}, {13, 10}, {14, 10}, {15, 10}, {16, 10},
{1, 11}, {2, 11}, {3, 11}, {4, 11}, {5, 11}, {6, 11}, {7, 11}, {10, 11}, {11, 11}, {12, 11}, {13, 11}, {14, 11}, {15, 11}, {16, 11},
}};


// DRAM layout constants. The DRAM grid is expressed as
// {number of banks, NOC ports per bank} = {8, 3}, i.e. 24 cores in total.
const std::size_t NUM_DRAM_BANKS = 8;
const std::size_t NUM_NOC_PORTS_PER_DRAM_BANK = 3;
static const tt_xy_pair DRAM_GRID_SIZE = {NUM_DRAM_BANKS, NUM_NOC_PORTS_PER_DRAM_BANK};
// The 24 DRAM core coordinates, written three per source line. All cores sit
// at x = 0 or x = 9.
// NOTE(review): BlackholeCoordinateManager::dram_harvesting indexes its core
// list as bank * grid_size.y + port, so each line of three is presumably the
// NOC ports of one bank - confirm before reordering entries.
static const std::vector<tt_xy_pair> DRAM_CORES = {
{
{0, 0}, {0, 1}, {0, 11},
{0, 2}, {0, 10}, {0, 3},
{0, 9}, {0, 4}, {0, 8},
{0, 5}, {0, 7}, {0, 6},
{9, 0}, {9, 1}, {9, 11},
{9, 2}, {9, 10}, {9, 3},
{9, 9}, {9, 4}, {9, 8},
{9, 5}, {9, 7}, {9, 6}
}};

// TODO: DRAM locations should be deleted. We keep it for compatibility with
// the existing code in clients which rely on DRAM_LOCATIONS.
// DRAM_LOCATIONS is initialized as a copy of DRAM_CORES.
static const std::vector<tt_xy_pair> DRAM_LOCATIONS = DRAM_CORES;

// ARC: a 1x1 grid with its single core at (8, 0).
// ARC_LOCATIONS is initialized as a copy of ARC_CORES.
// NOTE(review): presumably kept as a legacy alias like DRAM_LOCATIONS - confirm.
static const tt_xy_pair ARC_GRID_SIZE = {1, 1};
static const std::vector<tt_xy_pair> ARC_CORES = {{8, 0}};
static const std::vector<tt_xy_pair> ARC_LOCATIONS = ARC_CORES;

// PCIe: a 1x1 grid with its single core at (11, 0).
// PCI_LOCATIONS is initialized as a copy of PCIE_CORES.
// NOTE(review): presumably kept as a legacy alias like DRAM_LOCATIONS - confirm.
static const tt_xy_pair PCIE_GRID_SIZE = {1, 1};
static const std::vector<tt_xy_pair> PCIE_CORES = {{{11, 0}}};
static const std::vector<tt_xy_pair> PCI_LOCATIONS = PCIE_CORES;

static const tt_xy_pair ETH_GRID_SIZE = {14, 1};
static const std::vector<tt_xy_pair> ETH_CORES = {
{{1, 1},
{2, 1},
{3, 1},
Expand All @@ -79,6 +118,7 @@ static constexpr std::array<xy_pair, 14> ETH_LOCATIONS = {
{14, 1},
{15, 1},
{16, 1}}};
static const std::vector<tt_xy_pair> ETH_LOCATIONS = ETH_CORES;
// Return to std::array instead of std::vector once we get std::span support in C++20
static const std::vector<uint32_t> T6_X_LOCATIONS = {1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16};
static const std::vector<uint32_t> T6_Y_LOCATIONS = {2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
Expand Down
3 changes: 3 additions & 0 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
#include <utility>
#include <vector>

#include "tt_core_coordinates.h"
#include "tt_soc_descriptor.h"
#include "yaml-cpp/yaml.h"
#include "common/logger.hpp"
#include "device/architecture_implementation.h"
#include "device/driver_atomics.h"
Expand Down
17 changes: 15 additions & 2 deletions device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,17 @@
#include <unordered_set>
#include <vector>

#include "tt_core_coordinates.h"
#include "tt_soc_descriptor.h"
#include "tt_xy_pair.h"
#include "tt_silicon_driver_common.hpp"
#include "device/tt_cluster_descriptor_types.h"
#include "device/tlb.h"
#include "device/tt_cluster_descriptor_types.h"
#include "device/tt_io.hpp"
#include "device/tt_core_coordinates.h"

#include "pcie/pci_device.hpp"
#include "fmt/core.h"
#include "pcie/pci_device.hpp"
#include "tt_silicon_driver_common.hpp"
Expand Down Expand Up @@ -651,8 +659,9 @@ namespace tt::umd {
/**
* Silicon Driver Class, derived from the tt_device class
* Implements APIs to communicate with a physical Tenstorrent Device.
*/
class Cluster : public tt_device {
*/
class Cluster: public tt_device
{
public:
// Constructor
/**
Expand Down Expand Up @@ -828,6 +837,10 @@ class Cluster : public tt_device {
// TODO: This should be accessible through public API, probably to be moved to tt_device.
PCIDevice* get_pci_device(int device_id) const;

// Core coordinates functions
// virtual void write_to_device(const void *mem_ptr, uint32_t size_in_bytes, chip_id_t chip, CoreCoord core_coord, uint64_t addr);
// virtual void read_from_device(void* mem_ptr, uint32_t size_in_bytes, chip_id_t chip, CoreCoord core_coord, uint64_t addr);

// Destructor
virtual ~Cluster();

Expand Down
Loading

0 comments on commit a179222

Please sign in to comment.