Skip to content

Commit

Permalink
Handle P150 board type properly
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT committed Dec 26, 2024
1 parent 087bbe2 commit b759bcc
Show file tree
Hide file tree
Showing 17 changed files with 187 additions and 39 deletions.
8 changes: 6 additions & 2 deletions device/api/umd/device/blackhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <stdexcept>

#include "umd/device/architecture_implementation.h"
#include "umd/device/types/cluster_descriptor_types.h"
#include "umd/device/types/tlb.h"

namespace tt::umd {
Expand Down Expand Up @@ -99,8 +100,9 @@ static const std::vector<tt_xy_pair> ARC_CORES = {{8, 0}};
static const std::vector<tt_xy_pair> ARC_LOCATIONS = ARC_CORES;

static const tt_xy_pair PCIE_GRID_SIZE = {1, 1};
static const std::vector<tt_xy_pair> PCIE_CORES = {{{11, 0}}};
static const std::vector<tt_xy_pair> PCI_LOCATIONS = PCIE_CORES;
static const std::vector<tt_xy_pair> PCIE_CORES_LOCAL = {{{2, 0}}};
static const std::vector<tt_xy_pair> PCI_LOCATIONS = PCIE_CORES_LOCAL;
static const std::vector<tt_xy_pair> PCIE_CORES_REMOTE = {{{11, 0}}};

static const tt_xy_pair ETH_GRID_SIZE = {14, 1};
static const std::vector<tt_xy_pair> ETH_CORES = {
Expand Down Expand Up @@ -195,6 +197,8 @@ static const size_t pcie_translated_coordinate_start_y = 24;
static const size_t dram_translated_coordinate_start_x = 17;
static const size_t dram_translated_coordinate_start_y = 12;

std::vector<tt_xy_pair> get_pcie_cores(const BoardType board_type, const bool is_chip_remote);

} // namespace blackhole

class blackhole_implementation : public architecture_implementation {
Expand Down
5 changes: 4 additions & 1 deletion device/api/umd/device/coordinate_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "umd/device/tt_core_coordinates.h"
#include "umd/device/tt_xy_pair.h"
#include "umd/device/types/arch.h"
#include "umd/device/types/cluster_descriptor_types.h"

class CoordinateManager {
public:
Expand All @@ -36,7 +37,9 @@ class CoordinateManager {
tt::ARCH arch,
const size_t tensix_harvesting_mask = 0,
const size_t dram_harvesting_mask = 0,
const size_t eth_harvesting_mask = 0);
const size_t eth_harvesting_mask = 0,
const BoardType board_type = BoardType::UNKNOWN,
const bool is_chip_remote = false);

static size_t get_num_harvested(const size_t harvesting_mask);

Expand Down
12 changes: 0 additions & 12 deletions device/api/umd/device/tt_cluster_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,6 @@ namespace YAML {
class Node;
}

// Board types known to the cluster descriptor. Enumerator values are written out
// explicitly; they are exactly the values the implicit numbering assigns.
enum BoardType : uint32_t {
    E75 = 0,
    E150 = 1,
    E300 = 2,
    N150 = 3,
    N300 = 4,
    P100 = 5,
    P150A = 6,
    GALAXY = 7,
    UNKNOWN = 8,
};
class tt_ClusterDescriptor {
private:
tt_ClusterDescriptor() = default;
Expand Down
4 changes: 3 additions & 1 deletion device/api/umd/device/tt_soc_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "umd/device/tt_core_coordinates.h"
#include "umd/device/tt_xy_pair.h"
#include "umd/device/types/arch.h"
#include "umd/device/types/cluster_descriptor_types.h"

namespace YAML {
class Node;
Expand Down Expand Up @@ -92,7 +93,8 @@ class tt_SocDescriptor {
// CoreCoord conversions.
tt::umd::CoreCoord translate_coord_to(const tt::umd::CoreCoord core_coord, const CoordSystem coord_system) const;

static std::string get_soc_descriptor_path(tt::ARCH arch);
static std::string get_soc_descriptor_path(
tt::ARCH arch, const BoardType board_type = BoardType::UNKNOWN, const bool is_chip_remote = false);

std::vector<tt::umd::CoreCoord> get_cores(const CoreType core_type) const;
std::vector<tt::umd::CoreCoord> get_harvested_cores(const CoreType core_type) const;
Expand Down
13 changes: 13 additions & 0 deletions device/api/umd/device/types/cluster_descriptor_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ struct eth_coord_t {
}
};

// Board type a chip can be reported as belonging to. The underlying type is fixed to
// uint32_t. Enumerator values are written out explicitly; they are exactly the values
// the implicit numbering assigns, so nothing changes for existing users.
enum BoardType : uint32_t {
    E75 = 0,
    E150 = 1,
    E300 = 2,
    N150 = 3,
    N300 = 4,
    P100 = 5,
    P150A = 6,
    P300 = 7,
    GALAXY = 8,
    UNKNOWN = 9,  // used as the default where callers do not supply a board type
};

namespace std {
template <>
struct hash<eth_coord_t> {
Expand Down
14 changes: 14 additions & 0 deletions device/blackhole/blackhole_implementation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,18 @@ tt_driver_noc_params blackhole_implementation::get_noc_params() const {
return {NOC_ADDR_LOCAL_BITS, NOC_ADDR_NODE_ID_BITS};
}

namespace blackhole {
// Picks the PCIE core list for a Blackhole chip from its board type and from whether
// the chip is remote:
//   - UNKNOWN, P100        -> PCIE_CORES_REMOTE
//   - P150A                -> PCIE_CORES_LOCAL
//   - P300                 -> PCIE_CORES_REMOTE if is_chip_remote, else PCIE_CORES_LOCAL
//   - any other board type -> PCIE_CORES_LOCAL
// The selected list is returned by value, i.e. as a copy.
std::vector<tt_xy_pair> get_pcie_cores(const BoardType board_type, const bool is_chip_remote) {
    switch (board_type) {
        case BoardType::UNKNOWN:
        case BoardType::P100:
            return PCIE_CORES_REMOTE;

        case BoardType::P300:
            // Only P300 takes the remote flag into account.
            if (is_chip_remote) {
                return PCIE_CORES_REMOTE;
            }
            return PCIE_CORES_LOCAL;

        case BoardType::P150A:
        default:
            return PCIE_CORES_LOCAL;
    }
}
}  // namespace blackhole

} // namespace tt::umd
12 changes: 7 additions & 5 deletions device/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,9 @@ std::unique_ptr<Chip> Cluster::construct_chip_from_cluster(

std::unique_ptr<Chip> Cluster::construct_chip_from_cluster(chip_id_t chip_id, tt_ClusterDescriptor* cluster_desc) {
tt::ARCH arch = cluster_desc->get_arch(chip_id);
std::string soc_desc_path = tt_SocDescriptor::get_soc_descriptor_path(arch);
const BoardType chip_board_type = cluster_desc->get_board_type(chip_id);
std::string soc_desc_path =
tt_SocDescriptor::get_soc_descriptor_path(arch, chip_board_type, cluster_desc->is_chip_remote(chip_id));
// Note that initially soc_descriptors are not harvested, but will be harvested later if perform_harvesting is
// true.
// TODO: This should be changed, harvesting should be done in tt_socdescriptor's constructor and not as part of
Expand Down Expand Up @@ -600,7 +602,7 @@ Cluster::Cluster(
// rather than ClusterDescriptor.
tt::ARCH arch = tt::ARCH::GRAYSKULL;
chip_id_t mock_chip_id = 0;
tt_SocDescriptor soc_desc = tt_SocDescriptor(tt_SocDescriptor::get_soc_descriptor_path(arch));
tt_SocDescriptor soc_desc = tt_SocDescriptor(tt_SocDescriptor::get_soc_descriptor_path(arch, BoardType::UNKNOWN));
std::unique_ptr<Chip> chip = std::make_unique<MockChip>(soc_desc);

std::unordered_map<chip_id_t, std::unique_ptr<Chip>> chips;
Expand Down Expand Up @@ -2286,14 +2288,14 @@ void Cluster::wait_for_connected_non_mmio_flush(const chip_id_t chip_id) {
}

void Cluster::wait_for_non_mmio_flush(const chip_id_t chip_id) {
log_assert(arch_name != tt::ARCH::BLACKHOLE, "Non-MMIO flush not supported in Blackhole");
std::string read_tlb = "LARGE_READ_TLB";

if (!this->cluster_desc->is_chip_remote(chip_id)) {
log_debug(LogSiliconDriver, "Chip {} is not a remote chip, skipping wait_for_non_mmio_flush", chip_id);
return;
}

std::string read_tlb = "LARGE_READ_TLB";
log_assert(arch_name != tt::ARCH::BLACKHOLE, "Non-MMIO flush not supported in Blackhole");

chip_id_t mmio_connected_chip = cluster_desc->get_closest_mmio_capable_chip(chip_id);
wait_for_connected_non_mmio_flush(mmio_connected_chip);
}
Expand Down
10 changes: 7 additions & 3 deletions device/coordinate_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,9 @@ std::shared_ptr<CoordinateManager> CoordinateManager::create_coordinate_manager(
tt::ARCH arch,
const size_t tensix_harvesting_mask,
const size_t dram_harvesting_mask,
const size_t eth_harvesting_mask) {
const size_t eth_harvesting_mask,
const BoardType board_type,
bool is_chip_remote) {
switch (arch) {
case tt::ARCH::GRAYSKULL:
return create_coordinate_manager(
Expand Down Expand Up @@ -495,7 +497,8 @@ std::shared_ptr<CoordinateManager> CoordinateManager::create_coordinate_manager(
tt::umd::wormhole::ARC_CORES,
tt::umd::wormhole::PCIE_GRID_SIZE,
tt::umd::wormhole::PCIE_CORES);
case tt::ARCH::BLACKHOLE:
case tt::ARCH::BLACKHOLE: {
const std::vector<tt_xy_pair> pcie_cores = tt::umd::blackhole::get_pcie_cores(board_type, is_chip_remote);
return create_coordinate_manager(
arch,
tt::umd::blackhole::TENSIX_GRID_SIZE,
Expand All @@ -510,7 +513,8 @@ std::shared_ptr<CoordinateManager> CoordinateManager::create_coordinate_manager(
tt::umd::blackhole::ARC_GRID_SIZE,
tt::umd::blackhole::ARC_CORES,
tt::umd::blackhole::PCIE_GRID_SIZE,
tt::umd::blackhole::PCIE_CORES);
pcie_cores);
}
case tt::ARCH::Invalid:
throw std::runtime_error("Invalid architecture for creating coordinate manager");
default:
Expand Down
2 changes: 2 additions & 0 deletions device/tt_cluster_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,8 @@ void tt_ClusterDescriptor::load_chips_from_connectivity_descriptor(YAML::Node &y
board_type = BoardType::P100;
} else if (chip_board_type.second == "p150A") {
board_type = BoardType::P150A;
} else if (chip_board_type.second == "p300") {
board_type = BoardType::P300;
} else if (chip_board_type.second == "GALAXY") {
board_type = BoardType::GALAXY;
} else {
Expand Down
25 changes: 21 additions & 4 deletions device/tt_soc_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,17 +273,34 @@ bool tt_SocDescriptor::is_ethernet_core(const tt_xy_pair &core) const {
return this->ethernet_core_channel_map.find(core) != ethernet_core_channel_map.end();
}

std::string tt_SocDescriptor::get_soc_descriptor_path(tt::ARCH arch) {
std::string tt_SocDescriptor::get_soc_descriptor_path(
tt::ARCH arch, const BoardType board_type, const bool is_chip_remote) {
switch (arch) {
case tt::ARCH::GRAYSKULL:
// TODO: this path needs to be changed to point to soc descriptors outside of tests directory.
return tt::umd::utils::get_abs_path("tests/soc_descs/grayskull_10x12.yaml");
case tt::ARCH::WORMHOLE_B0:
// TODO: this path needs to be changed to point to soc descriptors outside of tests directory.
return tt::umd::utils::get_abs_path("tests/soc_descs/wormhole_b0_8x10.yaml");
case tt::ARCH::BLACKHOLE:
// TODO: this path needs to be changed to point to soc descriptors outside of tests directory.
return tt::umd::utils::get_abs_path("tests/soc_descs/blackhole_140_arch_no_eth.yaml");
case tt::ARCH::BLACKHOLE: {
if (board_type == BoardType::P100 || board_type == BoardType::UNKNOWN) {
// TODO: this path needs to be changed to point to soc descriptors outside of tests directory.
return tt::umd::utils::get_abs_path("tests/soc_descs/blackhole_140_arch_no_eth.yaml");
} else if (board_type == BoardType::P150A) {
// TODO: this path needs to be changed to point to soc descriptors outside of tests directory.
return tt::umd::utils::get_abs_path("tests/soc_descs/blackhole_140_arch_local.yaml");
} else if (board_type == BoardType::P300) {
if (is_chip_remote) {
// TODO: this path needs to be changed to point to soc descriptors outside of tests directory.
return tt::umd::utils::get_abs_path("tests/soc_descs/blackhole_140_arch_remote.yaml");
} else {
// TODO: this path needs to be changed to point to soc descriptors outside of tests directory.
return tt::umd::utils::get_abs_path("tests/soc_descs/blackhole_140_arch_local.yaml");
}
} else {
throw std::runtime_error("Invalid board type for Blackhole architecture.");
}
}
default:
throw std::runtime_error("Invalid architecture");
}
Expand Down
2 changes: 1 addition & 1 deletion tests/api/test_cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ TEST(ApiClusterTest, DifferentConstructors) {
// 3. Constructor taking a custom soc descriptor in addition.
tt::ARCH device_arch = tt_ClusterDescriptor::detect_arch(logical_device_id);
// You can add a custom soc descriptor here.
std::string sdesc_path = tt_SocDescriptor::get_soc_descriptor_path(device_arch);
std::string sdesc_path = tt_SocDescriptor::get_soc_descriptor_path(device_arch, BoardType::UNKNOWN);
umd_cluster = std::make_unique<Cluster>(sdesc_path, target_devices);
umd_cluster = nullptr;

Expand Down
42 changes: 35 additions & 7 deletions tests/api/test_core_coord_translation_bh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,19 +496,47 @@ TEST(CoordinateManager, CoordinateManagerBlackholeDRAMPMoreThanOneDRAMBankHarves
}

// Test that virtual and physical coordinates are the same for all logical PCIE coordinates.
TEST(CoordinateManager, CoordinateManagerBlackholePCIETranslation) {
TEST(CoordinateManager, CoordinateManagerBlackholePCIETranslationLocal) {
std::shared_ptr<CoordinateManager> coordinate_manager =
CoordinateManager::create_coordinate_manager(tt::ARCH::BLACKHOLE);
CoordinateManager::create_coordinate_manager(tt::ARCH::BLACKHOLE, 0, 0, 0, BoardType::P300, false);
const tt_xy_pair pcie_grid_size = tt::umd::blackhole::PCIE_GRID_SIZE;
const std::vector<tt_xy_pair> pcie_cores = tt::umd::blackhole::PCIE_CORES_LOCAL;

for (size_t x = 0; x < pcie_grid_size.x; x++) {
for (size_t y = 0; y < pcie_grid_size.y; y++) {
const CoreCoord arc_logical = CoreCoord(x, y, CoreType::PCIE, CoordSystem::LOGICAL);
const CoreCoord arc_virtual = coordinate_manager->translate_coord_to(arc_logical, CoordSystem::VIRTUAL);
const CoreCoord arc_physical = coordinate_manager->translate_coord_to(arc_logical, CoordSystem::PHYSICAL);
const CoreCoord pcie_logical = CoreCoord(x, y, CoreType::PCIE, CoordSystem::LOGICAL);
const CoreCoord pcie_virtual = coordinate_manager->translate_coord_to(pcie_logical, CoordSystem::VIRTUAL);
const CoreCoord pcie_physical = coordinate_manager->translate_coord_to(pcie_logical, CoordSystem::PHYSICAL);
const tt_xy_pair pcie_core = pcie_cores[y * pcie_grid_size.x + x];

EXPECT_EQ(arc_virtual.x, arc_physical.x);
EXPECT_EQ(arc_virtual.y, arc_physical.y);
EXPECT_EQ(pcie_virtual.x, pcie_physical.x);
EXPECT_EQ(pcie_virtual.y, pcie_physical.y);

EXPECT_EQ(pcie_core.x, pcie_physical.x);
EXPECT_EQ(pcie_core.y, pcie_physical.y);
}
}
}

// For a Blackhole coordinate manager created for a remote P300 chip, walk every logical
// PCIE coordinate and check that its virtual and physical translations agree, and that
// the physical coordinate equals the matching entry of PCIE_CORES_REMOTE.
TEST(CoordinateManager, CoordinateManagerBlackholePCIETranslationRemote) {
    auto coordinate_manager =
        CoordinateManager::create_coordinate_manager(tt::ARCH::BLACKHOLE, 0, 0, 0, BoardType::P300, true);
    const tt_xy_pair grid = tt::umd::blackhole::PCIE_GRID_SIZE;
    const std::vector<tt_xy_pair> expected_cores = tt::umd::blackhole::PCIE_CORES_REMOTE;

    for (size_t x = 0; x < grid.x; x++) {
        for (size_t y = 0; y < grid.y; y++) {
            const CoreCoord logical_coord(x, y, CoreType::PCIE, CoordSystem::LOGICAL);
            const CoreCoord virtual_coord = coordinate_manager->translate_coord_to(logical_coord, CoordSystem::VIRTUAL);
            const CoreCoord physical_coord =
                coordinate_manager->translate_coord_to(logical_coord, CoordSystem::PHYSICAL);

            // The expected core list is indexed row by row: y selects the row, x the column.
            const size_t core_index = y * grid.x + x;
            const tt_xy_pair expected_core = expected_cores[core_index];

            EXPECT_EQ(virtual_coord.x, physical_coord.x);
            EXPECT_EQ(virtual_coord.y, physical_coord.y);

            EXPECT_EQ(expected_core.x, physical_coord.x);
            EXPECT_EQ(expected_core.y, physical_coord.y);
        }
    }
}
Expand Down
2 changes: 1 addition & 1 deletion tests/api/test_mockup_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ std::string get_soc_descriptor_file(tt::ARCH arch) {
case tt::ARCH::WORMHOLE_B0:
return test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml");
case tt::ARCH::BLACKHOLE:
return test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch.yaml");
return test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_local.yaml");
case tt::ARCH::Invalid:
throw std::runtime_error("Invalid arch not supported");
default:
Expand Down
2 changes: 1 addition & 1 deletion tests/api/test_soc_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ TEST(SocDescriptor, SocDescriptorBlackholeETHHarvesting) {
}

tt_SocDescriptor soc_desc(
test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch.yaml"), 0, 0, eth_harvesting_mask);
test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_local.yaml"), 0, 0, eth_harvesting_mask);

const std::vector<CoreCoord> eth_cores = soc_desc.get_cores(CoreType::ETH);

Expand Down
1 change: 0 additions & 1 deletion tests/blackhole/test_cluster_bh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -941,7 +941,6 @@ TEST(SiliconDriverBH, RandomSysmemTestWithPcie) {
auto target_devices = get_target_devices();

Cluster cluster(
test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml"),
target_devices,
num_channels,
false, // skip driver allocs - no (don't skip)
Expand Down
Loading

0 comments on commit b759bcc

Please sign in to comment.