Skip to content

Commit

Permalink
Implement BH ARC message queue
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT committed Jan 8, 2025
1 parent 0c5da07 commit 5ebed3b
Show file tree
Hide file tree
Showing 8 changed files with 324 additions and 1 deletion.
1 change: 1 addition & 0 deletions device/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ target_sources(
grayskull/grayskull_coordinate_manager.cpp
wormhole/wormhole_coordinate_manager.cpp
blackhole/blackhole_coordinate_manager.cpp
blackhole/blackhole_arc_message_queue.cpp
xy_pair.cpp
${FBS_GENERATED_HEADER}
)
Expand Down
65 changes: 65 additions & 0 deletions device/api/umd/device/blackhole_arc_message_queue.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once

#include "umd/device/blackhole_implementation.h"
#include "umd/device/tt_core_coordinates.h"
#include "umd/device/types/blackhole_arc.h"
#include "umd/device/types/cluster_descriptor_types.h"

using namespace tt::umd::blackhole;

namespace tt::umd {

class Cluster;

// Host-side accessor for one Blackhole ARC message queue.
//
// As used by the implementation, a queue occupies a contiguous region that starts at
// base_address on the ARC core and consists of:
//   - a header of header_len words holding the read/write pointers,
//   - `size` request entries of entry_len words each,
//   - `size` response entries of entry_len words each.
// Read/write pointers are kept modulo 2 * size; the entry slot is pointer % size.
class BlackholeArcMessageQueue {
private:
// Header length and entry length in words.
static constexpr uint8_t header_len = 8;
static constexpr uint8_t entry_len = 8;

// Word offsets (from base_address) of the queue pointers inside the header.
// The host writes request_wptr and response_rptr; it only reads request_rptr
// and response_wptr.
static constexpr uint8_t request_wptr_offset = 0;
static constexpr uint8_t response_rptr_offset = 1;
static constexpr uint8_t request_rptr_offset = 4;
static constexpr uint8_t response_wptr_offset = 5;

public:
// Stores the parameters describing a queue; does not access the device.
//   cluster      - cluster used for all device reads/writes (not owned).
//   chip         - chip on which the queue lives.
//   base_address - address of the queue header on the ARC core.
//   size         - number of entries in each of the request and response rings.
//   arc_core     - ARC core through which the queue is accessed.
BlackholeArcMessageQueue(
Cluster* cluster,
const chip_id_t chip,
const uint64_t base_address,
const uint64_t size,
const CoreCoord arc_core);

// Sends one message and busy-waits for its response.
// The request carries message_type in word 0 and arg0 | (arg1 << 16) in word 1.
// Returns the upper 16 bits of response word 0 when the status (low byte of
// response word 0) is below 240; throws std::runtime_error otherwise.
uint32_t send_message(const ArcMessageType message_type, uint16_t arg0 = 0, uint16_t arg1 = 0);

// Builds the accessor for queue number queue_index on the given chip. The queue
// control block address is read from SCRATCH_RAM_11 on the chip's first ARC core;
// the control block supplies the base address and the number of entries per queue.
static std::shared_ptr<BlackholeArcMessageQueue> get_blackhole_arc_message_queue(
Cluster* cluster, const chip_id_t chip, const size_t queue_index);

private:
// Waits until the request ring is not full, writes the entry, advances the
// request write pointer and triggers the ARC firmware interrupt.
void push_request(std::array<uint32_t, BlackholeArcMessageQueue::entry_len>& request);

// Waits until the response ring is not empty, reads one entry and advances the
// response read pointer.
std::array<uint32_t, entry_len> pop_response();

// Reads num_words 32-bit words starting `offset` words past base_address.
void read_words(uint32_t* data, size_t num_words, size_t offset);

// Reads the single 32-bit word located `offset` words past base_address.
uint32_t read_word(size_t offset);

// Writes num_words 32-bit words starting `offset` words past base_address.
void write_words(uint32_t* data, size_t num_words, size_t offset);

// Fills an entry_len-word request: word 0 is the message type, followed by
// num_words payload words copied from data, with the rest zeroed.
void create_request(uint32_t* request, ArcMessageType message_type, uint32_t* data, size_t num_words);

// Writes ARC_FW_INT_VAL to ARC_FW_INT_ADDR to request a firmware interrupt.
void trigger_fw_int();

// NOTE: the constructor's initializer list follows this declaration order.
const uint64_t base_address;
const uint64_t size;
Cluster* cluster;
const chip_id_t chip;
const CoreCoord arc_core;
};

} // namespace tt::umd
12 changes: 12 additions & 0 deletions device/api/umd/device/blackhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,18 @@ static constexpr uint32_t MSG_TYPE_SETUP_IATU_FOR_PEER_TO_PEER = 0x97;

static const uint32_t BH_NOC_NODE_ID_OFFSET = 0x1FD04044;

// Address of the register that holds the address of the ARC queue control block.
constexpr uint64_t SCRATCH_RAM_11 = 0x8003042C;

// Byte sizes of the ARC message queue header and of one queue entry
// (eight 32-bit words each).
constexpr uint32_t ARC_MSG_QUEUE_HEADER_SIZE = 8 * sizeof(uint32_t);
constexpr uint32_t ARC_QUEUE_ENTRY_SIZE = 8 * sizeof(uint32_t);

// Writing ARC_FW_INT_VAL to ARC_FW_INT_ADDR makes an interrupt request
// to the ARC firmware.
constexpr uint32_t ARC_FW_INT_ADDR = 0x80030100;
constexpr uint32_t ARC_FW_INT_VAL = 0x00010000;

static const size_t eth_translated_coordinate_start_x = 20;
static const size_t eth_translated_coordinate_start_y = 25;

Expand Down
1 change: 1 addition & 0 deletions device/api/umd/device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "tt_silicon_driver_common.hpp"
#include "tt_soc_descriptor.h"
#include "tt_xy_pair.h"
#include "umd/device/blackhole_arc_message_queue.h"
#include "umd/device/chip/chip.h"
#include "umd/device/tt_device/tt_device.h"
#include "umd/device/tt_io.hpp"
Expand Down
59 changes: 59 additions & 0 deletions device/api/umd/device/types/blackhole_arc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include <cstdint>

namespace tt::umd {

namespace blackhole {

// ARC message identifiers. Only the IDs that have actually been implemented
// in CMFW are listed here. The enumerator value is the 8-bit message code.
enum class ArcMessageType : uint8_t {
    // Reserved to avoid conflict with the initial SCRATCH[5] value.
    RESERVED_01 = 0x01,

    // Do nothing.
    NOP = 0x11,

    SET_VOLTAGE = 0x12,
    GET_VOLTAGE = 0x13,
    SWITCH_CLK_SCHEME = 0x14,
    REPORT_SCRATCH_ONLY = 0x16,
    SEND_PCIE_MSI = 0x17,
    SWITCH_VOUT_CONTROL = 0x18,
    READ_EEPROM = 0x19,
    WRITE_EEPROM = 0x1A,
    READ_TS = 0x1B,
    READ_PD = 0x1C,
    READ_VM = 0x1D,
    I2C_MESSAGE = 0x1E,
    EFUSE_BURN_BITS = 0x1F,

    FORCE_AICLK = 0x33,
    FORCE_VDD = 0x39,

    AICLK_GO_BUSY = 0x52,
    AICLK_GO_LONG_IDLE = 0x54,

    // arg: 3 = ASIC + M3 reset, other values = ASIC-only reset.
    TRIGGER_RESET = 0x56,

    // Reserved to avoid conflict with the boot-time SCRATCH[5] value.
    RESERVED_60 = 0x60,

    TEST = 0x90,
    PCIE_DMA_CHIP_TO_HOST_TRANSFER = 0x9B,
    PCIE_DMA_HOST_TO_CHIP_TRANSFER = 0x9C,

    ASIC_STATE0 = 0xA0,
    ASIC_STATE1 = 0xA1,
    ASIC_STATE3 = 0xA3,
    ASIC_STATE5 = 0xA5,

    SET_LAST_SERIAL = 0xBE,
    EFUSE_BURN = 0xBF,
};

// Assignment of ARC message queues to their users, as proposed by Syseng.
// Enumerators are consecutive queue indices starting at 0, so their order
// must not change. Kept as an unscoped enum so the values convert implicitly
// to an integer queue index.
enum BlackholeArcMessageQueueIndex : uint8_t {
    KMD = 0,
    MONITORING,   // 1
    TOOLS,        // 2
    APPLICATION,  // 3
};

} // namespace blackhole

} // namespace tt::umd
144 changes: 144 additions & 0 deletions device/blackhole/blackhole_arc_message_queue.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "umd/device/blackhole_arc_message_queue.h"

#include "umd/device/cluster.h"

using namespace tt::umd;

namespace tt::umd {

// Records where the queue lives (chip, ARC core, base address) and how many
// entries each ring holds. No device access happens here.
BlackholeArcMessageQueue::BlackholeArcMessageQueue(
    Cluster* cluster,
    const chip_id_t chip,
    const uint64_t base_address,
    const uint64_t size,
    const CoreCoord arc_core) :
    base_address(base_address),
    size(size),
    cluster(cluster),
    chip(chip),
    arc_core(arc_core) {
    // Nothing to do: every member is set by the initializer list above.
}

void BlackholeArcMessageQueue::read_words(uint32_t* data, size_t num_words, size_t offset) {
cluster->read_from_device(
data, chip, arc_core, base_address + offset * sizeof(uint32_t), num_words * sizeof(uint32_t), "LARGE_READ_TLB");
}

// Returns the single 32-bit word located offset words past the queue's base address.
uint32_t BlackholeArcMessageQueue::read_word(size_t offset) {
    uint32_t value = 0;
    read_words(&value, 1, offset);
    return value;
}

void BlackholeArcMessageQueue::write_words(uint32_t* data, size_t num_words, size_t offset) {
cluster->write_to_device(
data,
num_words * sizeof(uint32_t),
chip,
arc_core,
base_address + offset * sizeof(uint32_t),
"LARGE_WRITE_TLB");
}

void BlackholeArcMessageQueue::create_request(
uint32_t* request, ArcMessageType message_type, uint32_t* data, size_t num_words) {
request[0] = (uint32_t)message_type;
memcpy(request + 1, data, num_words * sizeof(uint32_t));
memset(request + 1 + num_words, 0, (BlackholeArcMessageQueue::entry_len - (1 + num_words)) * sizeof(uint32_t));
}

// Requests an ARC firmware interrupt by writing ARC_FW_INT_VAL to ARC_FW_INT_ADDR
// on the ARC core.
void BlackholeArcMessageQueue::trigger_fw_int() {
    cluster->write_to_device(
        &ARC_FW_INT_VAL, sizeof(ARC_FW_INT_VAL), chip, arc_core, ARC_FW_INT_ADDR, "LARGE_WRITE_TLB");
}

// Places one request entry into the request ring and notifies the ARC firmware.
// Busy-waits (without a timeout) for as long as the request ring is full.
void BlackholeArcMessageQueue::push_request(std::array<uint32_t, BlackholeArcMessageQueue::entry_len>& request) {
    cluster->l1_membar(chip, "LARGE_READ_TLB");
    uint32_t wptr = read_word(request_wptr_offset);

    // Pointers live in [0, 2 * size); the ring is full when read and write
    // pointers are exactly `size` apart. Re-read the read pointer until a slot
    // is available.
    bool ring_full = true;
    while (ring_full) {
        const uint32_t rptr = read_word(request_rptr_offset);
        ring_full = (abs(static_cast<int>(rptr) - static_cast<int>(wptr)) % (2 * size) == size);
    }

    // Request entries start right after the header; the offset is in words.
    const uint64_t slot_index = wptr % size;
    const uint32_t entry_offset = header_len + slot_index * BlackholeArcMessageQueue::entry_len;
    write_words(request.data(), BlackholeArcMessageQueue::entry_len, entry_offset);

    // Publish the entry by advancing the write pointer, then raise the interrupt.
    wptr = (wptr + 1) % (2 * size);
    write_words(&wptr, 1, request_wptr_offset);

    trigger_fw_int();
}

std::array<uint32_t, BlackholeArcMessageQueue::entry_len> BlackholeArcMessageQueue::pop_response() {
cluster->l1_membar(chip, "LARGE_READ_TLB");
uint32_t response_queue_rptr = read_word(response_rptr_offset);

while (true) {
uint32_t response_queue_wptr = read_word(response_wptr_offset);

if (response_queue_rptr != response_queue_wptr) {
break;
}
}

uint32_t response_entry_offset =
header_len + (size + (response_queue_rptr % size)) * BlackholeArcMessageQueue::entry_len;
std::array<uint32_t, BlackholeArcMessageQueue::entry_len> response;
cluster->l1_membar(chip, "LARGE_READ_TLB");
read_words(response.data(), BlackholeArcMessageQueue::entry_len, response_entry_offset);

response_queue_rptr = (response_queue_rptr + 1) % (2 * size);
write_words(&response_queue_rptr, 1, response_rptr_offset);

return response;
}

uint32_t BlackholeArcMessageQueue::send_message(const ArcMessageType message_type, uint16_t arg0, uint16_t arg1) {
uint32_t arg = arg0 | (arg1 << 16);

std::array<uint32_t, BlackholeArcMessageQueue::entry_len> request = {(uint32_t)message_type, arg, 0, 0, 0, 0, 0, 0};

push_request(request);

std::array<uint32_t, BlackholeArcMessageQueue::entry_len> response = pop_response();

uint32_t status = response[0] & 0xFF;

if (status < 240) {
return response[0] >> 16;
} else if (status == 0xFF) {
throw std::runtime_error(fmt::format("Message code {} not recognized by ARC fw.", (uint32_t)message_type));
return 0;
} else {
throw std::runtime_error(fmt::format("Uknown message error code {}", status));
return 0;
}
}

// Creates the accessor for ARC message queue number queue_index on the given chip.
//
// The address of the queue control block is read from SCRATCH_RAM_11 on the
// chip's first ARC core. The 64-bit control block is decoded as:
//   bits  0-31: base address of the first queue,
//   bits 32-39: number of entries per queue (per direction),
//   bits 40-47: number of queues.
// Each queue occupies a header plus 2 * entries-per-queue entries, and queues
// are laid out back to back starting at the base address.
//
// Throws std::runtime_error if cluster is null, if the control block reports
// zero entries per queue, or if queue_index is not below the reported number
// of queues.
std::shared_ptr<BlackholeArcMessageQueue> BlackholeArcMessageQueue::get_blackhole_arc_message_queue(
    Cluster* cluster, const chip_id_t chip, const size_t queue_index) {
    if (cluster == nullptr) {
        throw std::runtime_error("Cannot create ARC message queue without a cluster.");
    }

    const CoreCoord arc_core = cluster->get_soc_descriptor(chip).get_cores(CoreType::ARC)[0];

    uint32_t queue_control_block_addr = 0;
    cluster->read_from_device(
        &queue_control_block_addr, chip, arc_core, blackhole::SCRATCH_RAM_11, sizeof(uint32_t), "LARGE_READ_TLB");

    uint64_t queue_control_block = 0;
    cluster->read_from_device(
        &queue_control_block, chip, arc_core, queue_control_block_addr, sizeof(uint64_t), "LARGE_READ_TLB");

    const uint32_t queue_base_addr = queue_control_block & 0xFFFFFFFF;
    const uint32_t num_entries_per_queue = (queue_control_block >> 32) & 0xFF;
    const uint32_t num_queues = (queue_control_block >> 40) & 0xFF;

    // The ring arithmetic in push_request/pop_response takes values modulo the
    // entry count, so a zero count must be rejected up front.
    if (num_entries_per_queue == 0) {
        throw std::runtime_error("ARC queue control block reports zero entries per queue.");
    }

    // An out-of-range index would address memory past the last queue.
    if (queue_index >= num_queues) {
        throw std::runtime_error(fmt::format(
            "ARC message queue index {} is out of range, ARC reports {} queues.", queue_index, num_queues));
    }

    const uint32_t msg_queue_size = 2 * num_entries_per_queue * ARC_QUEUE_ENTRY_SIZE + ARC_MSG_QUEUE_HEADER_SIZE;
    const uint32_t msg_queue_base = queue_base_addr + queue_index * msg_queue_size;

    return std::make_shared<tt::umd::BlackholeArcMessageQueue>(
        cluster, chip, msg_queue_base, num_entries_per_queue, arc_core);
}

} // namespace tt::umd
5 changes: 4 additions & 1 deletion tests/blackhole/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
set(UNIT_TESTS_BH_SRCS test_cluster_bh.cpp)
set(UNIT_TESTS_BH_SRCS
test_cluster_bh.cpp
test_arc_messages_bh.cpp
)

add_executable(unit_tests_blackhole ${UNIT_TESTS_BH_SRCS})
target_link_libraries(unit_tests_blackhole PRIVATE test_common)
Expand Down
38 changes: 38 additions & 0 deletions tests/blackhole/test_arc_messages_bh.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0
#include <memory>
#include <thread>

#include "gtest/gtest.h"
#include "umd/device/blackhole_arc_message_queue.h"
#include "umd/device/cluster.h"
#include "umd/device/tt_cluster_descriptor.h"

using namespace tt::umd;

inline std::unique_ptr<Cluster> get_cluster() {
std::vector<int> pci_device_ids = PCIDevice::enumerate_devices();
// TODO: Make this test work on a host system without any tt devices.
if (pci_device_ids.empty()) {
return nullptr;
}
return std::unique_ptr<Cluster>(new Cluster());
}

// Sends the TEST message repeatedly through the APPLICATION queue of chip 0
// and expects a zero response every time.
TEST(BlackholeArcMessages, BlackholeArcMessagesBasic) {
    constexpr uint32_t num_loops = 100;

    const std::unique_ptr<Cluster> cluster = get_cluster();
    // get_cluster() returns nullptr on hosts without devices; skip instead of
    // dereferencing a null cluster below.
    if (cluster == nullptr) {
        GTEST_SKIP() << "No devices found, skipping ARC message queue test.";
    }

    const std::shared_ptr<BlackholeArcMessageQueue> blackhole_arc_msg_queue =
        BlackholeArcMessageQueue::get_blackhole_arc_message_queue(
            cluster.get(), 0, BlackholeArcMessageQueueIndex::APPLICATION);

    for (uint32_t i = 0; i < num_loops; i++) {
        const uint32_t response = blackhole_arc_msg_queue->send_message(ArcMessageType::TEST);
        // ASSERT_EQ reports the actual response value on failure.
        ASSERT_EQ(response, 0u);
    }
}

0 comments on commit 5ebed3b

Please sign in to comment.