From 5ebed3b979f3e9b60ec960176daa87bb55910095 Mon Sep 17 00:00:00 2001 From: pjanevski Date: Mon, 23 Dec 2024 13:51:05 +0000 Subject: [PATCH] Implement BH ARC message queue --- device/CMakeLists.txt | 1 + .../umd/device/blackhole_arc_message_queue.h | 65 ++++++++ .../api/umd/device/blackhole_implementation.h | 12 ++ device/api/umd/device/cluster.h | 1 + device/api/umd/device/types/blackhole_arc.h | 59 +++++++ .../blackhole/blackhole_arc_message_queue.cpp | 144 ++++++++++++++++++ tests/blackhole/CMakeLists.txt | 5 +- tests/blackhole/test_arc_messages_bh.cpp | 38 +++++ 8 files changed, 324 insertions(+), 1 deletion(-) create mode 100644 device/api/umd/device/blackhole_arc_message_queue.h create mode 100644 device/api/umd/device/types/blackhole_arc.h create mode 100644 device/blackhole/blackhole_arc_message_queue.cpp create mode 100644 tests/blackhole/test_arc_messages_bh.cpp diff --git a/device/CMakeLists.txt b/device/CMakeLists.txt index 7eed3b49..decec299 100644 --- a/device/CMakeLists.txt +++ b/device/CMakeLists.txt @@ -48,6 +48,7 @@ target_sources( grayskull/grayskull_coordinate_manager.cpp wormhole/wormhole_coordinate_manager.cpp blackhole/blackhole_coordinate_manager.cpp + blackhole/blackhole_arc_message_queue.cpp xy_pair.cpp ${FBS_GENERATED_HEADER} ) diff --git a/device/api/umd/device/blackhole_arc_message_queue.h b/device/api/umd/device/blackhole_arc_message_queue.h new file mode 100644 index 00000000..84faf331 --- /dev/null +++ b/device/api/umd/device/blackhole_arc_message_queue.h @@ -0,0 +1,65 @@ +/* + * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. 
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+#include "umd/device/blackhole_implementation.h"
+#include "umd/device/tt_core_coordinates.h"
+#include "umd/device/types/blackhole_arc.h"
+#include "umd/device/types/cluster_descriptor_types.h"
+
+// NOTE(review): a using-directive at header scope leaks into every includer. It is kept
+// because the test added by this patch names ArcMessageType and
+// BlackholeArcMessageQueueIndex without the blackhole:: qualifier.
+using namespace tt::umd::blackhole;
+
+namespace tt::umd {
+
+class Cluster;
+
+// Host-side accessor for one ARC message queue on a Blackhole chip.
+//
+// All offsets used by this class are expressed in 32-bit words relative to
+// base_address. The queue starts with a header of header_len words that holds
+// the read/write pointers, followed by `size` request entries and then `size`
+// response entries of entry_len words each.
+class BlackholeArcMessageQueue {
+private:
+    // Header length and entry length in words.
+    static constexpr uint8_t header_len = 8;
+    static constexpr uint8_t entry_len = 8;
+
+    // Word offsets of the queue pointers inside the header.
+    static constexpr uint8_t request_wptr_offset = 0;
+    static constexpr uint8_t response_rptr_offset = 1;
+    static constexpr uint8_t request_rptr_offset = 4;
+    static constexpr uint8_t response_wptr_offset = 5;
+
+public:
+    // cluster      - used for every device read/write; not owned.
+    // chip         - chip the queue lives on.
+    // base_address - address of the queue header on arc_core.
+    // size         - number of entries in each of the request and response rings.
+    // arc_core     - core through which the queue memory is accessed.
+    BlackholeArcMessageQueue(
+        Cluster* cluster,
+        const chip_id_t chip,
+        const uint64_t base_address,
+        const uint64_t size,
+        const CoreCoord arc_core);
+
+    // Pushes one request (message type plus arg0 in the low and arg1 in the high
+    // half of the second word), waits for the response and returns the upper 16
+    // bits of its first word. Throws std::runtime_error when the status byte of
+    // the response is 240 or above.
+    uint32_t send_message(const ArcMessageType message_type, uint16_t arg0 = 0, uint16_t arg1 = 0);
+
+    // Reads the queue control block whose address is stored at SCRATCH_RAM_11 and
+    // builds the accessor for the queue with the given index.
+    static std::shared_ptr<BlackholeArcMessageQueue> get_blackhole_arc_message_queue(
+        Cluster* cluster, const chip_id_t chip, const size_t queue_index);
+
+private:
+    // Blocks while the request ring is full, then writes the entry, advances the
+    // request write pointer and raises the firmware interrupt.
+    void push_request(std::array<uint32_t, entry_len>& request);
+
+    // Blocks until the response ring is non-empty, then reads one entry and
+    // advances the response read pointer.
+    std::array<uint32_t, entry_len> pop_response();
+
+    // Reads num_words words starting at word `offset` from the queue base.
+    void read_words(uint32_t* data, size_t num_words, size_t offset);
+
+    // Reads the single word at word `offset` from the queue base.
+    uint32_t read_word(size_t offset);
+
+    // Writes num_words words starting at word `offset` from the queue base.
+    void write_words(uint32_t* data, size_t num_words, size_t offset);
+
+    // Fills an entry_len-word request: message type, num_words payload words, zero padding.
+    void create_request(uint32_t* request, ArcMessageType message_type, uint32_t* data, size_t num_words);
+
+    // Writes ARC_FW_INT_VAL to ARC_FW_INT_ADDR to request a firmware interrupt.
+    void trigger_fw_int();
+
+    const uint64_t base_address;
+    const uint64_t size;
+    Cluster* cluster;
+    const chip_id_t chip;
+    const CoreCoord arc_core;
+};
+
+}  // namespace tt::umd
diff --git a/device/api/umd/device/blackhole_implementation.h b/device/api/umd/device/blackhole_implementation.h
index f5047c9e..f0efb3ae 100644
--- a/device/api/umd/device/blackhole_implementation.h
+++ b/device/api/umd/device/blackhole_implementation.h
@@ -188,6 +188,18 @@ static constexpr uint32_t
MSG_TYPE_SETUP_IATU_FOR_PEER_TO_PEER = 0x97;
 static const uint32_t BH_NOC_NODE_ID_OFFSET = 0x1FD04044;
+// Address of the register that holds the address of the ARC queue control block.
+constexpr uint64_t SCRATCH_RAM_11 = 0x8003042C;
+
+// Sizes, in bytes, of the ARC message queue header and of a single queue entry.
+constexpr uint32_t ARC_MSG_QUEUE_HEADER_SIZE = 32;
+constexpr uint32_t ARC_QUEUE_ENTRY_SIZE = 32;
+
+// Writing ARC_FW_INT_VAL (bit 16 set) to ARC_FW_INT_ADDR requests an
+// interrupt from the ARC firmware.
+constexpr uint32_t ARC_FW_INT_ADDR = 0x80030100;
+constexpr uint32_t ARC_FW_INT_VAL = 0x10000;
+
 static const size_t eth_translated_coordinate_start_x = 20;
 static const size_t eth_translated_coordinate_start_y = 25;
diff --git a/device/api/umd/device/cluster.h b/device/api/umd/device/cluster.h
index 5143db8f..b81d34a0 100644
--- a/device/api/umd/device/cluster.h
+++ b/device/api/umd/device/cluster.h
@@ -18,6 +18,7 @@
 #include "tt_silicon_driver_common.hpp"
 #include "tt_soc_descriptor.h"
 #include "tt_xy_pair.h"
+#include "umd/device/blackhole_arc_message_queue.h"
 #include "umd/device/chip/chip.h"
 #include "umd/device/tt_device/tt_device.h"
 #include "umd/device/tt_io.hpp"
diff --git a/device/api/umd/device/types/blackhole_arc.h b/device/api/umd/device/types/blackhole_arc.h
new file mode 100644
index 00000000..ac2218cd
--- /dev/null
+++ b/device/api/umd/device/types/blackhole_arc.h
@@ -0,0 +1,59 @@
+/*
+ * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include <cstdint>
+
+namespace tt::umd {
+
+namespace blackhole {
+
+// Message IDs sent in the first word of an ARC queue request.
+// Note: this only includes message IDs that have actually been implemented in CMFW.
+enum class ArcMessageType : uint8_t {
+    RESERVED_01 = 0x01,  // reserved to avoid conflict with initial SCRATCH[5] value
+    NOP = 0x11,          // Do nothing
+    SET_VOLTAGE = 0x12,
+    GET_VOLTAGE = 0x13,
+    SWITCH_CLK_SCHEME = 0x14,
+    REPORT_SCRATCH_ONLY = 0x16,
+    SEND_PCIE_MSI = 0x17,
+    SWITCH_VOUT_CONTROL = 0x18,
+    READ_EEPROM = 0x19,
+    WRITE_EEPROM = 0x1A,
+    READ_TS = 0x1B,
+    READ_PD = 0x1C,
+    READ_VM = 0x1D,
+    I2C_MESSAGE = 0x1E,
+    EFUSE_BURN_BITS = 0x1F,
+    FORCE_AICLK = 0x33,
+    FORCE_VDD = 0x39,
+    AICLK_GO_BUSY = 0x52,
+    AICLK_GO_LONG_IDLE = 0x54,
+    TRIGGER_RESET = 0x56,  // arg: 3 = ASIC + M3 reset, other values = ASIC-only reset
+    RESERVED_60 = 0x60,    // reserved to avoid conflict with boot-time SCRATCH[5] value
+    TEST = 0x90,
+    PCIE_DMA_CHIP_TO_HOST_TRANSFER = 0x9B,
+    PCIE_DMA_HOST_TO_CHIP_TRANSFER = 0x9C,
+    ASIC_STATE0 = 0xA0,
+    ASIC_STATE1 = 0xA1,
+    ASIC_STATE3 = 0xA3,
+    ASIC_STATE5 = 0xA5,
+    SET_LAST_SERIAL = 0xBE,
+    EFUSE_BURN = 0xBF,
+};
+
+// Usage of queues proposed by Syseng.
+// Deliberately left unscoped: the test in this patch passes an enumerator where a
+// size_t queue index is expected, which relies on the implicit conversion.
+enum BlackholeArcMessageQueueIndex : uint8_t {
+    KMD = 0,
+    MONITORING = 1,
+    TOOLS = 2,
+    APPLICATION = 3,
+};
+
+}  // namespace blackhole
+
+}  // namespace tt::umd
diff --git a/device/blackhole/blackhole_arc_message_queue.cpp b/device/blackhole/blackhole_arc_message_queue.cpp
new file mode 100644
index 00000000..c76d7216
--- /dev/null
+++ b/device/blackhole/blackhole_arc_message_queue.cpp
@@ -0,0 +1,144 @@
+/*
+ * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include "umd/device/blackhole_arc_message_queue.h"
+
+#include <cstdlib>
+#include <cstring>
+#include <stdexcept>
+
+#include "umd/device/cluster.h"
+
+using namespace tt::umd;
+
+namespace tt::umd {
+
+// Stores the access parameters; no device traffic happens on construction.
+BlackholeArcMessageQueue::BlackholeArcMessageQueue(
+    Cluster* cluster,
+    const chip_id_t chip,
+    const uint64_t base_address,
+    const uint64_t size,
+    const CoreCoord arc_core) :
+    base_address(base_address), size(size), cluster(cluster), chip(chip), arc_core(arc_core) {}
+
+// Reads num_words 32-bit words into data, starting `offset` words past base_address.
+void BlackholeArcMessageQueue::read_words(uint32_t* data, size_t num_words, size_t offset) {
+    cluster->read_from_device(
+        data, chip, arc_core, base_address + offset * sizeof(uint32_t), num_words * sizeof(uint32_t), "LARGE_READ_TLB");
+}
+
+// Reads and returns the single 32-bit word `offset` words past base_address.
+uint32_t BlackholeArcMessageQueue::read_word(size_t offset) {
+    uint32_t word;
+    read_words(&word, 1, offset);
+    return word;
+}
+
+// Writes num_words 32-bit words from data, starting `offset` words past base_address.
+void BlackholeArcMessageQueue::write_words(uint32_t* data, size_t num_words, size_t offset) {
+    cluster->write_to_device(
+        data,
+        num_words * sizeof(uint32_t),
+        chip,
+        arc_core,
+        base_address + offset * sizeof(uint32_t),
+        "LARGE_WRITE_TLB");
+}
+
+// Fills `request` (entry_len words) with the message type, num_words payload words
+// copied from data, and zero padding for the remainder.
+// Throws std::runtime_error if the payload does not fit behind the message-type word;
+// without this check the padding length would wrap around and memset would overrun.
+void BlackholeArcMessageQueue::create_request(
+    uint32_t* request, ArcMessageType message_type, uint32_t* data, size_t num_words) {
+    constexpr size_t max_payload_words = BlackholeArcMessageQueue::entry_len - 1;
+    if (num_words > max_payload_words) {
+        throw std::runtime_error(fmt::format(
+            "ARC request payload of {} words does not fit in {} words.", num_words, max_payload_words));
+    }
+
+    request[0] = static_cast<uint32_t>(message_type);
+    if (num_words > 0) {
+        memcpy(request + 1, data, num_words * sizeof(uint32_t));
+    }
+    memset(request + 1 + num_words, 0, (max_payload_words - num_words) * sizeof(uint32_t));
+}
+
+// Writes ARC_FW_INT_VAL to ARC_FW_INT_ADDR to request a firmware interrupt.
+void BlackholeArcMessageQueue::trigger_fw_int() {
+    cluster->write_to_device(&ARC_FW_INT_VAL, sizeof(uint32_t), chip, arc_core, ARC_FW_INT_ADDR, "LARGE_WRITE_TLB");
+}
+
+// Appends one entry to the request ring and notifies the firmware.
+// Read/write pointers run modulo 2 * size, so the ring is full exactly when the two
+// pointers are `size` apart.
+// NOTE(review): the wait loop has no timeout; it spins until the request read
+// pointer read from the device changes.
+void BlackholeArcMessageQueue::push_request(std::array<uint32_t, BlackholeArcMessageQueue::entry_len>& request) {
+    cluster->l1_membar(chip, "LARGE_READ_TLB");
+    uint32_t request_queue_wptr = read_word(request_wptr_offset);
+
+    while (true) {
+        const uint32_t request_queue_rptr = read_word(request_rptr_offset);
+        const int distance = std::abs(static_cast<int>(request_queue_rptr) - static_cast<int>(request_queue_wptr));
+        if (distance % (2 * size) != size) {
+            break;
+        }
+    }
+
+    // Offset in words.
+    uint32_t request_entry_offset = header_len + (request_queue_wptr % size) * BlackholeArcMessageQueue::entry_len;
+    write_words(request.data(), BlackholeArcMessageQueue::entry_len, request_entry_offset);
+
+    request_queue_wptr = (request_queue_wptr + 1) % (2 * size);
+    write_words(&request_queue_wptr, 1, request_wptr_offset);
+
+    trigger_fw_int();
+}
+
+// Removes and returns one entry from the response ring, which is laid out directly
+// after the `size` request entries.
+// NOTE(review): the wait loop has no timeout; it spins until the response write
+// pointer differs from the read pointer.
+std::array<uint32_t, BlackholeArcMessageQueue::entry_len> BlackholeArcMessageQueue::pop_response() {
+    cluster->l1_membar(chip, "LARGE_READ_TLB");
+    uint32_t response_queue_rptr = read_word(response_rptr_offset);
+
+    while (true) {
+        const uint32_t response_queue_wptr = read_word(response_wptr_offset);
+
+        if (response_queue_rptr != response_queue_wptr) {
+            break;
+        }
+    }
+
+    // Offset in words.
+    uint32_t response_entry_offset =
+        header_len + (size + (response_queue_rptr % size)) * BlackholeArcMessageQueue::entry_len;
+    std::array<uint32_t, BlackholeArcMessageQueue::entry_len> response;
+    cluster->l1_membar(chip, "LARGE_READ_TLB");
+    read_words(response.data(), BlackholeArcMessageQueue::entry_len, response_entry_offset);
+
+    response_queue_rptr = (response_queue_rptr + 1) % (2 * size);
+    write_words(&response_queue_rptr, 1, response_rptr_offset);
+
+    return response;
+}
+
+// Sends one message and waits for its response.
+// The request carries the message type in word 0 and arg0 | arg1 << 16 in word 1.
+// Returns the upper 16 bits of response word 0 when its status byte is below 240.
+// Throws std::runtime_error for status 0xFF (message not recognized) and for any
+// other status of 240 or above.
+uint32_t BlackholeArcMessageQueue::send_message(const ArcMessageType message_type, uint16_t arg0, uint16_t arg1) {
+    // Widen before shifting: a uint16_t promotes to signed int, and shifting a value
+    // with bit 15 set left by 16 overflows it.
+    const uint32_t arg = static_cast<uint32_t>(arg0) | (static_cast<uint32_t>(arg1) << 16);
+
+    std::array<uint32_t, BlackholeArcMessageQueue::entry_len> request = {
+        static_cast<uint32_t>(message_type), arg, 0, 0, 0, 0, 0, 0};
+
+    push_request(request);
+
+    const std::array<uint32_t, BlackholeArcMessageQueue::entry_len> response = pop_response();
+
+    const uint32_t status = response[0] & 0xFF;
+
+    if (status < 240) {
+        return response[0] >> 16;
+    }
+    if (status == 0xFF) {
+        throw std::runtime_error(
+            fmt::format("Message code {} not recognized by ARC fw.", static_cast<uint32_t>(message_type)));
+    }
+    throw std::runtime_error(fmt::format("Unknown message error code {}", status));
+}
+
+// Builds the accessor for queue `queue_index` on `chip`.
+// The address of the queue control block is read from SCRATCH_RAM_11. The 64-bit
+// control block is decoded as: bits 0-31 base address of queue 0, bits 32-39 entries
+// per queue, bits 40-47 number of queues. Queues are laid out back to back, each
+// holding a header plus request and response rings.
+// Throws std::runtime_error if queue_index is not below the decoded number of queues.
+std::shared_ptr<BlackholeArcMessageQueue> BlackholeArcMessageQueue::get_blackhole_arc_message_queue(
+    Cluster* cluster, const chip_id_t chip, const size_t queue_index) {
+    const CoreCoord arc_core = cluster->get_soc_descriptor(chip).get_cores(CoreType::ARC)[0];
+
+    uint32_t queue_control_block_addr;
+    cluster->read_from_device(
+        &queue_control_block_addr, chip, arc_core, blackhole::SCRATCH_RAM_11, sizeof(uint32_t), "LARGE_READ_TLB");
+
+    uint64_t queue_control_block;
+    cluster->read_from_device(
+        &queue_control_block, chip, arc_core, queue_control_block_addr, sizeof(uint64_t), "LARGE_READ_TLB");
+
+    uint32_t queue_base_addr = queue_control_block & 0xFFFFFFFF;
+    uint32_t num_entries_per_queue = (queue_control_block >> 32) & 0xFF;
+    uint32_t num_queues = (queue_control_block >> 40) & 0xFF;
+
+    // An out-of-range index would address memory past the last queue.
+    if (queue_index >= num_queues) {
+        throw std::runtime_error(fmt::format(
+            "ARC message queue index {} is out of range, number of queues is {}.", queue_index, num_queues));
+    }
+
+    uint32_t msg_queue_size = 2 * num_entries_per_queue * ARC_QUEUE_ENTRY_SIZE + ARC_MSG_QUEUE_HEADER_SIZE;
+    uint32_t msg_queue_base = queue_base_addr + queue_index * msg_queue_size;
+
+    return std::make_shared<BlackholeArcMessageQueue>(
+        cluster, chip, msg_queue_base, num_entries_per_queue, arc_core);
+}
+
+}  // namespace tt::umd
diff --git a/tests/blackhole/CMakeLists.txt b/tests/blackhole/CMakeLists.txt
index e47a5655..e9dac555 100644
--- a/tests/blackhole/CMakeLists.txt
+++ b/tests/blackhole/CMakeLists.txt
@@ -1,4 +1,7 @@
-set(UNIT_TESTS_BH_SRCS test_cluster_bh.cpp)
+set(UNIT_TESTS_BH_SRCS
+    test_cluster_bh.cpp
+    test_arc_messages_bh.cpp
+)
 add_executable(unit_tests_blackhole ${UNIT_TESTS_BH_SRCS})
 target_link_libraries(unit_tests_blackhole PRIVATE test_common)
diff --git a/tests/blackhole/test_arc_messages_bh.cpp b/tests/blackhole/test_arc_messages_bh.cpp
new file mode 100644
index 00000000..2a7c1ef5
--- /dev/null
+++ b/tests/blackhole/test_arc_messages_bh.cpp
@@ -0,0 +1,38 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+#include <cstdint>
+#include <memory>
+
+#include "gtest/gtest.h"
+#include "umd/device/blackhole_arc_message_queue.h"
+#include "umd/device/cluster.h"
+#include "umd/device/tt_cluster_descriptor.h"
+
+using namespace tt::umd;
+
+// Returns a default-constructed Cluster, or nullptr when no PCI device is enumerated.
+inline std::unique_ptr<Cluster> get_cluster() {
+    // TODO: Make this test work on a host system without any tt devices.
+    if (PCIDevice::enumerate_devices().empty()) {
+        return nullptr;
+    }
+    return std::make_unique<Cluster>();
+}
+
+// Sends the TEST message 100 times through the APPLICATION queue of chip 0 and
+// expects every response value to be 0.
+TEST(BlackholeArcMessages, BlackholeArcMessagesBasic) {
+    const uint32_t num_loops = 100;
+
+    std::unique_ptr<Cluster> cluster = get_cluster();
+    // get_cluster() returns nullptr on hosts without devices; the queue factory
+    // dereferences the cluster, so skip instead of crashing.
+    if (cluster == nullptr) {
+        GTEST_SKIP() << "No devices found, skipping ARC message test.";
+    }
+
+    std::shared_ptr<BlackholeArcMessageQueue> blackhole_arc_msg_queue =
+        BlackholeArcMessageQueue::get_blackhole_arc_message_queue(
+            cluster.get(), 0, BlackholeArcMessageQueueIndex::APPLICATION);
+
+    for (uint32_t i = 0; i < num_loops; i++) {
+        const uint32_t response = blackhole_arc_msg_queue->send_message(ArcMessageType::TEST);
+        // ASSERT_EQ reports the offending value on failure.
+        ASSERT_EQ(response, 0u);
+    }
+}