From 8027a8311e927b3d81094c8be5a81b807ef896d6 Mon Sep 17 00:00:00 2001 From: Bezulj Marko Date: Mon, 25 Nov 2024 15:34:55 +0000 Subject: [PATCH 1/4] fix graph_processor buffer_id calculus introduce buffer unique id, as an atomic static counter split id_to_counter to buffer_id_to_counter and tensor_id_to_counter --- tests/ttnn/unit_tests/gtests/CMakeLists.txt | 1 + .../unit_tests/gtests/test_graph_basic.cpp | 123 ++++++++++++++++++ tt_metal/impl/buffers/buffer.cpp | 4 + tt_metal/impl/buffers/buffer.hpp | 4 + ttnn/cpp/ttnn/graph/graph_processor.cpp | 32 ++--- ttnn/cpp/ttnn/graph/graph_processor.hpp | 3 +- 6 files changed, 150 insertions(+), 17 deletions(-) create mode 100644 tests/ttnn/unit_tests/gtests/test_graph_basic.cpp diff --git a/tests/ttnn/unit_tests/gtests/CMakeLists.txt b/tests/ttnn/unit_tests/gtests/CMakeLists.txt index fcdd7a0b35f..52e1e69c098 100644 --- a/tests/ttnn/unit_tests/gtests/CMakeLists.txt +++ b/tests/ttnn/unit_tests/gtests/CMakeLists.txt @@ -1,6 +1,7 @@ set(TTNN_UNIT_TESTS_SRC ${CMAKE_CURRENT_SOURCE_DIR}/test_add.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_graph_add.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_graph_basic.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_async_runtime.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_multiprod_queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_multi_cq_multi_dev.cpp diff --git a/tests/ttnn/unit_tests/gtests/test_graph_basic.cpp b/tests/ttnn/unit_tests/gtests/test_graph_basic.cpp new file mode 100644 index 00000000000..47fbe261ac4 --- /dev/null +++ b/tests/ttnn/unit_tests/gtests/test_graph_basic.cpp @@ -0,0 +1,123 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "gtest/gtest.h" +#include "ttnn_test_fixtures.hpp" +#include "ttnn/device.hpp" +#include "ttnn/tensor/tensor.hpp" +#include "ttnn/graph/graph_processor.hpp" +#include "ttnn/graph/graph_consts.hpp" + +#include + +namespace ttnn::graph::test { + +struct BufferTestParam { + ttnn::SimpleShape shape_a; + ttnn::SimpleShape shape_b; +}; + +class BufferTestFixture + : public TTNNFixtureWithDevice, + public testing::WithParamInterface> {}; + +TEST_P(BufferTestFixture, BufferTest) { + auto param_combination = GetParam(); + auto params = std::get<0>(param_combination); + auto run_mode = std::get<1>(param_combination); + + tt::tt_metal::Device* device = &(this->getDevice()); + { + ttnn::graph::GraphProcessor::begin_graph_capture(ttnn::graph::GraphProcessor::RunMode::NO_DISPATCH); + { + const auto input_a = ttnn::TensorSpec( + params.shape_a, + tt::tt_metal::TensorLayout( + tt::tt_metal::DataType::BFLOAT16, + tt::tt_metal::PageConfig(tt::tt_metal::Layout::TILE), + ttnn::L1_MEMORY_CONFIG)); + const auto input_tensor_a = tt::tt_metal::create_device_tensor(input_a, device); + } + { + const auto input_a = ttnn::TensorSpec( + params.shape_a, + tt::tt_metal::TensorLayout( + tt::tt_metal::DataType::BFLOAT16, + tt::tt_metal::PageConfig(tt::tt_metal::Layout::TILE), + ttnn::L1_MEMORY_CONFIG)); + const auto input_tensor_a = tt::tt_metal::create_device_tensor(input_a, device); + + const auto input_b = ttnn::TensorSpec( + params.shape_b, + tt::tt_metal::TensorLayout( + tt::tt_metal::DataType::BFLOAT16, + tt::tt_metal::PageConfig(tt::tt_metal::Layout::TILE), + ttnn::L1_MEMORY_CONFIG)); + + const auto input_tensor_b = tt::tt_metal::create_device_tensor(input_b, device); + } + auto trace = ttnn::graph::GraphProcessor::end_graph_capture(); + + auto find_nodes_by_type = [](const auto& trace, const std::string& type) { + std::vector nodes; + for (const auto& node : trace) { + if (node.at(kNodeType) == type) { + nodes.push_back(node); + } + } + return nodes; + }; + + // Check if there are two buffer_allocate_nodes, and if each is connected to only one different buffer + auto buffer_allocate_nodes = find_nodes_by_type(trace, kNodeBufferAllocate); + EXPECT_EQ(buffer_allocate_nodes.size(), 3); + for (const auto& node : buffer_allocate_nodes) { + EXPECT_EQ(node.at(kConnections).size(), 1); + } + auto connection_a = buffer_allocate_nodes[0].at(kConnections)[0].get(); + auto connection_a2 = buffer_allocate_nodes[1].at(kConnections)[0].get(); + auto connection_c = buffer_allocate_nodes[2].at(kConnections)[0].get(); + EXPECT_NE(connection_a, connection_a2); + EXPECT_NE(connection_a, connection_c); + EXPECT_NE(connection_a2, connection_c); + + // Check if there are two buffer nodes and they have correct sizes + auto buffer_nodes = find_nodes_by_type(trace, kNodeBuffer); + EXPECT_EQ(buffer_nodes.size(), 3); + auto size_a = std::stoi(buffer_nodes[0].at(kParams).at(kSize).get()); + EXPECT_EQ(params.shape_a.volume() * 2, size_a); + auto size_a2 = std::stoi(buffer_nodes[1].at(kParams).at(kSize).get()); + EXPECT_EQ(params.shape_a.volume() * 2, size_a2); + auto size_b = std::stoi(buffer_nodes[2].at(kParams).at(kSize).get()); + EXPECT_EQ(params.shape_b.volume() * 2, size_b); + + // Print the trace for reference + std::cout << trace << std::endl; + } +} + +INSTANTIATE_TEST_SUITE_P( + BufferTest, + BufferTestFixture, + ::testing::Combine( + ::testing::Values(BufferTestParam{ + .shape_a = ttnn::SimpleShape(tt::tt_metal::Array4D{1, 1, 32, 32}), + .shape_b = ttnn::SimpleShape(tt::tt_metal::Array4D{32, 1, 32, 32})}), + ::testing::Values( + tt::tt_metal::IGraphProcessor::RunMode::NO_DISPATCH, tt::tt_metal::IGraphProcessor::RunMode::NORMAL)), + [](const testing::TestParamInfo>& info) { + std::stringstream ss; + + static uint32_t uid = 0; + ss << uid++; + + const auto& run_mode = std::get<1>(info.param); + switch (run_mode) { + case tt::tt_metal::IGraphProcessor::RunMode::NO_DISPATCH: ss << "_NO_DISPATCH"; break; + case tt::tt_metal::IGraphProcessor::RunMode::NORMAL: ss << "_NORMAL"; break; + default: break; + } + return ss.str(); + }); +} // namespace ttnn::graph::test diff --git a/tt_metal/impl/buffers/buffer.cpp b/tt_metal/impl/buffers/buffer.cpp index 58e46f30da1..08512023ba9 100644 --- a/tt_metal/impl/buffers/buffer.cpp +++ b/tt_metal/impl/buffers/buffer.cpp @@ -13,6 +13,7 @@ #include "tt_metal/types.hpp" #include +#include #include #include #include "tt_metal/common/base.hpp" @@ -25,6 +26,8 @@ namespace tt { namespace tt_metal { +std::atomic Buffer::next_unique_id = 0; + std::ostream& operator<<(std::ostream& os, const ShardSpec& spec) { tt::stl::reflection::operator<<(os, spec); return os; @@ -247,6 +250,7 @@ Buffer::Buffer( if (size != 0) { validate_buffer_size_and_page_size(size, page_size, buffer_type, buffer_layout, shard_parameters); } + unique_id_ = next_unique_id.fetch_add(1); } std::shared_ptr Buffer::create( diff --git a/tt_metal/impl/buffers/buffer.hpp b/tt_metal/impl/buffers/buffer.hpp index a8dff2c072d..6f1259ea0fe 100644 --- a/tt_metal/impl/buffers/buffer.hpp +++ b/tt_metal/impl/buffers/buffer.hpp @@ -248,6 +248,8 @@ class Buffer final { std::optional sub_device_id() const { return sub_device_id_; } std::optional sub_device_manager_id() const { return sub_device_manager_id_; } + size_t unique_id() const { return unique_id_; } + Buffer( Device *device, DeviceAddr size, @@ -300,6 +302,8 @@ class Buffer final { std::shared_ptr buffer_page_mapping_; std::weak_ptr weak_self; + size_t unique_id_; + static std::atomic next_unique_id; }; } // namespace v0 diff --git a/ttnn/cpp/ttnn/graph/graph_processor.cpp b/ttnn/cpp/ttnn/graph/graph_processor.cpp index f702edc1a26..637e936c9f0 100644 --- a/ttnn/cpp/ttnn/graph/graph_processor.cpp +++ b/ttnn/cpp/ttnn/graph/graph_processor.cpp @@ -95,9 +95,8 @@ GraphProcessor::GraphProcessor(RunMode mode) : run_mode(mode) { } void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer) { const std::lock_guard lock(mutex); - auto buf_id = add_buffer(buffer); + auto buffer_id = add_buffer(buffer); - auto alloc_id = reinterpret_cast(buffer); auto counter = graph.size(); std::unordered_map params = { @@ -113,7 +112,7 @@ void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer) { .counter = counter, .node_type = kNodeBufferAllocate, .params = params, - .connections = {buf_id} + .connections = {buffer_id} }); graph[current_op_id.top()].connections.push_back(counter); } @@ -121,7 +120,7 @@ void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer) { void GraphProcessor::track_deallocate(tt::tt_metal::Buffer* buffer) { const std::lock_guard lock(mutex); - auto buffer_idx = add_buffer(buffer); + auto buffer_id = add_buffer(buffer); auto counter = graph.size(); std::unordered_map params = { {kSize, std::to_string(buffer->size())}, @@ -135,7 +134,7 @@ void GraphProcessor::track_deallocate(tt::tt_metal::Buffer* buffer) { .counter = counter, .node_type = kNodeBufferDeallocate, .params = params, - .connections = {buffer_idx} + .connections = {buffer_id} }); graph[current_op_id.top()].connections.push_back(counter); } @@ -286,21 +285,21 @@ int GraphProcessor::add_tensor(const Tensor& t) { } else { tensor_id = t.tensor_id.value(); } - auto tensor_counter = id_to_counter.count(tensor_id) > 0 ? id_to_counter[tensor_id] : graph.size(); + auto tensor_counter = tensor_id_to_counter.count(tensor_id) > 0 ? tensor_id_to_counter[tensor_id] : graph.size(); auto shape = t.get_shape(); std::unordered_map params = { {kShape, fmt::format("{}", shape)}, {kTensorId, fmt::format("{}", tensor_id)}, }; - if (id_to_counter.count(tensor_id) == 0) { + if (tensor_id_to_counter.count(tensor_id) == 0) { graph.push_back(Vertex{.counter = tensor_counter, .node_type = kNodeTensor, .params = params, .connections = {}}); - id_to_counter[tensor_id] = tensor_counter; + tensor_id_to_counter[tensor_id] = tensor_counter; } if (buffer) { - auto buffer_idx = add_buffer(buffer); - graph[buffer_idx].connections.push_back(tensor_counter); + auto buffer_id = add_buffer(buffer); + graph[buffer_id].connections.push_back(tensor_counter); } else { tt::log_info("Tensor doesn't have buffer, but storage is {}", demangle(get_type_in_var(t.get_storage()).name())); } @@ -308,9 +307,9 @@ int GraphProcessor::add_tensor(const Tensor& t) { } int GraphProcessor::add_buffer(const tt::tt_metal::Buffer* buffer) { - auto buffer_alloc_id = reinterpret_cast(buffer); - auto counter = id_to_counter.count(buffer_alloc_id) > 0 ? id_to_counter[buffer_alloc_id] : graph.size(); - if (id_to_counter.count(buffer_alloc_id) == 0) { + auto buffer_id = buffer->unique_id(); + auto counter = buffer_id_to_counter.count(buffer_id) > 0 ? buffer_id_to_counter[buffer_id] : graph.size(); + if (buffer_id_to_counter.count(buffer_id) == 0) { std::unordered_map params = { {kSize, std::to_string(buffer->size())}, {kType, buffer->is_dram() ? "DRAM" : "L1"}, @@ -324,10 +323,10 @@ int GraphProcessor::add_buffer(const tt::tt_metal::Buffer* buffer) { .connections = {} }); graph[current_op_id.top()].connections.push_back(counter); - id_to_counter[buffer_alloc_id] = counter; + buffer_id_to_counter[buffer_id] = counter; return counter; } - return id_to_counter[buffer_alloc_id]; + return buffer_id_to_counter[buffer_id]; } @@ -428,7 +427,8 @@ void GraphProcessor::end_function_process_optional_tensor(const std::any& any_va void GraphProcessor::begin_capture(RunMode mode) { const std::lock_guard lock(mutex); graph.clear(); - id_to_counter.clear(); + buffer_id_to_counter.clear(); + tensor_id_to_counter.clear(); graph.push_back(Vertex{ .counter = 0, .node_type = kNodeCaptureStart, diff --git a/ttnn/cpp/ttnn/graph/graph_processor.hpp b/ttnn/cpp/ttnn/graph/graph_processor.hpp index 7f89251622d..488d7cf1f06 100644 --- a/ttnn/cpp/ttnn/graph/graph_processor.hpp +++ b/ttnn/cpp/ttnn/graph/graph_processor.hpp @@ -73,7 +73,8 @@ namespace ttnn::graph { std::mutex mutex; RunMode run_mode = RunMode::NORMAL; std::stack current_op_id; - std::unordered_map id_to_counter; + std::unordered_map buffer_id_to_counter; + std::unordered_map tensor_id_to_counter; int last_finished_op_id = -1; std::vector graph; std::unordered_map begin_function_any_map; From ae02ef0789908c85c177a443c3ecae131683d92d Mon Sep 17 00:00:00 2001 From: Bezulj Marko Date: Wed, 27 Nov 2024 21:16:46 +0000 Subject: [PATCH 2/4] v1::GetId() --- tt_metal/impl/buffers/buffer.cpp | 2 ++ tt_metal/include/tt_metal/buffer.hpp | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/tt_metal/impl/buffers/buffer.cpp b/tt_metal/impl/buffers/buffer.cpp index 08512023ba9..9d8ffb7ae6e 100644 --- a/tt_metal/impl/buffers/buffer.cpp +++ b/tt_metal/impl/buffers/buffer.cpp @@ -528,6 +528,8 @@ v1::BufferHandle v1::CreateBuffer(InterleavedBufferConfig config) { return v1::B void v1::DeallocateBuffer(const BufferHandle& buffer) { v0::DeallocateBuffer(*buffer); } +std::size_t v1::GetId(const BufferHandle& buffer) { return buffer->unique_id(); } + void v1::WriteToBuffer(const BufferHandle& buffer, stl::Span host_buffer) { detail::WriteToBuffer(*buffer, stl::Span{reinterpret_cast(host_buffer.data()), host_buffer.size()}); } diff --git a/tt_metal/include/tt_metal/buffer.hpp b/tt_metal/include/tt_metal/buffer.hpp index a69f6a436eb..ea7d61f5c72 100644 --- a/tt_metal/include/tt_metal/buffer.hpp +++ b/tt_metal/include/tt_metal/buffer.hpp @@ -28,6 +28,14 @@ BufferHandle CreateBuffer(InterleavedBufferConfig config); */ void DeallocateBuffer(const BufferHandle& buffer); +/** + * @brief Retrieves the ID of the specified buffer. + * + * @param buffer The buffer to get the ID from. + * @return The unique ID of the buffer. + */ +std::size_t GetId(const BufferHandle& buffer); + /** * @brief Copies data from a host buffer into the specified device buffer. * From 48a59fcb1d9c3bd68a23416cb2cb0c5b0aba7841 Mon Sep 17 00:00:00 2001 From: Bezulj Marko Date: Thu, 28 Nov 2024 06:04:49 +0000 Subject: [PATCH 3/4] init unique_id at declaration. --- tt_metal/impl/buffers/buffer.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tt_metal/impl/buffers/buffer.hpp b/tt_metal/impl/buffers/buffer.hpp index 6f1259ea0fe..8c6c59b7691 100644 --- a/tt_metal/impl/buffers/buffer.hpp +++ b/tt_metal/impl/buffers/buffer.hpp @@ -302,7 +302,7 @@ class Buffer final { std::shared_ptr buffer_page_mapping_; std::weak_ptr weak_self; - size_t unique_id_; + size_t unique_id_ = 0; static std::atomic next_unique_id; }; From 0d7a7908d7efffe0beca7eea2f4e44427deb2117 Mon Sep 17 00:00:00 2001 From: Bezulj Marko Date: Fri, 29 Nov 2024 19:40:28 +0000 Subject: [PATCH 4/4] fix run_mode --- tests/ttnn/unit_tests/gtests/test_graph_basic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ttnn/unit_tests/gtests/test_graph_basic.cpp b/tests/ttnn/unit_tests/gtests/test_graph_basic.cpp index 47fbe261ac4..9cfd0dec7b2 100644 --- a/tests/ttnn/unit_tests/gtests/test_graph_basic.cpp +++ b/tests/ttnn/unit_tests/gtests/test_graph_basic.cpp @@ -29,7 +29,7 @@ TEST_P(BufferTestFixture, BufferTest) { tt::tt_metal::Device* device = &(this->getDevice()); { - ttnn::graph::GraphProcessor::begin_graph_capture(ttnn::graph::GraphProcessor::RunMode::NO_DISPATCH); + ttnn::graph::GraphProcessor::begin_graph_capture(run_mode); { const auto input_a = ttnn::TensorSpec( params.shape_a,