fix graph_processor buffer_id calculus
Introduce a buffer unique id, implemented as a static atomic counter.
Split id_to_counter into buffer_id_to_counter and tensor_id_to_counter.
mbezuljTT committed Nov 27, 2024
1 parent 7d40232 commit 8027a83
Showing 6 changed files with 150 additions and 17 deletions.
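Why an atomic counter instead of the buffer's address: as the diff below shows, graph_processor previously keyed buffers by reinterpret_cast<std::uintptr_t>(buffer), and an address is only unique while its buffer is alive — once a buffer is deallocated, the allocator can hand the same address to a new buffer, collapsing two distinct buffers into one graph node. A monotonically increasing id never repeats. A minimal sketch of the pattern (the TrackedBuffer class is hypothetical; only the id scheme mirrors the commit):

```cpp
#include <atomic>
#include <cstddef>

// Hypothetical stand-in for tt_metal::Buffer; only the unique-id
// scheme mirrors the commit.
class TrackedBuffer {
public:
    TrackedBuffer() : unique_id_(next_unique_id.fetch_add(1)) {}

    // Stable for the lifetime of the process, unlike the buffer's
    // address, which the allocator may reuse after deallocation.
    std::size_t unique_id() const { return unique_id_; }

private:
    std::size_t unique_id_;
    static std::atomic<std::size_t> next_unique_id;
};

// One process-wide counter; fetch_add is atomic, so ids stay unique
// even when buffers are constructed concurrently.
std::atomic<std::size_t> TrackedBuffer::next_unique_id{0};
```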
1 change: 1 addition & 0 deletions tests/ttnn/unit_tests/gtests/CMakeLists.txt
@@ -1,6 +1,7 @@
set(TTNN_UNIT_TESTS_SRC
${CMAKE_CURRENT_SOURCE_DIR}/test_add.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_graph_add.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/test_graph_basic.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_async_runtime.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_multiprod_queue.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_multi_cq_multi_dev.cpp
123 changes: 123 additions & 0 deletions tests/ttnn/unit_tests/gtests/test_graph_basic.cpp
@@ -0,0 +1,123 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include "gtest/gtest.h"
#include "ttnn_test_fixtures.hpp"
#include "ttnn/device.hpp"
#include "ttnn/tensor/tensor.hpp"
#include "ttnn/graph/graph_processor.hpp"
#include "ttnn/graph/graph_consts.hpp"

#include <string>

namespace ttnn::graph::test {

struct BufferTestParam {
ttnn::SimpleShape shape_a;
ttnn::SimpleShape shape_b;
};

class BufferTestFixture
: public TTNNFixtureWithDevice,
public testing::WithParamInterface<std::tuple<BufferTestParam, tt::tt_metal::IGraphProcessor::RunMode>> {};

TEST_P(BufferTestFixture, BufferTest) {
auto param_combination = GetParam();
auto params = std::get<0>(param_combination);
auto run_mode = std::get<1>(param_combination);

tt::tt_metal::Device* device = &(this->getDevice());
{
ttnn::graph::GraphProcessor::begin_graph_capture(run_mode);
{
const auto input_a = ttnn::TensorSpec(
params.shape_a,
tt::tt_metal::TensorLayout(
tt::tt_metal::DataType::BFLOAT16,
tt::tt_metal::PageConfig(tt::tt_metal::Layout::TILE),
ttnn::L1_MEMORY_CONFIG));
const auto input_tensor_a = tt::tt_metal::create_device_tensor(input_a, device);
}
{
const auto input_a = ttnn::TensorSpec(
params.shape_a,
tt::tt_metal::TensorLayout(
tt::tt_metal::DataType::BFLOAT16,
tt::tt_metal::PageConfig(tt::tt_metal::Layout::TILE),
ttnn::L1_MEMORY_CONFIG));
const auto input_tensor_a = tt::tt_metal::create_device_tensor(input_a, device);

const auto input_b = ttnn::TensorSpec(
params.shape_b,
tt::tt_metal::TensorLayout(
tt::tt_metal::DataType::BFLOAT16,
tt::tt_metal::PageConfig(tt::tt_metal::Layout::TILE),
ttnn::L1_MEMORY_CONFIG));

const auto input_tensor_b = tt::tt_metal::create_device_tensor(input_b, device);
}
auto trace = ttnn::graph::GraphProcessor::end_graph_capture();

auto find_nodes_by_type = [](const auto& trace, const std::string& type) {
std::vector<nlohmann::json> nodes;
for (const auto& node : trace) {
if (node.at(kNodeType) == type) {
nodes.push_back(node);
}
}
return nodes;
};

// Check that there are three buffer_allocate nodes and that each connects to exactly one distinct buffer
auto buffer_allocate_nodes = find_nodes_by_type(trace, kNodeBufferAllocate);
EXPECT_EQ(buffer_allocate_nodes.size(), 3);
for (const auto& node : buffer_allocate_nodes) {
EXPECT_EQ(node.at(kConnections).size(), 1);
}
auto connection_a = buffer_allocate_nodes[0].at(kConnections)[0].get<int>();
auto connection_a2 = buffer_allocate_nodes[1].at(kConnections)[0].get<int>();
auto connection_b = buffer_allocate_nodes[2].at(kConnections)[0].get<int>();
EXPECT_NE(connection_a, connection_a2);
EXPECT_NE(connection_a, connection_b);
EXPECT_NE(connection_a2, connection_b);

// Check that there are three buffer nodes with the expected sizes
auto buffer_nodes = find_nodes_by_type(trace, kNodeBuffer);
EXPECT_EQ(buffer_nodes.size(), 3);
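// BFLOAT16 is two bytes per element, so each buffer should hold volume * 2 bytes.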
auto size_a = std::stoi(buffer_nodes[0].at(kParams).at(kSize).get<std::string>());
EXPECT_EQ(params.shape_a.volume() * 2, size_a);
auto size_a2 = std::stoi(buffer_nodes[1].at(kParams).at(kSize).get<std::string>());
EXPECT_EQ(params.shape_a.volume() * 2, size_a2);
auto size_b = std::stoi(buffer_nodes[2].at(kParams).at(kSize).get<std::string>());
EXPECT_EQ(params.shape_b.volume() * 2, size_b);

// Print the trace for reference
std::cout << trace << std::endl;
}
}

INSTANTIATE_TEST_SUITE_P(
BufferTest,
BufferTestFixture,
::testing::Combine(
::testing::Values(BufferTestParam{
.shape_a = ttnn::SimpleShape(tt::tt_metal::Array4D{1, 1, 32, 32}),
.shape_b = ttnn::SimpleShape(tt::tt_metal::Array4D{32, 1, 32, 32})}),
::testing::Values(
tt::tt_metal::IGraphProcessor::RunMode::NO_DISPATCH, tt::tt_metal::IGraphProcessor::RunMode::NORMAL)),
[](const testing::TestParamInfo<std::tuple<BufferTestParam, tt::tt_metal::IGraphProcessor::RunMode>>& info) {
std::stringstream ss;

static uint32_t uid = 0;
ss << uid++;

const auto& run_mode = std::get<1>(info.param);
switch (run_mode) {
case tt::tt_metal::IGraphProcessor::RunMode::NO_DISPATCH: ss << "_NO_DISPATCH"; break;
case tt::tt_metal::IGraphProcessor::RunMode::NORMAL: ss << "_NORMAL"; break;
default: break;
}
return ss.str();
});
} // namespace ttnn::graph::test
4 changes: 4 additions & 0 deletions tt_metal/impl/buffers/buffer.cpp
@@ -13,6 +13,7 @@
#include "tt_metal/types.hpp"

#include <algorithm>
+ #include <atomic>
#include <mutex>
#include <utility>
#include "tt_metal/common/base.hpp"
@@ -25,6 +26,8 @@ namespace tt {

namespace tt_metal {

+ std::atomic<size_t> Buffer::next_unique_id = 0;

std::ostream& operator<<(std::ostream& os, const ShardSpec& spec) {
tt::stl::reflection::operator<<(os, spec);
return os;
@@ -247,6 +250,7 @@ Buffer::Buffer(
if (size != 0) {
validate_buffer_size_and_page_size(size, page_size, buffer_type, buffer_layout, shard_parameters);
}
+ unique_id_ = next_unique_id.fetch_add(1);
}

std::shared_ptr<Buffer> Buffer::create(
4 changes: 4 additions & 0 deletions tt_metal/impl/buffers/buffer.hpp
@@ -248,6 +248,8 @@ class Buffer final {
std::optional<SubDeviceId> sub_device_id() const { return sub_device_id_; }
std::optional<SubDeviceManagerId> sub_device_manager_id() const { return sub_device_manager_id_; }

+ size_t unique_id() const { return unique_id_; }

Buffer(
Device *device,
DeviceAddr size,
@@ -300,6 +302,8 @@ class Buffer final {
std::shared_ptr<const BufferPageMapping> buffer_page_mapping_;

std::weak_ptr<Buffer> weak_self;
+ size_t unique_id_;
+ static std::atomic<size_t> next_unique_id;
};

} // namespace v0
32 changes: 16 additions & 16 deletions ttnn/cpp/ttnn/graph/graph_processor.cpp
@@ -95,9 +95,8 @@ GraphProcessor::GraphProcessor(RunMode mode) : run_mode(mode) {
}
void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer) {
const std::lock_guard<std::mutex> lock(mutex);
- auto buf_id = add_buffer(buffer);
+ auto buffer_id = add_buffer(buffer);

- auto alloc_id = reinterpret_cast<std::uintptr_t>(buffer);
auto counter = graph.size();

std::unordered_map<std::string, std::string> params = {
@@ -113,15 +112,15 @@ void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer) {
.counter = counter,
.node_type = kNodeBufferAllocate,
.params = params,
- .connections = {buf_id}
+ .connections = {buffer_id}
});
graph[current_op_id.top()].connections.push_back(counter);
}
}

void GraphProcessor::track_deallocate(tt::tt_metal::Buffer* buffer) {
const std::lock_guard<std::mutex> lock(mutex);
- auto buffer_idx = add_buffer(buffer);
+ auto buffer_id = add_buffer(buffer);
auto counter = graph.size();
std::unordered_map<std::string, std::string> params = {
{kSize, std::to_string(buffer->size())},
@@ -135,7 +134,7 @@ void GraphProcessor::track_deallocate(tt::tt_metal::Buffer* buffer) {
.counter = counter,
.node_type = kNodeBufferDeallocate,
.params = params,
- .connections = {buffer_idx}
+ .connections = {buffer_id}
});
graph[current_op_id.top()].connections.push_back(counter);
}
@@ -286,31 +285,31 @@ int GraphProcessor::add_tensor(const Tensor& t) {
} else {
tensor_id = t.tensor_id.value();
}
- auto tensor_counter = id_to_counter.count(tensor_id) > 0 ? id_to_counter[tensor_id] : graph.size();
+ auto tensor_counter = tensor_id_to_counter.count(tensor_id) > 0 ? tensor_id_to_counter[tensor_id] : graph.size();
auto shape = t.get_shape();
std::unordered_map<std::string, std::string> params = {
{kShape, fmt::format("{}", shape)},
{kTensorId, fmt::format("{}", tensor_id)},
};

- if (id_to_counter.count(tensor_id) == 0) {
+ if (tensor_id_to_counter.count(tensor_id) == 0) {
graph.push_back(Vertex{.counter = tensor_counter, .node_type = kNodeTensor, .params = params, .connections = {}});
- id_to_counter[tensor_id] = tensor_counter;
+ tensor_id_to_counter[tensor_id] = tensor_counter;
}

if (buffer) {
- auto buffer_idx = add_buffer(buffer);
- graph[buffer_idx].connections.push_back(tensor_counter);
+ auto buffer_id = add_buffer(buffer);
+ graph[buffer_id].connections.push_back(tensor_counter);
} else {
tt::log_info("Tensor doesn't have buffer, but storage is {}", demangle(get_type_in_var(t.get_storage()).name()));
}
return tensor_counter;
}

int GraphProcessor::add_buffer(const tt::tt_metal::Buffer* buffer) {
- auto buffer_alloc_id = reinterpret_cast<std::uintptr_t>(buffer);
- auto counter = id_to_counter.count(buffer_alloc_id) > 0 ? id_to_counter[buffer_alloc_id] : graph.size();
- if (id_to_counter.count(buffer_alloc_id) == 0) {
+ auto buffer_id = buffer->unique_id();
+ auto counter = buffer_id_to_counter.count(buffer_id) > 0 ? buffer_id_to_counter[buffer_id] : graph.size();
+ if (buffer_id_to_counter.count(buffer_id) == 0) {
std::unordered_map<std::string, std::string> params = {
{kSize, std::to_string(buffer->size())},
{kType, buffer->is_dram() ? "DRAM" : "L1"},
@@ -324,10 +323,10 @@ int GraphProcessor::add_buffer(const tt::tt_metal::Buffer* buffer) {
.connections = {}
});
graph[current_op_id.top()].connections.push_back(counter);
- id_to_counter[buffer_alloc_id] = counter;
+ buffer_id_to_counter[buffer_id] = counter;
return counter;
}
- return id_to_counter[buffer_alloc_id];
+ return buffer_id_to_counter[buffer_id];
}


@@ -428,7 +427,8 @@ void GraphProcessor::end_function_process_optional_tensor(const std::any& any_va
void GraphProcessor::begin_capture(RunMode mode) {
const std::lock_guard<std::mutex> lock(mutex);
graph.clear();
- id_to_counter.clear();
+ buffer_id_to_counter.clear();
+ tensor_id_to_counter.clear();
graph.push_back(Vertex{
.counter = 0,
.node_type = kNodeCaptureStart,
3 changes: 2 additions & 1 deletion ttnn/cpp/ttnn/graph/graph_processor.hpp
@@ -73,7 +73,8 @@ namespace ttnn::graph {
std::mutex mutex;
RunMode run_mode = RunMode::NORMAL;
std::stack<int> current_op_id;
- std::unordered_map<std::int64_t, int> id_to_counter;
+ std::unordered_map<std::int64_t, int> buffer_id_to_counter;
+ std::unordered_map<std::int64_t, int> tensor_id_to_counter;
int last_finished_op_id = -1;
std::vector<Vertex> graph;
std::unordered_map<std::type_index, ProcessFunc> begin_function_any_map;
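The second half of the change, visible in the hunk above: with buffer ids now small sequential integers (like tensor ids) rather than pointer values, a single id_to_counter map keyed on std::int64_t would conflate buffer N with tensor N. Splitting it keeps the two id spaces independent. A minimal sketch of the failure mode the split avoids (map names match the commit; the values are illustrative):

```cpp
#include <cassert>
#include <cstdint>
#include <unordered_map>

int main() {
    // Before the split: one map for both id spaces. Buffer #0 and
    // tensor #0 now share the key 0, so the second insert silently
    // overwrites the first vertex index.
    std::unordered_map<std::int64_t, int> id_to_counter;
    id_to_counter[0] = 5;  // graph vertex for buffer #0
    id_to_counter[0] = 7;  // graph vertex for tensor #0 clobbers it

    // After the split: independent maps, no collision.
    std::unordered_map<std::int64_t, int> buffer_id_to_counter;
    std::unordered_map<std::int64_t, int> tensor_id_to_counter;
    buffer_id_to_counter[0] = 5;
    tensor_id_to_counter[0] = 7;
    assert(buffer_id_to_counter[0] == 5 && tensor_id_to_counter[0] == 7);
    return 0;
}
```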
