fix graph_processor buffer_id calculus
Introduce a buffer unique id, implemented as a static atomic counter.
Split id_to_counter into buffer_id_to_counter and tensor_id_to_counter.
mbezuljTT committed Nov 27, 2024
1 parent 7d40232 commit 8027a83
Showing 6 changed files with 150 additions and 17 deletions.
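Why an atomic counter instead of the buffer's address: as the diff below shows, graph_processor previously keyed buffers by reinterpret_cast<std::uintptr_t>(buffer), and an address is only unique while its buffer is alive — once a buffer is deallocated, the allocator can hand the same address to a new buffer, collapsing two distinct buffers into one graph node. A monotonically increasing id never repeats. A minimal sketch of the pattern (the TrackedBuffer class is hypothetical; only the id scheme mirrors the commit):

```cpp
#include <atomic>
#include <cstddef>

// Hypothetical stand-in for tt_metal::Buffer; only the unique-id
// scheme mirrors the commit.
class TrackedBuffer {
public:
    TrackedBuffer() : unique_id_(next_unique_id.fetch_add(1)) {}

    // Stable for the lifetime of the process, unlike the buffer's
    // address, which the allocator may reuse after deallocation.
    std::size_t unique_id() const { return unique_id_; }

private:
    std::size_t unique_id_;
    static std::atomic<std::size_t> next_unique_id;
};

// One process-wide counter; fetch_add is atomic, so ids stay unique
// even when buffers are constructed concurrently.
std::atomic<std::size_t> TrackedBuffer::next_unique_id{0};
```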
1 change: 1 addition & 0 deletions tests/ttnn/unit_tests/gtests/CMakeLists.txt
@@ -1,6 +1,7 @@
set(TTNN_UNIT_TESTS_SRC
${CMAKE_CURRENT_SOURCE_DIR}/test_add.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_graph_add.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/test_graph_basic.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_async_runtime.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_multiprod_queue.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_multi_cq_multi_dev.cpp
123 changes: 123 additions & 0 deletions tests/ttnn/unit_tests/gtests/test_graph_basic.cpp
@@ -0,0 +1,123 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include "gtest/gtest.h"
#include "ttnn_test_fixtures.hpp"
#include "ttnn/device.hpp"
#include "ttnn/tensor/tensor.hpp"
#include "ttnn/graph/graph_processor.hpp"
#include "ttnn/graph/graph_consts.hpp"

#include <string>

namespace ttnn::graph::test {

struct BufferTestParam {
ttnn::SimpleShape shape_a;
ttnn::SimpleShape shape_b;
};

class BufferTestFixture
: public TTNNFixtureWithDevice,
public testing::WithParamInterface<std::tuple<BufferTestParam, tt::tt_metal::IGraphProcessor::RunMode>> {};

TEST_P(BufferTestFixture, BufferTest) {
auto param_combination = GetParam();
auto params = std::get<0>(param_combination);
auto run_mode = std::get<1>(param_combination);

tt::tt_metal::Device* device = &(this->getDevice());
{
ttnn::graph::GraphProcessor::begin_graph_capture(run_mode);
{
const auto input_a = ttnn::TensorSpec(
params.shape_a,
tt::tt_metal::TensorLayout(
tt::tt_metal::DataType::BFLOAT16,
tt::tt_metal::PageConfig(tt::tt_metal::Layout::TILE),
ttnn::L1_MEMORY_CONFIG));
const auto input_tensor_a = tt::tt_metal::create_device_tensor(input_a, device);
}
{
const auto input_a = ttnn::TensorSpec(
params.shape_a,
tt::tt_metal::TensorLayout(
tt::tt_metal::DataType::BFLOAT16,
tt::tt_metal::PageConfig(tt::tt_metal::Layout::TILE),
ttnn::L1_MEMORY_CONFIG));
const auto input_tensor_a = tt::tt_metal::create_device_tensor(input_a, device);

const auto input_b = ttnn::TensorSpec(
params.shape_b,
tt::tt_metal::TensorLayout(
tt::tt_metal::DataType::BFLOAT16,
tt::tt_metal::PageConfig(tt::tt_metal::Layout::TILE),
ttnn::L1_MEMORY_CONFIG));

const auto input_tensor_b = tt::tt_metal::create_device_tensor(input_b, device);
}
auto trace = ttnn::graph::GraphProcessor::end_graph_capture();

auto find_nodes_by_type = [](const auto& trace, const std::string& type) {
std::vector<nlohmann::json> nodes;
for (const auto& node : trace) {
if (node.at(kNodeType) == type) {
nodes.push_back(node);
}
}
return nodes;
};

// Check that there are three buffer_allocate nodes and that each connects to exactly one distinct buffer
auto buffer_allocate_nodes = find_nodes_by_type(trace, kNodeBufferAllocate);
EXPECT_EQ(buffer_allocate_nodes.size(), 3);
for (const auto& node : buffer_allocate_nodes) {
EXPECT_EQ(node.at(kConnections).size(), 1);
}
auto connection_a = buffer_allocate_nodes[0].at(kConnections)[0].get<int>();
auto connection_a2 = buffer_allocate_nodes[1].at(kConnections)[0].get<int>();
auto connection_b = buffer_allocate_nodes[2].at(kConnections)[0].get<int>();
EXPECT_NE(connection_a, connection_a2);
EXPECT_NE(connection_a, connection_b);
EXPECT_NE(connection_a2, connection_b);

// Check that there are three buffer nodes with the expected sizes
auto buffer_nodes = find_nodes_by_type(trace, kNodeBuffer);
EXPECT_EQ(buffer_nodes.size(), 3);
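// BFLOAT16 is two bytes per element, so each buffer should hold volume * 2 bytes.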
auto size_a = std::stoi(buffer_nodes[0].at(kParams).at(kSize).get<std::string>());
EXPECT_EQ(params.shape_a.volume() * 2, size_a);
auto size_a2 = std::stoi(buffer_nodes[1].at(kParams).at(kSize).get<std::string>());
EXPECT_EQ(params.shape_a.volume() * 2, size_a2);
auto size_b = std::stoi(buffer_nodes[2].at(kParams).at(kSize).get<std::string>());
EXPECT_EQ(params.shape_b.volume() * 2, size_b);

// Print the trace for reference
std::cout << trace << std::endl;
}
}

INSTANTIATE_TEST_SUITE_P(
BufferTest,
BufferTestFixture,
::testing::Combine(
::testing::Values(BufferTestParam{
.shape_a = ttnn::SimpleShape(tt::tt_metal::Array4D{1, 1, 32, 32}),
.shape_b = ttnn::SimpleShape(tt::tt_metal::Array4D{32, 1, 32, 32})}),
::testing::Values(
tt::tt_metal::IGraphProcessor::RunMode::NO_DISPATCH, tt::tt_metal::IGraphProcessor::RunMode::NORMAL)),
[](const testing::TestParamInfo<std::tuple<BufferTestParam, tt::tt_metal::IGraphProcessor::RunMode>>& info) {
std::stringstream ss;

static uint32_t uid = 0;
ss << uid++;

const auto& run_mode = std::get<1>(info.param);
switch (run_mode) {
case tt::tt_metal::IGraphProcessor::RunMode::NO_DISPATCH: ss << "_NO_DISPATCH"; break;
case tt::tt_metal::IGraphProcessor::RunMode::NORMAL: ss << "_NORMAL"; break;
default: break;
}
return ss.str();
});
} // namespace ttnn::graph::test
4 changes: 4 additions & 0 deletions tt_metal/impl/buffers/buffer.cpp
@@ -13,6 +13,7 @@
#include "tt_metal/types.hpp"

#include <algorithm>
+ #include <atomic>
#include <mutex>
#include <utility>
#include "tt_metal/common/base.hpp"
@@ -25,6 +26,8 @@ namespace tt {

namespace tt_metal {

+ std::atomic<size_t> Buffer::next_unique_id = 0;

std::ostream& operator<<(std::ostream& os, const ShardSpec& spec) {
tt::stl::reflection::operator<<(os, spec);
return os;
@@ -247,6 +250,7 @@ Buffer::Buffer(
if (size != 0) {
validate_buffer_size_and_page_size(size, page_size, buffer_type, buffer_layout, shard_parameters);
}
+ unique_id_ = next_unique_id.fetch_add(1);
}

std::shared_ptr<Buffer> Buffer::create(
4 changes: 4 additions & 0 deletions tt_metal/impl/buffers/buffer.hpp
@@ -248,6 +248,8 @@ class Buffer final {
std::optional<SubDeviceId> sub_device_id() const { return sub_device_id_; }
std::optional<SubDeviceManagerId> sub_device_manager_id() const { return sub_device_manager_id_; }

+ size_t unique_id() const { return unique_id_; }

Buffer(
Device *device,
DeviceAddr size,
@@ -300,6 +302,8 @@ class Buffer final {
std::shared_ptr<const BufferPageMapping> buffer_page_mapping_;

std::weak_ptr<Buffer> weak_self;
+ size_t unique_id_;
+ static std::atomic<size_t> next_unique_id;
};

} // namespace v0
32 changes: 16 additions & 16 deletions ttnn/cpp/ttnn/graph/graph_processor.cpp
@@ -95,9 +95,8 @@ GraphProcessor::GraphProcessor(RunMode mode) : run_mode(mode) {
}
void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer) {
const std::lock_guard<std::mutex> lock(mutex);
- auto buf_id = add_buffer(buffer);
+ auto buffer_id = add_buffer(buffer);

- auto alloc_id = reinterpret_cast<std::uintptr_t>(buffer);
auto counter = graph.size();

std::unordered_map<std::string, std::string> params = {
@@ -113,15 +112,15 @@ void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer) {
.counter = counter,
.node_type = kNodeBufferAllocate,
.params = params,
- .connections = {buf_id}
+ .connections = {buffer_id}
});
graph[current_op_id.top()].connections.push_back(counter);
}
}

void GraphProcessor::track_deallocate(tt::tt_metal::Buffer* buffer) {
const std::lock_guard<std::mutex> lock(mutex);
- auto buffer_idx = add_buffer(buffer);
+ auto buffer_id = add_buffer(buffer);
auto counter = graph.size();
std::unordered_map<std::string, std::string> params = {
{kSize, std::to_string(buffer->size())},
@@ -135,7 +134,7 @@ void GraphProcessor::track_deallocate(tt::tt_metal::Buffer* buffer) {
.counter = counter,
.node_type = kNodeBufferDeallocate,
.params = params,
- .connections = {buffer_idx}
+ .connections = {buffer_id}
});
graph[current_op_id.top()].connections.push_back(counter);
}
@@ -286,31 +285,31 @@ int GraphProcessor::add_tensor(const Tensor& t) {
} else {
tensor_id = t.tensor_id.value();
}
- auto tensor_counter = id_to_counter.count(tensor_id) > 0 ? id_to_counter[tensor_id] : graph.size();
+ auto tensor_counter = tensor_id_to_counter.count(tensor_id) > 0 ? tensor_id_to_counter[tensor_id] : graph.size();
auto shape = t.get_shape();
std::unordered_map<std::string, std::string> params = {
{kShape, fmt::format("{}", shape)},
{kTensorId, fmt::format("{}", tensor_id)},
};

- if (id_to_counter.count(tensor_id) == 0) {
+ if (tensor_id_to_counter.count(tensor_id) == 0) {
graph.push_back(Vertex{.counter = tensor_counter, .node_type = kNodeTensor, .params = params, .connections = {}});
- id_to_counter[tensor_id] = tensor_counter;
+ tensor_id_to_counter[tensor_id] = tensor_counter;
}

if (buffer) {
- auto buffer_idx = add_buffer(buffer);
- graph[buffer_idx].connections.push_back(tensor_counter);
+ auto buffer_id = add_buffer(buffer);
+ graph[buffer_id].connections.push_back(tensor_counter);
} else {
tt::log_info("Tensor doesn't have buffer, but storage is {}", demangle(get_type_in_var(t.get_storage()).name()));
}
return tensor_counter;
}

int GraphProcessor::add_buffer(const tt::tt_metal::Buffer* buffer) {
- auto buffer_alloc_id = reinterpret_cast<std::uintptr_t>(buffer);
- auto counter = id_to_counter.count(buffer_alloc_id) > 0 ? id_to_counter[buffer_alloc_id] : graph.size();
- if (id_to_counter.count(buffer_alloc_id) == 0) {
+ auto buffer_id = buffer->unique_id();
+ auto counter = buffer_id_to_counter.count(buffer_id) > 0 ? buffer_id_to_counter[buffer_id] : graph.size();
+ if (buffer_id_to_counter.count(buffer_id) == 0) {
std::unordered_map<std::string, std::string> params = {
{kSize, std::to_string(buffer->size())},
{kType, buffer->is_dram() ? "DRAM" : "L1"},
@@ -324,10 +323,10 @@ int GraphProcessor::add_buffer(const tt::tt_metal::Buffer* buffer) {
.connections = {}
});
graph[current_op_id.top()].connections.push_back(counter);
- id_to_counter[buffer_alloc_id] = counter;
+ buffer_id_to_counter[buffer_id] = counter;
return counter;
}
- return id_to_counter[buffer_alloc_id];
+ return buffer_id_to_counter[buffer_id];
}


@@ -428,7 +427,8 @@ void GraphProcessor::end_function_process_optional_tensor(const std::any& any_va
void GraphProcessor::begin_capture(RunMode mode) {
const std::lock_guard<std::mutex> lock(mutex);
graph.clear();
- id_to_counter.clear();
+ buffer_id_to_counter.clear();
+ tensor_id_to_counter.clear();
graph.push_back(Vertex{
.counter = 0,
.node_type = kNodeCaptureStart,
3 changes: 2 additions & 1 deletion ttnn/cpp/ttnn/graph/graph_processor.hpp
@@ -73,7 +73,8 @@ namespace ttnn::graph {
std::mutex mutex;
RunMode run_mode = RunMode::NORMAL;
std::stack<int> current_op_id;
- std::unordered_map<std::int64_t, int> id_to_counter;
+ std::unordered_map<std::int64_t, int> buffer_id_to_counter;
+ std::unordered_map<std::int64_t, int> tensor_id_to_counter;
int last_finished_op_id = -1;
std::vector<Vertex> graph;
std::unordered_map<std::type_index, ProcessFunc> begin_function_any_map;
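The second half of the change, visible in the hunk above: with buffer ids now small sequential integers (like tensor ids) rather than pointer values, a single id_to_counter map keyed on std::int64_t would conflate buffer N with tensor N. Splitting it keeps the two id spaces independent. A minimal sketch of the failure mode the split avoids (map names match the commit; the values are illustrative):

```cpp
#include <cassert>
#include <cstdint>
#include <unordered_map>

int main() {
    // Before the split: one map for both id spaces. Buffer #0 and
    // tensor #0 now share the key 0, so the second insert silently
    // overwrites the first vertex index.
    std::unordered_map<std::int64_t, int> id_to_counter;
    id_to_counter[0] = 5;  // graph vertex for buffer #0
    id_to_counter[0] = 7;  // graph vertex for tensor #0 clobbers it

    // After the split: independent maps, no collision.
    std::unordered_map<std::int64_t, int> buffer_id_to_counter;
    std::unordered_map<std::int64_t, int> tensor_id_to_counter;
    buffer_id_to_counter[0] = 5;
    tensor_id_to_counter[0] = 7;
    assert(buffer_id_to_counter[0] == 5 && tensor_id_to_counter[0] == 7);
    return 0;
}
```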
