Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix graph_processor buffer_alloc_id calculation #15426

Merged
merged 6 commits into from
Nov 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tests/ttnn/unit_tests/gtests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
set(TTNN_UNIT_TESTS_SRC
${CMAKE_CURRENT_SOURCE_DIR}/test_add.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_graph_add.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_graph_basic.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_async_runtime.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_multiprod_queue.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_multi_cq_multi_dev.cpp
Expand Down
123 changes: 123 additions & 0 deletions tests/ttnn/unit_tests/gtests/test_graph_basic.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include "gtest/gtest.h"
#include "ttnn_test_fixtures.hpp"
#include "ttnn/device.hpp"
#include "ttnn/tensor/tensor.hpp"
#include "ttnn/graph/graph_processor.hpp"
#include "ttnn/graph/graph_consts.hpp"

#include <string>

namespace ttnn::graph::test {

// Shapes for the buffers allocated in BufferTest: shape_a is allocated twice
// (in two separate scopes) and shape_b once — three allocations total.
struct BufferTestParam {
    ttnn::SimpleShape shape_a;  // allocated twice by the test body
    ttnn::SimpleShape shape_b;  // allocated once by the test body
};

// Value-parameterized fixture: combines a live device (from TTNNFixtureWithDevice)
// with a (BufferTestParam, graph-capture RunMode) tuple supplied by
// INSTANTIATE_TEST_SUITE_P below.
class BufferTestFixture
    : public TTNNFixtureWithDevice,
      public testing::WithParamInterface<std::tuple<BufferTestParam, tt::tt_metal::IGraphProcessor::RunMode>> {};

// Verifies that the graph processor assigns a distinct buffer node to every
// buffer allocation — even when a later allocation reuses the address (and
// formerly the pointer-derived id) of an earlier, already-freed buffer.
TEST_P(BufferTestFixture, BufferTest) {
    const auto param_combination = GetParam();
    const auto params = std::get<0>(param_combination);
    const auto run_mode = std::get<1>(param_combination);

    tt::tt_metal::Device* device = &(this->getDevice());
    {
        ttnn::graph::GraphProcessor::begin_graph_capture(run_mode);

        // Builds an L1, TILE-layout, BFLOAT16 tensor spec for the given shape.
        const auto make_spec = [](const ttnn::SimpleShape& shape) {
            return ttnn::TensorSpec(
                shape,
                tt::tt_metal::TensorLayout(
                    tt::tt_metal::DataType::BFLOAT16,
                    tt::tt_metal::PageConfig(tt::tt_metal::Layout::TILE),
                    ttnn::L1_MEMORY_CONFIG));
        };

        {
            // First allocation of shape_a; tensor (and its buffer) is freed at scope exit.
            const auto input_tensor_a = tt::tt_metal::create_device_tensor(make_spec(params.shape_a), device);
        }
        {
            // Second allocation of shape_a (may reuse the freed buffer's address)
            // plus one allocation of shape_b.
            const auto input_tensor_a = tt::tt_metal::create_device_tensor(make_spec(params.shape_a), device);
            const auto input_tensor_b = tt::tt_metal::create_device_tensor(make_spec(params.shape_b), device);
        }
        auto trace = ttnn::graph::GraphProcessor::end_graph_capture();

        // Collects all trace nodes of the requested node type.
        auto find_nodes_by_type = [](const auto& trace, const std::string& type) {
            std::vector<nlohmann::json> nodes;
            for (const auto& node : trace) {
                if (node.at(kNodeType) == type) {
                    nodes.push_back(node);
                }
            }
            return nodes;
        };

        // Expect three buffer_allocate nodes, each connected to exactly one buffer,
        // and all three connections pointing at distinct buffer nodes.
        auto buffer_allocate_nodes = find_nodes_by_type(trace, kNodeBufferAllocate);
        // ASSERT (not EXPECT): the [0..2] indexing below is out-of-bounds otherwise.
        ASSERT_EQ(buffer_allocate_nodes.size(), 3);
        for (const auto& node : buffer_allocate_nodes) {
            EXPECT_EQ(node.at(kConnections).size(), 1);
        }
        const auto connection_a = buffer_allocate_nodes[0].at(kConnections)[0].get<int>();
        const auto connection_a2 = buffer_allocate_nodes[1].at(kConnections)[0].get<int>();
        const auto connection_c = buffer_allocate_nodes[2].at(kConnections)[0].get<int>();
        EXPECT_NE(connection_a, connection_a2);
        EXPECT_NE(connection_a, connection_c);
        EXPECT_NE(connection_a2, connection_c);

        // Expect three buffer nodes carrying the expected sizes
        // (BFLOAT16 => 2 bytes per element).
        auto buffer_nodes = find_nodes_by_type(trace, kNodeBuffer);
        ASSERT_EQ(buffer_nodes.size(), 3);
        // stoul, not stoi: buffer sizes are unsigned and may not fit a signed int.
        const auto size_a = std::stoul(buffer_nodes[0].at(kParams).at(kSize).get<std::string>());
        EXPECT_EQ(params.shape_a.volume() * 2, size_a);
        const auto size_a2 = std::stoul(buffer_nodes[1].at(kParams).at(kSize).get<std::string>());
        EXPECT_EQ(params.shape_a.volume() * 2, size_a2);
        const auto size_b = std::stoul(buffer_nodes[2].at(kParams).at(kSize).get<std::string>());
        EXPECT_EQ(params.shape_b.volume() * 2, size_b);

        // Print the trace for reference
        std::cout << trace << std::endl;
    }
}

// Runs BufferTest for one shape pair under both capture modes. The name
// generator prefixes a process-wide counter (keeps names unique) and appends
// the run mode for readability.
INSTANTIATE_TEST_SUITE_P(
    BufferTest,
    BufferTestFixture,
    ::testing::Combine(
        ::testing::Values(BufferTestParam{
            .shape_a = ttnn::SimpleShape(tt::tt_metal::Array4D{1, 1, 32, 32}),
            .shape_b = ttnn::SimpleShape(tt::tt_metal::Array4D{32, 1, 32, 32})}),
        ::testing::Values(
            tt::tt_metal::IGraphProcessor::RunMode::NO_DISPATCH, tt::tt_metal::IGraphProcessor::RunMode::NORMAL)),
    [](const testing::TestParamInfo<std::tuple<BufferTestParam, tt::tt_metal::IGraphProcessor::RunMode>>& info) {
        static uint32_t uid = 0;

        std::stringstream name;
        name << uid++;
        switch (std::get<1>(info.param)) {
            case tt::tt_metal::IGraphProcessor::RunMode::NO_DISPATCH: name << "_NO_DISPATCH"; break;
            case tt::tt_metal::IGraphProcessor::RunMode::NORMAL: name << "_NORMAL"; break;
            default: break;
        }
        return name.str();
    });
} // namespace ttnn::graph::test
6 changes: 6 additions & 0 deletions tt_metal/impl/buffers/buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "tt_metal/types.hpp"

#include <algorithm>
#include <atomic>
#include <mutex>
#include <utility>
#include "tt_metal/common/base.hpp"
Expand All @@ -25,6 +26,8 @@ namespace tt {

namespace tt_metal {

std::atomic<size_t> Buffer::next_unique_id = 0;

std::ostream& operator<<(std::ostream& os, const ShardSpec& spec) {
tt::stl::reflection::operator<<(os, spec);
return os;
Expand Down Expand Up @@ -247,6 +250,7 @@ Buffer::Buffer(
if (size != 0) {
validate_buffer_size_and_page_size(size, page_size, buffer_type, buffer_layout, shard_parameters);
}
unique_id_ = next_unique_id.fetch_add(1);
}

std::shared_ptr<Buffer> Buffer::create(
Expand Down Expand Up @@ -524,6 +528,8 @@ v1::BufferHandle v1::CreateBuffer(InterleavedBufferConfig config) { return v1::B

void v1::DeallocateBuffer(const BufferHandle& buffer) { v0::DeallocateBuffer(*buffer); }

// Returns the buffer's process-wide unique id (assigned at construction).
std::size_t v1::GetId(const BufferHandle& buffer) { return buffer->unique_id(); }

void v1::WriteToBuffer(const BufferHandle& buffer, stl::Span<const std::byte> host_buffer) {
detail::WriteToBuffer(*buffer, stl::Span<const uint8_t>{reinterpret_cast<const std::uint8_t *>(host_buffer.data()), host_buffer.size()});
}
Expand Down
4 changes: 4 additions & 0 deletions tt_metal/impl/buffers/buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,8 @@ class Buffer final {
std::optional<SubDeviceId> sub_device_id() const { return sub_device_id_; }
std::optional<SubDeviceManagerId> sub_device_manager_id() const { return sub_device_manager_id_; }

// Process-wide unique id, taken from an atomic counter at construction time;
// unlike the buffer's address, it is never reused across allocations.
size_t unique_id() const { return unique_id_; }

Buffer(
Device *device,
DeviceAddr size,
Expand Down Expand Up @@ -300,6 +302,8 @@ class Buffer final {
std::shared_ptr<const BufferPageMapping> buffer_page_mapping_;

std::weak_ptr<Buffer> weak_self;
size_t unique_id_ = 0;
static std::atomic<size_t> next_unique_id;
};

} // namespace v0
Expand Down
8 changes: 8 additions & 0 deletions tt_metal/include/tt_metal/buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ BufferHandle CreateBuffer(InterleavedBufferConfig config);
*/
void DeallocateBuffer(const BufferHandle& buffer);

/**
* @brief Retrieves the ID of the specified buffer.
*
* @param buffer The buffer to get the ID from.
* @return The unique ID of the buffer.
*/
std::size_t GetId(const BufferHandle& buffer);

/**
* @brief Copies data from a host buffer into the specified device buffer.
*
Expand Down
32 changes: 16 additions & 16 deletions ttnn/cpp/ttnn/graph/graph_processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,8 @@ GraphProcessor::GraphProcessor(RunMode mode) : run_mode(mode) {
}
void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer) {
const std::lock_guard<std::mutex> lock(mutex);
auto buf_id = add_buffer(buffer);
auto buffer_id = add_buffer(buffer);

auto alloc_id = reinterpret_cast<std::uintptr_t>(buffer);
auto counter = graph.size();

std::unordered_map<std::string, std::string> params = {
Expand All @@ -113,15 +112,15 @@ void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer) {
.counter = counter,
.node_type = kNodeBufferAllocate,
.params = params,
.connections = {buf_id}
.connections = {buffer_id}
});
graph[current_op_id.top()].connections.push_back(counter);
}
}

void GraphProcessor::track_deallocate(tt::tt_metal::Buffer* buffer) {
const std::lock_guard<std::mutex> lock(mutex);
auto buffer_idx = add_buffer(buffer);
auto buffer_id = add_buffer(buffer);
auto counter = graph.size();
std::unordered_map<std::string, std::string> params = {
{kSize, std::to_string(buffer->size())},
Expand All @@ -135,7 +134,7 @@ void GraphProcessor::track_deallocate(tt::tt_metal::Buffer* buffer) {
.counter = counter,
.node_type = kNodeBufferDeallocate,
.params = params,
.connections = {buffer_idx}
.connections = {buffer_id}
});
graph[current_op_id.top()].connections.push_back(counter);
}
Expand Down Expand Up @@ -286,31 +285,31 @@ int GraphProcessor::add_tensor(const Tensor& t) {
} else {
tensor_id = t.tensor_id.value();
}
auto tensor_counter = id_to_counter.count(tensor_id) > 0 ? id_to_counter[tensor_id] : graph.size();
auto tensor_counter = tensor_id_to_counter.count(tensor_id) > 0 ? tensor_id_to_counter[tensor_id] : graph.size();
auto shape = t.get_shape();
std::unordered_map<std::string, std::string> params = {
{kShape, fmt::format("{}", shape)},
{kTensorId, fmt::format("{}", tensor_id)},
};

if (id_to_counter.count(tensor_id) == 0) {
if (tensor_id_to_counter.count(tensor_id) == 0) {
graph.push_back(Vertex{.counter = tensor_counter, .node_type = kNodeTensor, .params = params, .connections = {}});
id_to_counter[tensor_id] = tensor_counter;
tensor_id_to_counter[tensor_id] = tensor_counter;
}

if (buffer) {
auto buffer_idx = add_buffer(buffer);
graph[buffer_idx].connections.push_back(tensor_counter);
auto buffer_id = add_buffer(buffer);
graph[buffer_id].connections.push_back(tensor_counter);
} else {
tt::log_info("Tensor doesn't have buffer, but storage is {}", demangle(get_type_in_var(t.get_storage()).name()));
}
return tensor_counter;
}

int GraphProcessor::add_buffer(const tt::tt_metal::Buffer* buffer) {
auto buffer_alloc_id = reinterpret_cast<std::uintptr_t>(buffer);
auto counter = id_to_counter.count(buffer_alloc_id) > 0 ? id_to_counter[buffer_alloc_id] : graph.size();
if (id_to_counter.count(buffer_alloc_id) == 0) {
auto buffer_id = buffer->unique_id();
mbezuljTT marked this conversation as resolved.
Show resolved Hide resolved
auto counter = buffer_id_to_counter.count(buffer_id) > 0 ? buffer_id_to_counter[buffer_id] : graph.size();
if (buffer_id_to_counter.count(buffer_id) == 0) {
std::unordered_map<std::string, std::string> params = {
{kSize, std::to_string(buffer->size())},
{kType, buffer->is_dram() ? "DRAM" : "L1"},
Expand All @@ -324,10 +323,10 @@ int GraphProcessor::add_buffer(const tt::tt_metal::Buffer* buffer) {
.connections = {}
});
graph[current_op_id.top()].connections.push_back(counter);
id_to_counter[buffer_alloc_id] = counter;
buffer_id_to_counter[buffer_id] = counter;
return counter;
}
return id_to_counter[buffer_alloc_id];
return buffer_id_to_counter[buffer_id];
}


Expand Down Expand Up @@ -428,7 +427,8 @@ void GraphProcessor::end_function_process_optional_tensor(const std::any& any_va
void GraphProcessor::begin_capture(RunMode mode) {
const std::lock_guard<std::mutex> lock(mutex);
graph.clear();
id_to_counter.clear();
buffer_id_to_counter.clear();
tensor_id_to_counter.clear();
graph.push_back(Vertex{
.counter = 0,
.node_type = kNodeCaptureStart,
Expand Down
3 changes: 2 additions & 1 deletion ttnn/cpp/ttnn/graph/graph_processor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ namespace ttnn::graph {
std::mutex mutex;
RunMode run_mode = RunMode::NORMAL;
std::stack<int> current_op_id;
std::unordered_map<std::int64_t, int> id_to_counter;
std::unordered_map<std::int64_t, int> buffer_id_to_counter;
std::unordered_map<std::int64_t, int> tensor_id_to_counter;
int last_finished_op_id = -1;
std::vector<Vertex> graph;
std::unordered_map<std::type_index, ProcessFunc> begin_function_any_map;
Expand Down
Loading