fix graph_processor buffer_alloc_id calculus (#15426)
buffer_alloc_id was assigned per memory address; however, this produced wrong results when capturing a composite op that allocates and deallocates entire tensors, because two different buffers could end up with the same address.

Add buffer->unique_id() and use it as the key into the id_to_counter map.
Split id_to_counter into tensor_id_to_counter and buffer_id_to_counter.
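
To make the failure mode concrete, here is a minimal standalone sketch (not tt-metal code; FakeBuffer and main are hypothetical stand-ins): keying graph nodes on a buffer's memory address conflates two distinct buffers once the first is freed and its address is reused, while an id drawn from an atomic counter never repeats.

// Standalone sketch: an address-keyed map can conflate two distinct buffers
// once the first is freed and the allocator reuses its address; an id drawn
// from an atomic counter never repeats.
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>

struct FakeBuffer {  // hypothetical stand-in for tt::tt_metal::Buffer
    static std::atomic<std::size_t> next_unique_id;
    std::size_t unique_id = next_unique_id.fetch_add(1);
};
std::atomic<std::size_t> FakeBuffer::next_unique_id{0};

int main() {
    auto* a = new FakeBuffer();  // composite op allocates a tensor's buffer...
    auto a_addr = reinterpret_cast<std::uintptr_t>(a);
    auto a_id = a->unique_id;
    delete a;                    // ...and deallocates it mid-capture

    auto* b = new FakeBuffer();
    // If the allocator reuses the freed address, an address-keyed map would
    // map b onto a's graph node:
    bool address_reused = (reinterpret_cast<std::uintptr_t>(b) == a_addr);
    (void)address_reused;          // may or may not happen on a given run
    assert(b->unique_id != a_id);  // the unique id, however, never collides
    delete b;
    return 0;
}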
mbezuljTT authored Nov 30, 2024
1 parent 35cd995 commit 273a940
Showing 7 changed files with 160 additions and 17 deletions.
1 change: 1 addition & 0 deletions tests/ttnn/unit_tests/gtests/CMakeLists.txt
@@ -1,6 +1,7 @@
set(TTNN_UNIT_TESTS_SRC
${CMAKE_CURRENT_SOURCE_DIR}/test_add.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_graph_add.cpp
+${CMAKE_CURRENT_SOURCE_DIR}/test_graph_basic.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_async_runtime.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_multiprod_queue.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_multi_cq_multi_dev.cpp
123 changes: 123 additions & 0 deletions tests/ttnn/unit_tests/gtests/test_graph_basic.cpp
@@ -0,0 +1,123 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include "gtest/gtest.h"
#include "ttnn_test_fixtures.hpp"
#include "ttnn/device.hpp"
#include "ttnn/tensor/tensor.hpp"
#include "ttnn/graph/graph_processor.hpp"
#include "ttnn/graph/graph_consts.hpp"

#include <string>

namespace ttnn::graph::test {

struct BufferTestParam {
    ttnn::SimpleShape shape_a;
    ttnn::SimpleShape shape_b;
};

class BufferTestFixture
    : public TTNNFixtureWithDevice,
      public testing::WithParamInterface<std::tuple<BufferTestParam, tt::tt_metal::IGraphProcessor::RunMode>> {};

TEST_P(BufferTestFixture, BufferTest) {
    auto param_combination = GetParam();
    auto params = std::get<0>(param_combination);
    auto run_mode = std::get<1>(param_combination);

    tt::tt_metal::Device* device = &(this->getDevice());
    {
        ttnn::graph::GraphProcessor::begin_graph_capture(run_mode);
        {
            const auto input_a = ttnn::TensorSpec(
                params.shape_a,
                tt::tt_metal::TensorLayout(
                    tt::tt_metal::DataType::BFLOAT16,
                    tt::tt_metal::PageConfig(tt::tt_metal::Layout::TILE),
                    ttnn::L1_MEMORY_CONFIG));
            const auto input_tensor_a = tt::tt_metal::create_device_tensor(input_a, device);
        }
        {
            const auto input_a = ttnn::TensorSpec(
                params.shape_a,
                tt::tt_metal::TensorLayout(
                    tt::tt_metal::DataType::BFLOAT16,
                    tt::tt_metal::PageConfig(tt::tt_metal::Layout::TILE),
                    ttnn::L1_MEMORY_CONFIG));
            const auto input_tensor_a = tt::tt_metal::create_device_tensor(input_a, device);

            const auto input_b = ttnn::TensorSpec(
                params.shape_b,
                tt::tt_metal::TensorLayout(
                    tt::tt_metal::DataType::BFLOAT16,
                    tt::tt_metal::PageConfig(tt::tt_metal::Layout::TILE),
                    ttnn::L1_MEMORY_CONFIG));

            const auto input_tensor_b = tt::tt_metal::create_device_tensor(input_b, device);
        }
        auto trace = ttnn::graph::GraphProcessor::end_graph_capture();

        auto find_nodes_by_type = [](const auto& trace, const std::string& type) {
            std::vector<nlohmann::json> nodes;
            for (const auto& node : trace) {
                if (node.at(kNodeType) == type) {
                    nodes.push_back(node);
                }
            }
            return nodes;
        };

        // Check that there are three buffer_allocate nodes and that each connects to exactly one distinct buffer
        auto buffer_allocate_nodes = find_nodes_by_type(trace, kNodeBufferAllocate);
        EXPECT_EQ(buffer_allocate_nodes.size(), 3);
        for (const auto& node : buffer_allocate_nodes) {
            EXPECT_EQ(node.at(kConnections).size(), 1);
        }
        auto connection_a = buffer_allocate_nodes[0].at(kConnections)[0].get<int>();
        auto connection_a2 = buffer_allocate_nodes[1].at(kConnections)[0].get<int>();
        auto connection_c = buffer_allocate_nodes[2].at(kConnections)[0].get<int>();
        EXPECT_NE(connection_a, connection_a2);
        EXPECT_NE(connection_a, connection_c);
        EXPECT_NE(connection_a2, connection_c);

        // Check that there are three buffer nodes and that they have the correct sizes
        auto buffer_nodes = find_nodes_by_type(trace, kNodeBuffer);
        EXPECT_EQ(buffer_nodes.size(), 3);
        auto size_a = std::stoi(buffer_nodes[0].at(kParams).at(kSize).get<std::string>());
        EXPECT_EQ(params.shape_a.volume() * 2, size_a);
        auto size_a2 = std::stoi(buffer_nodes[1].at(kParams).at(kSize).get<std::string>());
        EXPECT_EQ(params.shape_a.volume() * 2, size_a2);
        auto size_b = std::stoi(buffer_nodes[2].at(kParams).at(kSize).get<std::string>());
        EXPECT_EQ(params.shape_b.volume() * 2, size_b);

        // Print the trace for reference
        std::cout << trace << std::endl;
    }
}

INSTANTIATE_TEST_SUITE_P(
    BufferTest,
    BufferTestFixture,
    ::testing::Combine(
        ::testing::Values(BufferTestParam{
            .shape_a = ttnn::SimpleShape(tt::tt_metal::Array4D{1, 1, 32, 32}),
            .shape_b = ttnn::SimpleShape(tt::tt_metal::Array4D{32, 1, 32, 32})}),
        ::testing::Values(
            tt::tt_metal::IGraphProcessor::RunMode::NO_DISPATCH, tt::tt_metal::IGraphProcessor::RunMode::NORMAL)),
    [](const testing::TestParamInfo<std::tuple<BufferTestParam, tt::tt_metal::IGraphProcessor::RunMode>>& info) {
        std::stringstream ss;

        static uint32_t uid = 0;
        ss << uid++;

        const auto& run_mode = std::get<1>(info.param);
        switch (run_mode) {
            case tt::tt_metal::IGraphProcessor::RunMode::NO_DISPATCH: ss << "_NO_DISPATCH"; break;
            case tt::tt_metal::IGraphProcessor::RunMode::NORMAL: ss << "_NORMAL"; break;
            default: break;
        }
        return ss.str();
    });
} // namespace ttnn::graph::test
6 changes: 6 additions & 0 deletions tt_metal/impl/buffers/buffer.cpp
@@ -13,6 +13,7 @@
#include "tt_metal/types.hpp"

#include <algorithm>
+#include <atomic>
#include <mutex>
#include <utility>
#include "tt_metal/common/base.hpp"
@@ -25,6 +26,8 @@ namespace tt {

namespace tt_metal {

+std::atomic<size_t> Buffer::next_unique_id = 0;

std::ostream& operator<<(std::ostream& os, const ShardSpec& spec) {
tt::stl::reflection::operator<<(os, spec);
return os;
@@ -247,6 +250,7 @@ Buffer::Buffer(
if (size != 0) {
validate_buffer_size_and_page_size(size, page_size, buffer_type, buffer_layout, shard_parameters);
}
+unique_id_ = next_unique_id.fetch_add(1);
}

std::shared_ptr<Buffer> Buffer::create(
@@ -524,6 +528,8 @@ v1::BufferHandle v1::CreateBuffer(InterleavedBufferConfig config) { return v1::B

void v1::DeallocateBuffer(const BufferHandle& buffer) { v0::DeallocateBuffer(*buffer); }

+std::size_t v1::GetId(const BufferHandle& buffer) { return buffer->unique_id(); }

void v1::WriteToBuffer(const BufferHandle& buffer, stl::Span<const std::byte> host_buffer) {
detail::WriteToBuffer(*buffer, stl::Span<const uint8_t>{reinterpret_cast<const std::uint8_t *>(host_buffer.data()), host_buffer.size()});
}
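As a sanity check of the property the change relies on, a minimal standalone model (not tt-metal code): fetch_add is an atomic read-modify-write, so constructors racing on next_unique_id still observe pairwise-distinct ids.

// Minimal model: ten threads each draw 100 ids from one atomic counter;
// all 1000 results are distinct regardless of interleaving.
#include <atomic>
#include <cstddef>
#include <set>
#include <thread>
#include <vector>

int main() {
    std::atomic<std::size_t> next_unique_id{0};
    std::vector<std::size_t> ids(1000);
    std::vector<std::thread> threads;
    for (int t = 0; t < 10; ++t) {
        threads.emplace_back([&ids, &next_unique_id, t] {
            for (int i = 0; i < 100; ++i) {
                ids[t * 100 + i] = next_unique_id.fetch_add(1);
            }
        });
    }
    for (auto& th : threads) {
        th.join();
    }
    std::set<std::size_t> distinct(ids.begin(), ids.end());
    return distinct.size() == 1000 ? 0 : 1;  // 0: every id was unique
}

A relaxed memory order would also suffice here, since only the uniqueness of the returned values matters; the sketch keeps the default sequentially consistent ordering for simplicity.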
4 changes: 4 additions & 0 deletions tt_metal/impl/buffers/buffer.hpp
@@ -248,6 +248,8 @@ class Buffer final {
std::optional<SubDeviceId> sub_device_id() const { return sub_device_id_; }
std::optional<SubDeviceManagerId> sub_device_manager_id() const { return sub_device_manager_id_; }

+size_t unique_id() const { return unique_id_; }

Buffer(
Device *device,
DeviceAddr size,
@@ -300,6 +302,8 @@
std::shared_ptr<const BufferPageMapping> buffer_page_mapping_;

std::weak_ptr<Buffer> weak_self;
+size_t unique_id_ = 0;
+static std::atomic<size_t> next_unique_id;
};

} // namespace v0
8 changes: 8 additions & 0 deletions tt_metal/include/tt_metal/buffer.hpp
@@ -28,6 +28,14 @@ BufferHandle CreateBuffer(InterleavedBufferConfig config);
*/
void DeallocateBuffer(const BufferHandle& buffer);

+/**
+ * @brief Retrieves the ID of the specified buffer.
+ *
+ * @param buffer The buffer to get the ID from.
+ * @return The unique ID of the buffer.
+ */
+std::size_t GetId(const BufferHandle& buffer);

/**
* @brief Copies data from a host buffer into the specified device buffer.
*
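A hypothetical caller of the new v1::GetId, based only on the declarations visible in this commit; the InterleavedBufferConfig field names and the include path are assumptions and may not match the actual headers.

// Hypothetical usage sketch (include path and config fields assumed).
#include "tt_metal/buffer.hpp"

#include <cstddef>

void example(tt::tt_metal::Device* device) {
    using namespace tt::tt_metal;
    InterleavedBufferConfig config{
        .device = device,                  // assumed field
        .size = 2048,                      // assumed field, bytes
        .page_size = 1024,                 // assumed field, bytes
        .buffer_type = BufferType::DRAM};  // assumed field

    v1::BufferHandle buf = v1::CreateBuffer(config);
    std::size_t id = v1::GetId(buf);  // stable across address reuse
    v1::DeallocateBuffer(buf);

    v1::BufferHandle buf2 = v1::CreateBuffer(config);
    // v1::GetId(buf2) != id even if buf2 lands at the freed device address.
}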
32 changes: 16 additions & 16 deletions ttnn/cpp/ttnn/graph/graph_processor.cpp
@@ -95,9 +95,8 @@ GraphProcessor::GraphProcessor(RunMode mode) : run_mode(mode) {
}
void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer) {
const std::lock_guard<std::mutex> lock(mutex);
-auto buf_id = add_buffer(buffer);
+auto buffer_id = add_buffer(buffer);

-auto alloc_id = reinterpret_cast<std::uintptr_t>(buffer);
auto counter = graph.size();

std::unordered_map<std::string, std::string> params = {
@@ -113,15 +112,15 @@ void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer) {
.counter = counter,
.node_type = kNodeBufferAllocate,
.params = params,
-.connections = {buf_id}
+.connections = {buffer_id}
});
graph[current_op_id.top()].connections.push_back(counter);
}
}

void GraphProcessor::track_deallocate(tt::tt_metal::Buffer* buffer) {
const std::lock_guard<std::mutex> lock(mutex);
-auto buffer_idx = add_buffer(buffer);
+auto buffer_id = add_buffer(buffer);
auto counter = graph.size();
std::unordered_map<std::string, std::string> params = {
{kSize, std::to_string(buffer->size())},
@@ -135,7 +134,7 @@ void GraphProcessor::track_deallocate(tt::tt_metal::Buffer* buffer) {
.counter = counter,
.node_type = kNodeBufferDeallocate,
.params = params,
-.connections = {buffer_idx}
+.connections = {buffer_id}
});
graph[current_op_id.top()].connections.push_back(counter);
}
@@ -286,31 +285,31 @@ int GraphProcessor::add_tensor(const Tensor& t) {
} else {
tensor_id = t.tensor_id.value();
}
-auto tensor_counter = id_to_counter.count(tensor_id) > 0 ? id_to_counter[tensor_id] : graph.size();
+auto tensor_counter = tensor_id_to_counter.count(tensor_id) > 0 ? tensor_id_to_counter[tensor_id] : graph.size();
auto shape = t.get_shape();
std::unordered_map<std::string, std::string> params = {
{kShape, fmt::format("{}", shape)},
{kTensorId, fmt::format("{}", tensor_id)},
};

-if (id_to_counter.count(tensor_id) == 0) {
+if (tensor_id_to_counter.count(tensor_id) == 0) {
graph.push_back(Vertex{.counter = tensor_counter, .node_type = kNodeTensor, .params = params, .connections = {}});
-id_to_counter[tensor_id] = tensor_counter;
+tensor_id_to_counter[tensor_id] = tensor_counter;
}

if (buffer) {
-auto buffer_idx = add_buffer(buffer);
-graph[buffer_idx].connections.push_back(tensor_counter);
+auto buffer_id = add_buffer(buffer);
+graph[buffer_id].connections.push_back(tensor_counter);
} else {
tt::log_info("Tensor doesn't have buffer, but storage is {}", demangle(get_type_in_var(t.get_storage()).name()));
}
return tensor_counter;
}

int GraphProcessor::add_buffer(const tt::tt_metal::Buffer* buffer) {
-auto buffer_alloc_id = reinterpret_cast<std::uintptr_t>(buffer);
-auto counter = id_to_counter.count(buffer_alloc_id) > 0 ? id_to_counter[buffer_alloc_id] : graph.size();
-if (id_to_counter.count(buffer_alloc_id) == 0) {
+auto buffer_id = buffer->unique_id();
+auto counter = buffer_id_to_counter.count(buffer_id) > 0 ? buffer_id_to_counter[buffer_id] : graph.size();
+if (buffer_id_to_counter.count(buffer_id) == 0) {
std::unordered_map<std::string, std::string> params = {
{kSize, std::to_string(buffer->size())},
{kType, buffer->is_dram() ? "DRAM" : "L1"},
Expand All @@ -324,10 +323,10 @@ int GraphProcessor::add_buffer(const tt::tt_metal::Buffer* buffer) {
.connections = {}
});
graph[current_op_id.top()].connections.push_back(counter);
-id_to_counter[buffer_alloc_id] = counter;
+buffer_id_to_counter[buffer_id] = counter;
return counter;
}
-return id_to_counter[buffer_alloc_id];
+return buffer_id_to_counter[buffer_id];
}


@@ -428,7 +427,8 @@ void GraphProcessor::end_function_process_optional_tensor(const std::any& any_val
void GraphProcessor::begin_capture(RunMode mode) {
const std::lock_guard<std::mutex> lock(mutex);
graph.clear();
-id_to_counter.clear();
+buffer_id_to_counter.clear();
+tensor_id_to_counter.clear();
graph.push_back(Vertex{
.counter = 0,
.node_type = kNodeCaptureStart,
Expand Down
3 changes: 2 additions & 1 deletion ttnn/cpp/ttnn/graph/graph_processor.hpp
@@ -73,7 +73,8 @@ namespace ttnn::graph {
std::mutex mutex;
RunMode run_mode = RunMode::NORMAL;
std::stack<int> current_op_id;
-std::unordered_map<std::int64_t, int> id_to_counter;
+std::unordered_map<std::int64_t, int> buffer_id_to_counter;
+std::unordered_map<std::int64_t, int> tensor_id_to_counter;
int last_finished_op_id = -1;
std::vector<Vertex> graph;
std::unordered_map<std::type_index, ProcessFunc> begin_function_any_map;
