#12624: Fix tracking of globally allocated CBs in graph capture
derdeljanTT committed Oct 31, 2024
1 parent fe1129c commit 4c9787e
Showing 6 changed files with 35 additions and 35 deletions.
4 changes: 2 additions & 2 deletions tt_metal/graph/graph_tracking.cpp
@@ -45,12 +45,12 @@ void GraphTracker::track_deallocate(Buffer* buffer) {
}
}

void GraphTracker::track_allocate_cb(const CoreRangeSet &core_range_set, uint64_t addr, uint64_t size) {
void GraphTracker::track_allocate_cb(const CoreRangeSet &core_range_set, uint64_t addr, uint64_t size, bool is_globally_allocated) {
if (processors.empty()) {
return;
}
for (auto& it : processors) {
it->track_allocate_cb(core_range_set, addr, size);
it->track_allocate_cb(core_range_set, addr, size, is_globally_allocated);
}
}

4 changes: 2 additions & 2 deletions tt_metal/graph/graph_tracking.hpp
@@ -32,7 +32,7 @@ inline namespace v0 {

virtual void track_deallocate(tt::tt_metal::Buffer* buffer) {};

virtual void track_allocate_cb(const CoreRangeSet &core_range_set, uint64_t addr, uint64_t size) {};
virtual void track_allocate_cb(const CoreRangeSet &core_range_set, uint64_t addr, uint64_t size, bool is_globally_allocated) {};

virtual void track_deallocate_cb() {};

@@ -81,7 +81,7 @@ inline namespace v0 {

void track_deallocate(Buffer* buffer);

void track_allocate_cb(const CoreRangeSet &core_range_set, uint64_t addr, uint64_t size);
void track_allocate_cb(const CoreRangeSet &core_range_set, uint64_t addr, uint64_t size, bool is_globally_allocated);

void track_deallocate_cb();

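The extra flag only widens the processor hook, so implementations just need to accept the new parameter. Below is a minimal sketch of a custom processor built against the virtuals above, splitting CB bytes by allocation kind; the base-class name (tt::tt_metal::IGraphProcessor) and the header path are assumptions taken from the tt-metal tree, not something this diff shows.

// Sketch only: a custom processor using the widened hook.
// Assumed base class and header path; not part of this commit.
#include <cstdint>
#include "tt_metal/graph/graph_tracking.hpp"

class CbAccountingProcessor : public tt::tt_metal::IGraphProcessor {
public:
    void track_allocate_cb(const CoreRangeSet& core_range_set,
                           uint64_t addr,
                           uint64_t size,
                           bool is_globally_allocated) override {
        (void)core_range_set;
        (void)addr;
        // Split CB bytes by allocation kind using the flag added in this commit.
        if (is_globally_allocated) {
            globally_allocated_cb_bytes += size;
        } else {
            locally_allocated_cb_bytes += size;
        }
    }

    uint64_t locally_allocated_cb_bytes = 0;
    uint64_t globally_allocated_cb_bytes = 0;
};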
2 changes: 1 addition & 1 deletion tt_metal/impl/program/program.cpp
@@ -700,7 +700,7 @@ void detail::Program_::allocate_circular_buffers(const Device *device) {
}
}
}
tt::tt_metal::GraphTracker::instance().track_allocate_cb(circular_buffer->core_ranges(), computed_addr, circular_buffer->size());
tt::tt_metal::GraphTracker::instance().track_allocate_cb(circular_buffer->core_ranges(), computed_addr, circular_buffer->size(), circular_buffer->globally_allocated());
circular_buffer->set_locally_allocated_address(computed_addr);
}
this->local_circular_buffer_allocation_needed_ = false;
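At this call site the flag comes from circular_buffer->globally_allocated(), i.e. from CBs that were pointed at an already-allocated L1 buffer rather than given a fresh local address. A rough host-side sketch of how such a CB is typically configured follows; the API names are from the tt-metal v0 host API, but the exact signatures, header path, and data format here are assumptions, not verbatim usage from this repository.

// Sketch, not verbatim tt-metal usage: header path, constructor form, and data format are assumed.
#include "tt_metal/host_api.hpp"

tt::tt_metal::CBHandle add_globally_allocated_cb(tt::tt_metal::Program& program,
                                                 const CoreRangeSet& cores,
                                                 const tt::tt_metal::Buffer& backing_l1_buffer,
                                                 uint8_t cb_index,
                                                 uint32_t cb_size_bytes) {
    tt::tt_metal::CircularBufferConfig cb_config =
        tt::tt_metal::CircularBufferConfig(cb_size_bytes, {{cb_index, tt::DataFormat::Float16_b}})
            // Back the CB with an existing L1 buffer; allocate_circular_buffers() then skips
            // local allocation and the tracking call above reports is_globally_allocated = true.
            .set_globally_allocated_address(backing_l1_buffer);

    return tt::tt_metal::CreateCircularBuffer(program, cores, cb_config);
}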
8 changes: 5 additions & 3 deletions ttnn/cpp/ttnn/graph/graph_processor.cpp
@@ -10,6 +10,7 @@
#include "ttnn/graph/graph_consts.hpp"
#include <cxxabi.h>
#include <memory>
#include <string>
#include <typeindex>
#include <unordered_map>
#include "ttnn/core.hpp"
@@ -135,12 +136,13 @@ void GraphProcessor::track_deallocate(tt::tt_metal::Buffer* buffer) {

}

void GraphProcessor::track_allocate_cb(const CoreRangeSet &core_range_set, uint64_t addr, uint64_t size) {
void GraphProcessor::track_allocate_cb(const CoreRangeSet &core_range_set, uint64_t addr, uint64_t size, bool is_globally_allocated) {
const std::lock_guard<std::mutex> lock(mutex);
std::unordered_map<std::string, std::string> params = {
{kSize, std::to_string(size)},
{kAddress, std::to_string(addr)},
{"core_range_set", core_range_set.str()}
{"core_range_set", core_range_set.str()},
{"globally_allocated", std::to_string(is_globally_allocated)}
};
auto counter = graph.size();
{
@@ -179,7 +181,7 @@ void GraphProcessor::track_program(tt::tt_metal::Program* program) {
}

for (auto& cb : program->circular_buffers()) {
track_allocate_cb(cb->core_ranges(), 0, cb->size());
track_allocate_cb(cb->core_ranges(), 0, cb->size(), cb->globally_allocated());
}
}

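On the capture side the flag is recorded in the node's params next to the CB's size and address, so trace consumers can tell the two kinds of CB apart. A sketch of such a consumer is below; only the "globally_allocated" key is taken verbatim from this commit, while the "node_type", "params", and "size" strings and the "circular_buffer_allocate" node type are assumptions about the constants in graph_consts.hpp. The flag is serialized through std::to_string, i.e. as "0" or "1".

// Sketch: sums only locally allocated CB bytes from a captured trace.
#include <cstdint>
#include <string>
#include <nlohmann/json.hpp>

uint64_t sum_locally_allocated_cb_bytes(const nlohmann::json& trace) {
    uint64_t total = 0;
    for (const auto& node : trace) {
        if (node["node_type"] != "circular_buffer_allocate") {
            continue;
        }
        const auto& params = node["params"];
        // Skip CBs flagged as globally allocated; they reuse an existing buffer's space.
        if (params["globally_allocated"].get<std::string>() == "1") {
            continue;
        }
        total += std::stoull(params["size"].get<std::string>());
    }
    return total;
}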
2 changes: 1 addition & 1 deletion ttnn/cpp/ttnn/graph/graph_processor.hpp
@@ -44,7 +44,7 @@ namespace ttnn::graph {

void track_deallocate(tt::tt_metal::Buffer* buffer) override;

void track_allocate_cb(const CoreRangeSet &core_range, uint64_t addr, uint64_t size) override;
void track_allocate_cb(const CoreRangeSet &core_range, uint64_t addr, uint64_t size, bool is_globally_allocated) override;

void track_deallocate_cb() override;

50 changes: 24 additions & 26 deletions ttnn/cpp/ttnn/graph/graph_trace_utils.cpp
@@ -4,17 +4,15 @@

#include "graph_trace_utils.hpp"

#include "graph_processor.hpp"
#include "graph_consts.hpp"


#include "tt_metal/common/assert.hpp"

#include <unordered_set>
#include <cstdlib> // std::strtoul
#include <string>
#include <cstdlib> // std::strtoul


#include "graph_consts.hpp"
#include "graph_processor.hpp"
#include "tt_metal/common/assert.hpp"

namespace ttnn::graph {

namespace {
@@ -32,20 +30,21 @@ ttnn::Shape parse_shape(std::string_view shape_string) {
while (str < end_str) {
char* next;
uint32_t value = std::strtoul(str, &next, 10);
if (str == next) break; // no conversion happened
if (str == next)
break; // no conversion happened
shape.push_back(value);
str = next;
if (*str == ',') {
++str; // skip the comma
++str; // skip the comma
}
if (*str == ' ') {
++str; // skip spaces, assume a single space
++str; // skip spaces, assume a single space
}
}

return ttnn::Shape(shape);
}
} // namespace
} // namespace

uint32_t extract_peak_L1_memory_usage(const nlohmann::json& trace) {
uint32_t total_cb = 0;
@@ -80,7 +79,7 @@ uint32_t extract_peak_L1_memory_usage(const nlohmann::json& trace) {
} else if (v[kNodeType] == kNodeBufferDeallocate) {
auto connection = v[kConnections][0].get<int>();
auto buffer = trace[connection];
if(buffer[kParams][kType] == "L1") {
if (buffer[kParams][kType] == "L1") {
total_buffer -= stoi(buffer[kParams][kSize].get<std::string>());
}
} else if (v[kNodeType] == kNodeFunctionEnd) {
@@ -113,7 +112,7 @@ std::pair<uint32_t, uint32_t> count_intermediate_and_output_tensors(const nlohma
last_end_found = true;
last_end_index = i;

if(v[kParams][kName] == "create_device_tensor") {
if (v[kParams][kName] == "create_device_tensor") {
auto id = v[kConnections][0].get<int>();
intermediate_tensors.insert(id);
}
@@ -124,22 +123,22 @@ std::pair<uint32_t, uint32_t> count_intermediate_and_output_tensors(const nlohma
TT_ASSERT(last_end_found);

auto connections = trace[last_end_index][kConnections].get<std::unordered_set<uint32_t>>();
for(auto index : connections) {
for (auto index : connections) {
// It can be tensor or some other node like
if(trace[index][kNodeType] == kNodeTensor) {
if (trace[index][kNodeType] == kNodeTensor) {
output_tensors.insert(index);
}
}

for(int index : output_tensors) {
for (int index : output_tensors) {
intermediate_tensors.erase(index);
}

// Return the counts of intermediate and output tensors
return {intermediate_tensors.size(), output_tensors.size()};
}

std::vector<std::string> extract_calltrace(const nlohmann::json& trace){
std::vector<std::string> extract_calltrace(const nlohmann::json& trace) {
std::vector<std::string> op_calls;
size_t i = 0;

@@ -155,11 +154,10 @@ std::vector<std::string> extract_calltrace(const nlohmann::json& trace){
return op_calls;
}

std::unordered_set<uint32_t> extract_output_tensors(const nlohmann::json& trace)
{
std::unordered_set<uint32_t> extract_output_tensors(const nlohmann::json& trace) {
// Lambda to find the last 'function_end' node
auto find_function_end_node = [](const auto& trace) -> const nlohmann::json& {
for(int i = trace.size() - 1; i >= 0; --i) {
for (int i = trace.size() - 1; i >= 0; --i) {
const auto& v = trace[i];
if (v[kNodeType] == kNodeFunctionEnd) {
return v;
@@ -187,33 +185,33 @@ std::unordered_set<uint32_t> extract_output_tensors(const nlohmann::json& trace)
return output_tensors;
}

std::vector<TensorInfo> extract_output_info(const nlohmann::json& trace)
{
std::vector<TensorInfo> extract_output_info(const nlohmann::json& trace) {
std::vector<TensorInfo> output;
auto output_tensors = extract_output_tensors(trace);

for (const auto& node : trace) {
if (node[kNodeType] != kNodeBuffer )
if (node[kNodeType] != kNodeBuffer)
continue;

auto connections = node[kConnections].get<std::unordered_set<uint32_t>>();
for (const auto& tensor_id : connections) {
if (output_tensors.find(tensor_id) == output_tensors.end())
continue;

const auto type = node[kParams][kType] == "L1" ? tt::tt_metal::BufferType::L1 : tt::tt_metal::BufferType::DRAM;
const auto type =
node[kParams][kType] == "L1" ? tt::tt_metal::BufferType::L1 : tt::tt_metal::BufferType::DRAM;
const auto size = stoi(node[kParams][kSize].get<std::string>());

const auto& tensor = trace[tensor_id];
const std::string shape_string = tensor[kParams][kShape];
const auto shape = parse_shape(shape_string);

output.emplace_back(TensorInfo {.shape = shape, .size = size, .type = type});
output.emplace_back(TensorInfo{.shape = shape, .size = size, .type = type});
}
}

return output;
}


} // namespace ttnn::graph
} // namespace ttnn::graph
