From 561e2fd349ad4598150190ef531288cf6c21104f Mon Sep 17 00:00:00 2001
From: Austin Ho
Date: Mon, 21 Oct 2024 13:45:40 +0000
Subject: [PATCH] #14038: Remove global BUFFER_MAP and make the tracking of
 buffers local to an allocator

---
 tt_metal/detail/tt_metal.hpp             |   2 +-
 tt_metal/graph/graph_tracking.cpp        |   8 +-
 tt_metal/graph/graph_tracking.hpp        |   8 +-
 tt_metal/impl/allocator/allocator.cpp    |  41 ++++---
 tt_metal/impl/allocator/allocator.hpp    |  13 ++-
 tt_metal/impl/buffers/buffer.cpp         |  31 +++--
 tt_metal/impl/buffers/buffer.hpp         |  36 +-----
 tt_metal/impl/device/device.cpp          |  11 +-
 tt_metal/impl/device/device.hpp          |   2 +
 tt_metal/impl/dispatch/command_queue.hpp |   3 -
 tt_metal/tt_metal.cpp                    |  26 ++---
 ttnn/cpp/ttnn/graph/graph_processor.cpp  |   4 +-
 ttnn/cpp/ttnn/graph/graph_processor.hpp  |   4 +-
 ttnn/cpp/ttnn/reports.hpp                | 139 +++++++++++------------
 14 files changed, 156 insertions(+), 172 deletions(-)

diff --git a/tt_metal/detail/tt_metal.hpp b/tt_metal/detail/tt_metal.hpp
index d6168102a5e..e5464e721a6 100644
--- a/tt_metal/detail/tt_metal.hpp
+++ b/tt_metal/detail/tt_metal.hpp
@@ -276,7 +276,7 @@ inline namespace v0 {
 
     void SetLazyCommandQueueMode(bool lazy);
 
-    DeviceAddr AllocateBuffer(const Buffer* buffer, bool bottom_up);
+    DeviceAddr AllocateBuffer(Buffer* buffer);
 
     void DeallocateBuffer(Buffer *buffer);
 }  // namespace detail
diff --git a/tt_metal/graph/graph_tracking.cpp b/tt_metal/graph/graph_tracking.cpp
index 17a72ddd5ee..c12eff0d7ec 100644
--- a/tt_metal/graph/graph_tracking.cpp
+++ b/tt_metal/graph/graph_tracking.cpp
@@ -27,12 +27,12 @@ bool GraphTracker::add_hook(const std::shared_ptr<IGraphHooks>& new_hook) {
     return true;
 }
 
-void GraphTracker::track_allocate(const Buffer* buffer, bool bottom_up) {
+void GraphTracker::track_allocate(const Buffer* buffer) {
     if (processors.empty()) {
         return;
    }
     for (auto& it : processors) {
-        it->track_allocate(buffer, bottom_up);
+        it->track_allocate(buffer);
     }
 }
 
@@ -73,11 +73,11 @@ void GraphTracker::track_program(Program* program) {
     }
 }
 
-bool GraphTracker::hook_allocate(const Buffer* buffer, bool bottom_up) {
+bool GraphTracker::hook_allocate(const Buffer* buffer) {
     if (hook == nullptr)
         return false;
 
-    return hook->hook_allocate(buffer, bottom_up);
+    return hook->hook_allocate(buffer);
 }
 
 bool GraphTracker::hook_deallocate(Buffer* buffer) {
diff --git a/tt_metal/graph/graph_tracking.hpp b/tt_metal/graph/graph_tracking.hpp
index 54ee8eef41d..712373ab005 100644
--- a/tt_metal/graph/graph_tracking.hpp
+++ b/tt_metal/graph/graph_tracking.hpp
@@ -28,7 +28,7 @@ inline namespace v0 {
 
         IGraphProcessor() = default;
 
-        virtual void track_allocate(const tt::tt_metal::Buffer* buffer, bool bottom_up) {};
+        virtual void track_allocate(const tt::tt_metal::Buffer* buffer) {};
 
         virtual void track_deallocate(tt::tt_metal::Buffer* buffer) {};
 
@@ -54,7 +54,7 @@ inline namespace v0 {
     class IGraphHooks {
     public:
         IGraphHooks() = default;
-        virtual bool hook_allocate(const tt::tt_metal::Buffer* buffer, bool bottom_up) = 0;
+        virtual bool hook_allocate(const tt::tt_metal::Buffer* buffer) = 0;
 
         virtual bool hook_deallocate(tt::tt_metal::Buffer* buffer) = 0;
 
@@ -77,7 +77,7 @@ inline namespace v0 {
 
         bool add_hook(const std::shared_ptr<IGraphHooks>& hook);
 
-        void track_allocate(const Buffer* buffer, bool bottom_up);
+        void track_allocate(const Buffer* buffer);
 
         void track_deallocate(Buffer* buffer);
 
@@ -118,7 +118,7 @@ inline namespace v0 {
             }
         }
 
-        bool hook_allocate(const Buffer* buffer, bool bottom_up);
+        bool hook_allocate(const Buffer* buffer);
 
         bool hook_deallocate(Buffer* buffer);
 
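For orientation (a sketch, not part of the patch): with the second parameter gone, a tracker that still needs the growth direction reads it off the buffer itself via the Buffer::bottom_up() accessor added in buffer.hpp below. SketchProcessor is a hypothetical name:

    // Sketch only: an implementer against the narrowed single-argument interface.
    class SketchProcessor : public tt::tt_metal::IGraphProcessor {
        void track_allocate(const tt::tt_metal::Buffer* buffer) override {
            bool bottom_up = buffer->bottom_up();  // formerly passed as the 2nd argument
            (void)bottom_up;                       // use as needed by the tracker
        }
    };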
diff --git a/tt_metal/impl/allocator/allocator.cpp b/tt_metal/impl/allocator/allocator.cpp
index 023826e5cd9..7e760b3bf37 100644
--- a/tt_metal/impl/allocator/allocator.cpp
+++ b/tt_metal/impl/allocator/allocator.cpp
@@ -377,38 +377,45 @@ void verify_safe_allocation(Allocator& allocator) {
     }
 }
 
-uint64_t allocate_buffer(
-    Allocator &allocator,
-    DeviceAddr size,
-    DeviceAddr page_size,
-    const BufferType &buffer_type,
-    bool bottom_up,
-    std::optional<uint32_t> num_shards) {
-    uint64_t address = 0;
+const std::unordered_set<Buffer *> &get_allocated_buffers(const Allocator &allocator) { return allocator.allocated_buffers; }
+
+DeviceAddr allocate_buffer(Allocator &allocator, DeviceAddr size, Buffer *buffer) {
+    DeviceAddr address = 0;
+    auto page_size = buffer->page_size();
+    auto buffer_type = buffer->buffer_type();
+    auto bottom_up = buffer->bottom_up();
+    auto num_shards = buffer->num_cores();
     verify_safe_allocation(allocator);
     switch (buffer_type) {
         case BufferType::DRAM:
-            return allocator.descriptor.dram.alloc(
+            address = allocator.descriptor.dram.alloc(
                 allocator.config, allocator.dram_manager, size, page_size, bottom_up, num_shards);
+            break;
         case BufferType::L1:
-            return allocator.descriptor.l1.alloc(
+            address = allocator.descriptor.l1.alloc(
                 allocator.config, allocator.l1_manager, size, page_size, bottom_up, num_shards);
+            break;
        case BufferType::L1_SMALL: {
             TT_FATAL(num_shards.has_value(), "L1_SMALL only supports sharded allocations, see validate_num_banks");
-            return allocator.descriptor.l1.alloc(
+            address = allocator.descriptor.l1.alloc(
                 allocator.config, allocator.l1_small_manager, size, page_size, bottom_up, num_shards);
-        case BufferType::TRACE:
-            return allocator.descriptor.dram.alloc(
-                allocator.config, allocator.trace_buffer_manager, size, page_size, bottom_up, num_shards);
+            break;
         }
+        case BufferType::TRACE:
+            address = allocator.descriptor.dram.alloc(
+                allocator.config, allocator.trace_buffer_manager, size, page_size, bottom_up, num_shards);
+            break;
         default: {
            TT_THROW("Unsupported buffer type!");
         }
     }
+    allocator.allocated_buffers.insert(buffer);
     return address;
 }
 
-void deallocate_buffer(Allocator &allocator, DeviceAddr address, const BufferType &buffer_type) {
+void deallocate_buffer(Allocator &allocator, Buffer *buffer) {
+    auto address = buffer->address();
+    auto buffer_type = buffer->buffer_type();
     switch (buffer_type) {
         case BufferType::DRAM: allocator.dram_manager.deallocate_buffer(address); break;
         case BufferType::L1: allocator.l1_manager.deallocate_buffer(address); break;
@@ -418,6 +425,7 @@ void deallocate_buffer(Allocator &allocator, DeviceAddr address, const BufferTyp
             TT_THROW("Unsupported buffer type!");
         }
     }
+    allocator.allocated_buffers.erase(buffer);
 }
 
 void deallocate_buffers(Allocator &allocator) {
@@ -425,6 +433,7 @@ void deallocate_buffers(Allocator &allocator) {
     allocator.l1_manager.deallocate_all();
     allocator.l1_small_manager.deallocate_all();
     allocator.trace_buffer_manager.deallocate_all();
+    allocator.allocated_buffers.clear();
 }
 
 void clear(Allocator &allocator) {
@@ -432,6 +441,7 @@ void clear(Allocator &allocator) {
     allocator.l1_manager.clear();
     allocator.l1_small_manager.clear();
     allocator.trace_buffer_manager.clear();
+    allocator.allocated_buffers.clear();
 }
 
 }  // namespace allocator
@@ -460,6 +470,7 @@ void Allocator::reset() {
     l1_manager.clear();
     l1_small_manager.clear();
    trace_buffer_manager.clear();
+    allocated_buffers.clear();
     config.reset();
 }
 
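A sketch of the invariant the new allocated_buffers set maintains (not patch code; alloc is assumed to be a live Allocator and buffer a Buffer* built against it): every address handed out is paired with a registry entry, and deallocation removes it.

    // Sketch only: the per-allocator registry mirrors live allocations one-to-one.
    using namespace tt::tt_metal;
    DeviceAddr addr = allocator::allocate_buffer(alloc, buffer->size(), buffer);
    (void)addr;
    TT_ASSERT(allocator::get_allocated_buffers(alloc).count(buffer) == 1);
    allocator::deallocate_buffer(alloc, buffer);
    TT_ASSERT(allocator::get_allocated_buffers(alloc).count(buffer) == 0);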
diff --git a/tt_metal/impl/allocator/allocator.hpp b/tt_metal/impl/allocator/allocator.hpp
index ecb31dfb5c8..60e4c97f0b9 100644
--- a/tt_metal/impl/allocator/allocator.hpp
+++ b/tt_metal/impl/allocator/allocator.hpp
@@ -19,6 +19,12 @@ namespace tt {
 
 namespace tt_metal {
 
+inline namespace v0 {
+
+class Buffer;
+
+}  // namespace v0
+
 // Fwd declares
 enum class BufferType;
 struct Allocator;
@@ -99,15 +105,17 @@ std::optional<DeviceAddr> lowest_occupied_l1_address(const Allocator &allocator,
 
 DeviceAddr base_alloc(const AllocatorConfig &config, BankManager &bank_manager, DeviceAddr size, DeviceAddr page_size, bool bottom_up, std::optional<uint32_t> num_shards);
 
-DeviceAddr allocate_buffer(Allocator &allocator, DeviceAddr size, DeviceAddr page_size, const BufferType &buffer_type, bool bottom_up, std::optional<uint32_t> num_shards = std::nullopt);
+DeviceAddr allocate_buffer(Allocator &allocator, DeviceAddr size, Buffer *buffer);
 
 void mark_allocations_unsafe(Allocator &allocator);
 
 void mark_allocations_safe(Allocator &allocator);
 
-void deallocate_buffer(Allocator &allocator, DeviceAddr address, const BufferType &buffer_type);
+void deallocate_buffer(Allocator &allocator, Buffer *buffer);
 
 void deallocate_buffers(Allocator &allocator);
 
+const std::unordered_set<Buffer *> &get_allocated_buffers(const Allocator &allocator);
+
 void clear(Allocator &allocatator);
 
 }  // namespace allocator
@@ -127,6 +135,7 @@ struct Allocator {
     std::unordered_map<uint32_t, std::vector<uint32_t>> dram_channel_to_bank_ids;
     std::unordered_map<uint32_t, CoreCoord> bank_id_to_logical_core;
     std::unordered_map<BufferType, std::unordered_map<CoreCoord, std::vector<uint32_t>>> logical_core_to_bank_ids;
+    std::unordered_set<Buffer *> allocated_buffers;
 
     AllocatorConfig config;
     // Callbacks to invoke during initialization and allocation
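The new forward-declaration block is what lets allocator.hpp hold Buffer pointers without including buffer.hpp; the declaration sits inside inline namespace v0 so it names the same class. A sketch of the pattern, with Example as a hypothetical stand-in:

    // Sketch only: containers of pointers compile against an incomplete type,
    // so the header needs no buffer.hpp include and no header cycle is created.
    #include <unordered_set>
    namespace tt::tt_metal {
    inline namespace v0 { class Buffer; }
    struct Example {
        std::unordered_set<Buffer *> tracked;  // OK: Buffer is incomplete here
    };
    }  // namespace tt::tt_metal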
diff --git a/tt_metal/impl/buffers/buffer.cpp b/tt_metal/impl/buffers/buffer.cpp
index 0403a82af98..d4cfcf88be3 100644
--- a/tt_metal/impl/buffers/buffer.cpp
+++ b/tt_metal/impl/buffers/buffer.cpp
@@ -44,9 +44,15 @@ void validate_buffer_size_and_page_size(
         "Page size must be divisible by sizeof(uint32_t) because buffers hold uint32_t values");
 
     if (is_sharded(buffer_layout)) {
-        TT_FATAL(shard_parameters != std::nullopt, "Sharded buffers must have a core grid assigned");
-    } else if (buffer_layout == TensorMemoryLayout::SINGLE_BANK) {
-        TT_FATAL(page_size == size, "Contiguous buffer must be one contiguous page");
+        TT_FATAL(
+            shard_parameters != std::nullopt,
+            "Buffer was specified as sharded but does not have shard_parameters specified");
+    } else {
+        TT_FATAL(
+            shard_parameters == std::nullopt, "Buffer was specified as not sharded but has shard_parameters specified");
+        if (buffer_layout == TensorMemoryLayout::SINGLE_BANK) {
+            TT_FATAL(page_size == size, "Contiguous buffer must be one contiguous page");
+        }
     }
 }
 
@@ -125,7 +131,7 @@ BufferPageMapping generate_buffer_page_mapping(const Buffer& buffer) {
     auto shard_spec = buffer.shard_spec();
 
     bool row_major = shard_spec.orientation() == ShardOrientation::ROW_MAJOR;
-    uint32_t num_cores = buffer.num_cores();
+    uint32_t num_cores = buffer.num_cores().value();
 
     buffer_page_mapping.all_cores_ = corerange_to_cores(shard_spec.grid(), num_cores, row_major);
     TT_FATAL(num_cores == buffer_page_mapping.all_cores_.size(), "Buffer has {} cores, but page mapping expects {} cores", num_cores, buffer_page_mapping.all_cores_.size());
@@ -196,7 +202,7 @@ Buffer::Buffer(
     buffer_type_(buffer_type),
     buffer_layout_(buffer_layout),
     shard_parameters_(shard_parameters),
-    bottom_up_(bottom_up),
+    bottom_up_(bottom_up.value_or(this->is_dram())),
     buffer_page_mapping_(nullptr) {
     TT_FATAL(this->device_ != nullptr && this->device_->allocator_ != nullptr, "Device and allocator need to not be null.");
 
@@ -223,9 +229,7 @@ std::shared_ptr<Buffer> Buffer::create(
     }
 
     buffer->device_->push_work([buffer] {
-        bool bottom_up = buffer->bottom_up_.value_or(buffer->is_dram());
-        buffer->address_ = detail::AllocateBuffer(buffer.get(), bottom_up);
-        detail::BUFFER_MAP.insert({buffer->device_->id(), buffer->address_}, buffer.get());
+        buffer->address_ = detail::AllocateBuffer(buffer.get());
 
         std::unique_lock lock(buffer->allocation_mutex_);
         buffer->allocation_status_.store(AllocationStatus::ALLOCATED, std::memory_order::relaxed);
@@ -257,7 +261,6 @@ void Buffer::deallocate_impl() {
 
     if (device_->initialized_ && size_ != 0) {
         // address_ is only modified from this thread, no sync required
-        detail::BUFFER_MAP.erase({device_->id(), address_});
         detail::DeallocateBuffer(this);
     }
 
@@ -306,7 +309,7 @@ uint32_t Buffer::num_dev_pages() const {
         return this->num_pages();
     }
 
-    return this->shard_spec().size() * this->num_cores();
+    return this->shard_spec().size() * this->num_cores().value();
 }
 
 CoreType Buffer::core_type() const {
@@ -399,9 +402,9 @@ void Buffer::set_shard_spec(const ShardSpecBuffer& shard_spec) {
     this->buffer_page_mapping_ = nullptr;
 }
 
-uint32_t Buffer::num_cores() const {
+std::optional<uint32_t> Buffer::num_cores() const {
     if (!is_sharded(this->buffer_layout_))
-        return 1;
+        return std::nullopt;
 
     return this->shard_spec().tensor_shard_spec.grid.num_cores();
 }
@@ -433,10 +436,6 @@ DeviceAddr ShardSpecBuffer::size() const {
     return shape_in_pages_[0] * shape_in_pages_[1];
 }
 
-namespace detail {
-buffer_map_t BUFFER_MAP = {};
-}
-
 }  // namespace tt_metal
 
 }  // namespace tt
diff --git a/tt_metal/impl/buffers/buffer.hpp b/tt_metal/impl/buffers/buffer.hpp
index 8c4332de0cb..ec3cbb22aec 100644
--- a/tt_metal/impl/buffers/buffer.hpp
+++ b/tt_metal/impl/buffers/buffer.hpp
@@ -176,6 +176,8 @@ class Buffer final {
 
     TensorMemoryLayout buffer_layout() const { return buffer_layout_; }
 
+    bool bottom_up() const { return bottom_up_; }
+
     uint32_t dram_channel_from_bank_id(uint32_t bank_id) const;
 
     CoreCoord logical_core_from_bank_id(uint32_t bank_id) const;
@@ -199,7 +201,7 @@ class Buffer final {
     ShardSpecBuffer shard_spec() const;
     void set_shard_spec(const ShardSpecBuffer& shard_spec);
 
-    uint32_t num_cores() const;
+    std::optional<uint32_t> num_cores() const;
 
     const std::shared_ptr<const BufferPageMapping>& get_buffer_page_mapping();
 
@@ -231,7 +233,7 @@ class Buffer final {
     const DeviceAddr size_;  // Size in bytes
     const BufferType buffer_type_;
     const TensorMemoryLayout buffer_layout_;
-    const std::optional<bool> bottom_up_;
+    const bool bottom_up_;
 
     std::atomic<AllocationStatus> allocation_status_ = AllocationStatus::ALLOCATION_REQUESTED;
     DeviceAddr address_ = 0;
@@ -252,36 +254,6 @@ class Buffer final {
 
 BufferPageMapping generate_buffer_page_mapping(const Buffer &buffer);
 
-namespace detail {
-using Deviceid = uint32_t;
-
-class buffer_map_t {
-   public:
-    void insert(std::tuple<Deviceid, DeviceAddr> buf_attr, Buffer *buffer) {
-        std::scoped_lock lock(this->map_mutex);
-        this->map.insert({buf_attr, buffer});
-    }
-
-    void erase(std::tuple<Deviceid, DeviceAddr> buf_attr) {
-        std::scoped_lock lock(this->map_mutex);
-        this->map.erase(buf_attr);
-    }
-
-    std::map<std::tuple<Deviceid, DeviceAddr>, Buffer *> value() {
-        std::scoped_lock lock(this->map_mutex);
-        return this->map;
-    }
-
-    ~buffer_map_t() { TT_ASSERT(this->map.empty(), "Not all buffers deallocated by runtime!"); }
-
-   private:
-    std::mutex map_mutex;
-    std::map<std::tuple<Deviceid, DeviceAddr>, Buffer *> map = {};
-};
-
-extern buffer_map_t BUFFER_MAP;
-}  // namespace detail
-
 inline namespace v0 {
 
 using HostDataType = std::variant<
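Since num_cores() now returns std::optional<uint32_t> (std::nullopt for non-sharded layouts, where it used to return 1), call sites distinguish the two cases explicitly. A sketch of the migration; both helpers are hypothetical:

    // Sketch only: adapting a call site to the optional return.
    if (auto num_cores = buffer->num_cores()) {
        handle_sharded(*num_cores);  // hypothetical helper for sharded buffers
    } else {
        handle_interleaved();        // hypothetical helper for the non-sharded path
    }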
diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp
index 0479807018f..8afff15103b 100644
--- a/tt_metal/impl/device/device.cpp
+++ b/tt_metal/impl/device/device.cpp
@@ -2949,10 +2949,8 @@ bool Device::close() {
     tt::Cluster::instance().l1_barrier(id_);
     allocator::clear(*this->allocator_);
     // After device close, no buffers on this device should be used
-    for (const auto &[buf_attr, buf] : detail::BUFFER_MAP.value()) {
-        if (std::get<0>(buf_attr) == this->id()) {
-            DeallocateBuffer(*buf);
-        }
+    for (const auto &buf : this->get_allocated_buffers()) {
+        DeallocateBuffer(*buf);
     }
 
     this->compute_cores_.clear();
@@ -3174,6 +3172,11 @@ void Device::dump_memory_blocks(const BufferType &buffer_type, std::ofstream &ou
     return allocator::dump_memory_blocks(*this->allocator_, buffer_type, out);
 }
 
+const std::unordered_set<Buffer *> &Device::get_allocated_buffers() const {
+    this->check_allocator_is_initialized();
+    return allocator::get_allocated_buffers(*this->allocator_);
+}
+
 void Device::deallocate_buffers(){
     allocator::deallocate_buffers(*allocator_);
 }
diff --git a/tt_metal/impl/device/device.hpp b/tt_metal/impl/device/device.hpp
index 7beb58f3ea8..dce53a1eae8 100644
--- a/tt_metal/impl/device/device.hpp
+++ b/tt_metal/impl/device/device.hpp
@@ -197,6 +197,8 @@ class Device {
     uint32_t get_noc_unicast_encoding(uint8_t noc_index, const CoreCoord& physical_core) const;
     uint32_t get_noc_multicast_encoding(uint8_t noc_index, const CoreRange& physical_cores) const;
 
+    const std::unordered_set<Buffer *> &get_allocated_buffers() const;
+
     void deallocate_buffers();
 
     // machine epsilon
diff --git a/tt_metal/impl/dispatch/command_queue.hpp b/tt_metal/impl/dispatch/command_queue.hpp
index 64f6c5407b7..a840fd19b8a 100644
--- a/tt_metal/impl/dispatch/command_queue.hpp
+++ b/tt_metal/impl/dispatch/command_queue.hpp
@@ -478,9 +478,6 @@ using CompletionReaderQueue = LockFreeQueue<CompletionReaderVariant>;
 struct AllocBufferMetadata {
     Buffer* buffer;
     std::reference_wrapper<Allocator> allocator;
-    BufferType buffer_type;
-    uint32_t device_address;
-    bool bottom_up;
 };
 
 struct RuntimeArgsMetadata {
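Device::get_allocated_buffers() gives callers the per-device view that scanning detail::BUFFER_MAP used to provide. A sketch of a caller, assuming device is an open Device*:

    // Sketch only: enumerate this device's live buffers via its allocator.
    #include <iostream>
    const std::unordered_set<tt::tt_metal::Buffer *> &live = device->get_allocated_buffers();
    std::cout << live.size() << " buffers allocated on device " << device->id() << std::endl;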
}
@@ -831,7 +827,7 @@ void DeallocateBuffer(Buffer *buffer) {
         return;
     }
 
-    allocator::deallocate_buffer(*buffer->device()->allocator_, buffer->address(), buffer->buffer_type());
+    allocator::deallocate_buffer(*buffer->device()->allocator_, buffer);
 }
 
 }  // namespace detail
diff --git a/ttnn/cpp/ttnn/graph/graph_processor.cpp b/ttnn/cpp/ttnn/graph/graph_processor.cpp
index bebeadebd9d..882f9588a22 100644
--- a/ttnn/cpp/ttnn/graph/graph_processor.cpp
+++ b/ttnn/cpp/ttnn/graph/graph_processor.cpp
@@ -90,7 +90,7 @@ GraphProcessor::GraphProcessor(RunMode mode) : run_mode(mode) {
     end_function_any_map[typeid(std::reference_wrapper<Tensor>)] = [ptr = this] (const std::any& val) mutable {ptr->end_function_process_tensor(val);};
 }
 
-void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer, bool bottom_up) {
+void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer) {
     const std::lock_guard<std::mutex> lock(mutex);
     auto buf_id = add_buffer(buffer);
 
@@ -478,7 +478,7 @@ nlohmann::json GraphProcessor::end_graph_capture() {
     return res;
 }
 
-bool ProcessorHooks::hook_allocate(const tt::tt_metal::Buffer* buffer, bool bottom_up) {
+bool ProcessorHooks::hook_allocate(const tt::tt_metal::Buffer* buffer) {
     return do_block;
 }
 
diff --git a/ttnn/cpp/ttnn/graph/graph_processor.hpp b/ttnn/cpp/ttnn/graph/graph_processor.hpp
index 4f7d6f1b6e7..83179dabe59 100644
--- a/ttnn/cpp/ttnn/graph/graph_processor.hpp
+++ b/ttnn/cpp/ttnn/graph/graph_processor.hpp
@@ -22,7 +22,7 @@ namespace ttnn::graph {
    public:
     ProcessorHooks() = default;
 
-    bool hook_allocate(const tt::tt_metal::Buffer* buffer, bool bottom_up) override;
+    bool hook_allocate(const tt::tt_metal::Buffer* buffer) override;
 
     bool hook_deallocate(tt::tt_metal::Buffer* buffer) override;
 
@@ -40,7 +40,7 @@ namespace ttnn::graph {
     GraphProcessor(tt::tt_metal::IGraphProcessor::RunMode mode);
 
     ~GraphProcessor() override;
 
-    void track_allocate(const tt::tt_metal::Buffer* buffer, bool bottom_up) override;
+    void track_allocate(const tt::tt_metal::Buffer* buffer) override;
 
     void track_deallocate(tt::tt_metal::Buffer* buffer) override;
 
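For reference, the request size the new AllocateBuffer derives before delegating to allocator::allocate_buffer reduces to the expression below; this restates the tt_metal.cpp hunk above rather than adding behavior:

    // Sketch only: total allocation size for sharded vs. non-sharded buffers.
    DeviceAddr size = is_sharded(buffer->buffer_layout())
        ? buffer->shard_spec().size() * buffer->num_cores().value() * buffer->page_size()
        : buffer->size();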
diff --git a/ttnn/cpp/ttnn/reports.hpp b/ttnn/cpp/ttnn/reports.hpp
index 9392f8eda7c..0eee2efedbc 100644
--- a/ttnn/cpp/ttnn/reports.hpp
+++ b/ttnn/cpp/ttnn/reports.hpp
@@ -7,6 +7,7 @@
 #include <vector>
 
 #include "tt_metal/impl/buffers/buffer.hpp"
+#include "tt_metal/impl/device/device_pool.hpp"
 
 namespace ttnn {
@@ -64,49 +65,52 @@ struct BufferInfo {
 std::vector<BufferInfo> get_buffers() {
     std::vector<BufferInfo> buffer_infos;
-    for (const auto &[key, buffer] : tt::tt_metal::detail::BUFFER_MAP.value()) {
-        auto [device_id, address] = key;
-        auto device = buffer->device();
-
-        auto num_pages = buffer->num_pages();
-        auto page_size = buffer->page_size();
-        auto num_banks = device->num_banks(buffer->buffer_type());
-
-        std::map<uint32_t, uint32_t> bank_to_num_pages;
-        if (buffer->buffer_layout() == tt::tt_metal::TensorMemoryLayout::INTERLEAVED) {
-            uint32_t bank_id = 0;
-            for (int page_index = 0; page_index < num_pages; page_index++) {
-                if (bank_to_num_pages.find(bank_id) == bank_to_num_pages.end()) {
-                    bank_to_num_pages[bank_id] = 0;
+    for (const auto &device : tt::DevicePool::instance().get_all_active_devices()) {
+        for (const auto &buffer : device->get_allocated_buffers()) {
+            auto device_id = device->id();
+            auto address = buffer->address();
+
+            auto num_pages = buffer->num_pages();
+            auto page_size = buffer->page_size();
+            auto num_banks = device->num_banks(buffer->buffer_type());
+
+            std::map<uint32_t, uint32_t> bank_to_num_pages;
+            if (buffer->buffer_layout() == tt::tt_metal::TensorMemoryLayout::INTERLEAVED) {
+                uint32_t bank_id = 0;
+                for (int page_index = 0; page_index < num_pages; page_index++) {
+                    if (bank_to_num_pages.find(bank_id) == bank_to_num_pages.end()) {
+                        bank_to_num_pages[bank_id] = 0;
+                    }
+                    bank_to_num_pages[bank_id]++;
+                    bank_id = (bank_id + 1) % num_banks;
                 }
-                bank_to_num_pages[bank_id]++;
-                bank_id = (bank_id + 1) % num_banks;
-            }
-        } else {
-            const auto& buffer_page_mapping = *buffer->get_buffer_page_mapping();
-            for (int page_index = 0; page_index < num_pages; page_index++) {
-                auto dev_page_index = buffer_page_mapping.host_page_to_dev_page_mapping_[page_index];
-                auto core = buffer_page_mapping.all_cores_[buffer_page_mapping.dev_page_to_core_mapping_[dev_page_index]];
-                auto bank_id = device->bank_ids_from_logical_core(buffer->buffer_type(), core)[0];
-
-                if (bank_to_num_pages.find(bank_id) == bank_to_num_pages.end()) {
-                    bank_to_num_pages[bank_id] = 0;
+            } else {
+                const auto &buffer_page_mapping = *buffer->get_buffer_page_mapping();
+                for (int page_index = 0; page_index < num_pages; page_index++) {
+                    auto dev_page_index = buffer_page_mapping.host_page_to_dev_page_mapping_[page_index];
+                    auto core =
+                        buffer_page_mapping.all_cores_[buffer_page_mapping.dev_page_to_core_mapping_[dev_page_index]];
+                    auto bank_id = device->bank_ids_from_logical_core(buffer->buffer_type(), core)[0];
+
+                    if (bank_to_num_pages.find(bank_id) == bank_to_num_pages.end()) {
+                        bank_to_num_pages[bank_id] = 0;
+                    }
+                    bank_to_num_pages[bank_id]++;
                 }
-                bank_to_num_pages[bank_id]++;
             }
-        }
 
-        auto max_num_pages =
-            std::max_element(bank_to_num_pages.begin(), bank_to_num_pages.end(), [](const auto &a, const auto &b) {
-                return a.second < b.second;
-            });
-
-        BufferInfo buffer_info = {};
-        buffer_info.device_id = device_id;
-        buffer_info.address = address;
-        buffer_info.max_size_per_bank = (*max_num_pages).second * page_size;
-        buffer_info.buffer_type = buffer->buffer_type();
-        buffer_infos.push_back(buffer_info);
+            auto max_num_pages =
+                std::max_element(bank_to_num_pages.begin(), bank_to_num_pages.end(), [](const auto &a, const auto &b) {
+                    return a.second < b.second;
+                });
+
+            BufferInfo buffer_info = {};
+            buffer_info.device_id = device_id;
+            buffer_info.address = address;
+            buffer_info.max_size_per_bank = (*max_num_pages).second * page_size;
+            buffer_info.buffer_type = buffer->buffer_type();
+            buffer_infos.push_back(buffer_info);
+        }
     }
     return buffer_infos;
 }
@@ -125,23 +129,35 @@ struct BufferPageInfo {
 
 std::vector<BufferPageInfo> get_buffer_pages() {
     std::vector<BufferPageInfo> buffer_page_infos;
-    for (const auto &[key, buffer] : tt::tt_metal::detail::BUFFER_MAP.value()) {
-        if (not buffer->is_l1()) {
-            continue;
-        }
+    for (const auto &device : tt::DevicePool::instance().get_all_active_devices()) {
+        for (const auto &buffer : device->get_allocated_buffers()) {
+            if (not buffer->is_l1()) {
+                continue;
+            }
 
-        auto [device_id, address] = key;
-        auto device = buffer->device();
+            auto device_id = device->id();
+            auto address = buffer->address();
 
-        uint32_t page_size = buffer->page_size();
-        auto num_pages = buffer->num_pages();
-        auto num_banks = device->num_banks(buffer->buffer_type());
+            auto page_size = buffer->page_size();
+            auto num_pages = buffer->num_pages();
+            auto num_banks = device->num_banks(buffer->buffer_type());
 
-        if (buffer->buffer_layout() == tt::tt_metal::TensorMemoryLayout::INTERLEAVED) {
             uint32_t bank_id = 0;
             for (int page_index = 0; page_index < num_pages; page_index++) {
-                auto page_address = buffer->page_address(bank_id, page_index);
-                auto core = buffer->logical_core_from_bank_id(bank_id);
+                CoreCoord core;
+                DeviceAddr page_address = 0;
+
+                if (buffer->buffer_layout() == tt::tt_metal::TensorMemoryLayout::INTERLEAVED) {
+                    page_address = buffer->page_address(bank_id, page_index);
+                    core = buffer->logical_core_from_bank_id(bank_id);
+                    bank_id = (bank_id + 1) % num_banks;
+                } else {
+                    const auto &buffer_page_mapping = *buffer->get_buffer_page_mapping();
+                    auto dev_page_index = buffer_page_mapping.host_page_to_dev_page_mapping_[page_index];
+                    core = buffer_page_mapping.all_cores_[buffer_page_mapping.dev_page_to_core_mapping_[dev_page_index]];
+                    bank_id = device->bank_ids_from_logical_core(buffer->buffer_type(), core)[0];
+                    page_address = buffer->sharded_page_address(bank_id, dev_page_index);
+                }
 
                 BufferPageInfo buffer_page_info = {};
                 buffer_page_info.device_id = device_id;
                 buffer_page_info.address = address;
                 buffer_page_info.core_y = core.y;
                 buffer_page_info.core_x = core.x;
                 buffer_page_info.bank_id = bank_id;
                 buffer_page_info.page_index = page_index;
                 buffer_page_info.page_address = page_address;
                 buffer_page_info.page_size = page_size;
                 buffer_page_info.buffer_type = buffer->buffer_type();
@@ -153,28 +169,7 @@ std::vector<BufferPageInfo> get_buffer_pages() {
-                buffer_page_infos.push_back(buffer_page_info);
-                bank_id = (bank_id + 1) % num_banks;
-            }
-        } else {
-            const auto& buffer_page_mapping = *buffer->get_buffer_page_mapping();
-            for (int page_index = 0; page_index < num_pages; page_index++) {
-                auto dev_page_index = buffer_page_mapping.host_page_to_dev_page_mapping_[page_index];
-                auto core = buffer_page_mapping.all_cores_[buffer_page_mapping.dev_page_to_core_mapping_[dev_page_index]];
-                auto bank_id = device->bank_ids_from_logical_core(buffer->buffer_type(), core)[0];
-                auto page_address = buffer->sharded_page_address(bank_id, dev_page_index);
-
-                BufferPageInfo buffer_page_info = {};
-                buffer_page_info.device_id = device_id;
-                buffer_page_info.address = address;
-                buffer_page_info.core_y = core.y;
-                buffer_page_info.core_x = core.x;
-                buffer_page_info.bank_id = bank_id;
-                buffer_page_info.page_index = page_index;
-                buffer_page_info.page_address = page_address;
-                buffer_page_info.page_size = page_size;
-                buffer_page_info.buffer_type = buffer->buffer_type();
                 buffer_page_infos.push_back(buffer_page_info);
             }
         }
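The reporting traversal that replaces the global map scan, shown in isolation (a sketch, assuming the headers included by reports.hpp above):

    // Sketch only: DevicePool -> device -> allocator-tracked buffers.
    for (tt::tt_metal::Device *device : tt::DevicePool::instance().get_all_active_devices()) {
        for (tt::tt_metal::Buffer *buffer : device->get_allocated_buffers()) {
            // inspect buffer->address(), buffer->buffer_type(), ...
        }
    }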