From 561e2fd349ad4598150190ef531288cf6c21104f Mon Sep 17 00:00:00 2001
From: Austin Ho
Date: Mon, 21 Oct 2024 13:45:40 +0000
Subject: [PATCH] #14038: Remove global BUFFER_MAP and make the tracking of
 buffers local to an allocator

---
 tt_metal/detail/tt_metal.hpp             |   2 +-
 tt_metal/graph/graph_tracking.cpp        |   8 +-
 tt_metal/graph/graph_tracking.hpp        |   8 +-
 tt_metal/impl/allocator/allocator.cpp    |  41 ++++---
 tt_metal/impl/allocator/allocator.hpp    |  13 ++-
 tt_metal/impl/buffers/buffer.cpp         |  31 +++--
 tt_metal/impl/buffers/buffer.hpp         |  36 +-----
 tt_metal/impl/device/device.cpp          |  11 +-
 tt_metal/impl/device/device.hpp          |   2 +
 tt_metal/impl/dispatch/command_queue.hpp |   3 -
 tt_metal/tt_metal.cpp                    |  26 ++---
 ttnn/cpp/ttnn/graph/graph_processor.cpp  |   4 +-
 ttnn/cpp/ttnn/graph/graph_processor.hpp  |   4 +-
 ttnn/cpp/ttnn/reports.hpp                | 139 +++++++++++------------
 14 files changed, 156 insertions(+), 172 deletions(-)

diff --git a/tt_metal/detail/tt_metal.hpp b/tt_metal/detail/tt_metal.hpp
index d6168102a5e..e5464e721a6 100644
--- a/tt_metal/detail/tt_metal.hpp
+++ b/tt_metal/detail/tt_metal.hpp
@@ -276,7 +276,7 @@ inline namespace v0 {
 
     void SetLazyCommandQueueMode(bool lazy);
 
-    DeviceAddr AllocateBuffer(const Buffer* buffer, bool bottom_up);
+    DeviceAddr AllocateBuffer(Buffer* buffer);
 
     void DeallocateBuffer(Buffer *buffer);
 }  // namespace detail
diff --git a/tt_metal/graph/graph_tracking.cpp b/tt_metal/graph/graph_tracking.cpp
index 17a72ddd5ee..c12eff0d7ec 100644
--- a/tt_metal/graph/graph_tracking.cpp
+++ b/tt_metal/graph/graph_tracking.cpp
@@ -27,12 +27,12 @@ bool GraphTracker::add_hook(const std::shared_ptr<IGraphHooks>& new_hook) {
     return true;
 }
 
-void GraphTracker::track_allocate(const Buffer* buffer, bool bottom_up) {
+void GraphTracker::track_allocate(const Buffer* buffer) {
     if (processors.empty()) {
         return;
    }
     for (auto& it : processors) {
-        it->track_allocate(buffer, bottom_up);
+        it->track_allocate(buffer);
     }
 }
 
@@ -73,11 +73,11 @@ void GraphTracker::track_program(Program* program) {
     }
 }
 
-bool GraphTracker::hook_allocate(const Buffer* buffer, bool bottom_up) {
+bool GraphTracker::hook_allocate(const Buffer* buffer) {
     if (hook == nullptr)
         return false;
 
-    return hook->hook_allocate(buffer, bottom_up);
+    return hook->hook_allocate(buffer);
 }
 
 bool GraphTracker::hook_deallocate(Buffer* buffer) {
diff --git a/tt_metal/graph/graph_tracking.hpp b/tt_metal/graph/graph_tracking.hpp
index 54ee8eef41d..712373ab005 100644
--- a/tt_metal/graph/graph_tracking.hpp
+++ b/tt_metal/graph/graph_tracking.hpp
@@ -28,7 +28,7 @@ inline namespace v0 {
 
         IGraphProcessor() = default;
 
-        virtual void track_allocate(const tt::tt_metal::Buffer* buffer, bool bottom_up) {};
+        virtual void track_allocate(const tt::tt_metal::Buffer* buffer) {};
 
         virtual void track_deallocate(tt::tt_metal::Buffer* buffer) {};
 
@@ -54,7 +54,7 @@ inline namespace v0 {
     class IGraphHooks {
     public:
         IGraphHooks() = default;
-        virtual bool hook_allocate(const tt::tt_metal::Buffer* buffer, bool bottom_up) = 0;
+        virtual bool hook_allocate(const tt::tt_metal::Buffer* buffer) = 0;
 
         virtual bool hook_deallocate(tt::tt_metal::Buffer* buffer) = 0;
 
@@ -77,7 +77,7 @@ inline namespace v0 {
 
         bool add_hook(const std::shared_ptr<IGraphHooks>& hook);
 
-        void track_allocate(const Buffer* buffer, bool bottom_up);
+        void track_allocate(const Buffer* buffer);
 
         void track_deallocate(Buffer* buffer);
 
@@ -118,7 +118,7 @@ inline namespace v0 {
             }
         }
 
-        bool hook_allocate(const Buffer* buffer, bool bottom_up);
+        bool hook_allocate(const Buffer* buffer);
 
         bool hook_deallocate(Buffer* buffer);
 
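For orientation (a sketch, not part of the patch): with the second parameter gone, a tracker that still needs the growth direction reads it off the buffer itself via the Buffer::bottom_up() accessor added in buffer.hpp below. SketchProcessor is a hypothetical name:

    // Sketch only: an implementer against the narrowed single-argument interface.
    class SketchProcessor : public tt::tt_metal::IGraphProcessor {
        void track_allocate(const tt::tt_metal::Buffer* buffer) override {
            bool bottom_up = buffer->bottom_up();  // formerly passed as the 2nd argument
            (void)bottom_up;                       // use as needed by the tracker
        }
    };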
diff --git a/tt_metal/impl/allocator/allocator.cpp b/tt_metal/impl/allocator/allocator.cpp
index 023826e5cd9..7e760b3bf37 100644
--- a/tt_metal/impl/allocator/allocator.cpp
+++ b/tt_metal/impl/allocator/allocator.cpp
@@ -377,38 +377,45 @@ void verify_safe_allocation(Allocator& allocator) {
     }
 }
 
-uint64_t allocate_buffer(
-    Allocator &allocator,
-    DeviceAddr size,
-    DeviceAddr page_size,
-    const BufferType &buffer_type,
-    bool bottom_up,
-    std::optional<uint32_t> num_shards) {
-    uint64_t address = 0;
+const std::unordered_set<Buffer *> &get_allocated_buffers(const Allocator &allocator) { return allocator.allocated_buffers; }
+
+DeviceAddr allocate_buffer(Allocator &allocator, DeviceAddr size, Buffer *buffer) {
+    DeviceAddr address = 0;
+    auto page_size = buffer->page_size();
+    auto buffer_type = buffer->buffer_type();
+    auto bottom_up = buffer->bottom_up();
+    auto num_shards = buffer->num_cores();
     verify_safe_allocation(allocator);
     switch (buffer_type) {
         case BufferType::DRAM:
-            return allocator.descriptor.dram.alloc(
+            address = allocator.descriptor.dram.alloc(
                 allocator.config, allocator.dram_manager, size, page_size, bottom_up, num_shards);
+            break;
         case BufferType::L1:
-            return allocator.descriptor.l1.alloc(
+            address = allocator.descriptor.l1.alloc(
                 allocator.config, allocator.l1_manager, size, page_size, bottom_up, num_shards);
+            break;
        case BufferType::L1_SMALL: {
             TT_FATAL(num_shards.has_value(), "L1_SMALL only supports sharded allocations, see validate_num_banks");
-            return allocator.descriptor.l1.alloc(
+            address = allocator.descriptor.l1.alloc(
                 allocator.config, allocator.l1_small_manager, size, page_size, bottom_up, num_shards);
-        case BufferType::TRACE:
-            return allocator.descriptor.dram.alloc(
-                allocator.config, allocator.trace_buffer_manager, size, page_size, bottom_up, num_shards);
+            break;
         }
+        case BufferType::TRACE:
+            address = allocator.descriptor.dram.alloc(
+                allocator.config, allocator.trace_buffer_manager, size, page_size, bottom_up, num_shards);
+            break;
         default: {
            TT_THROW("Unsupported buffer type!");
         }
     }
+    allocator.allocated_buffers.insert(buffer);
     return address;
 }
 
-void deallocate_buffer(Allocator &allocator, DeviceAddr address, const BufferType &buffer_type) {
+void deallocate_buffer(Allocator &allocator, Buffer *buffer) {
+    auto address = buffer->address();
+    auto buffer_type = buffer->buffer_type();
     switch (buffer_type) {
         case BufferType::DRAM: allocator.dram_manager.deallocate_buffer(address); break;
         case BufferType::L1: allocator.l1_manager.deallocate_buffer(address); break;
@@ -418,6 +425,7 @@ void deallocate_buffer(Allocator &allocator, DeviceAddr address, const BufferTyp
             TT_THROW("Unsupported buffer type!");
         }
     }
+    allocator.allocated_buffers.erase(buffer);
 }
 
 void deallocate_buffers(Allocator &allocator) {
@@ -425,6 +433,7 @@ void deallocate_buffers(Allocator &allocator) {
     allocator.l1_manager.deallocate_all();
     allocator.l1_small_manager.deallocate_all();
     allocator.trace_buffer_manager.deallocate_all();
+    allocator.allocated_buffers.clear();
 }
 
 void clear(Allocator &allocator) {
@@ -432,6 +441,7 @@ void clear(Allocator &allocator) {
     allocator.l1_manager.clear();
     allocator.l1_small_manager.clear();
     allocator.trace_buffer_manager.clear();
+    allocator.allocated_buffers.clear();
 }
 
 }  // namespace allocator
@@ -460,6 +470,7 @@ void Allocator::reset() {
     l1_manager.clear();
     l1_small_manager.clear();
    trace_buffer_manager.clear();
+    allocated_buffers.clear();
     config.reset();
 }
 
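A sketch of the invariant the new allocated_buffers set maintains (not patch code; alloc is assumed to be a live Allocator and buffer a Buffer* built against it): every address handed out is paired with a registry entry, and deallocation removes it.

    // Sketch only: the per-allocator registry mirrors live allocations one-to-one.
    using namespace tt::tt_metal;
    DeviceAddr addr = allocator::allocate_buffer(alloc, buffer->size(), buffer);
    (void)addr;
    TT_ASSERT(allocator::get_allocated_buffers(alloc).count(buffer) == 1);
    allocator::deallocate_buffer(alloc, buffer);
    TT_ASSERT(allocator::get_allocated_buffers(alloc).count(buffer) == 0);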
diff --git a/tt_metal/impl/allocator/allocator.hpp b/tt_metal/impl/allocator/allocator.hpp
index ecb31dfb5c8..60e4c97f0b9 100644
--- a/tt_metal/impl/allocator/allocator.hpp
+++ b/tt_metal/impl/allocator/allocator.hpp
@@ -19,6 +19,12 @@ namespace tt {
 
 namespace tt_metal {
 
+inline namespace v0 {
+
+class Buffer;
+
+}  // namespace v0
+
 // Fwd declares
 enum class BufferType;
 struct Allocator;
@@ -99,15 +105,17 @@ std::optional<DeviceAddr> lowest_occupied_l1_address(const Allocator &allocator,
 
 DeviceAddr base_alloc(const AllocatorConfig &config, BankManager &bank_manager, DeviceAddr size, DeviceAddr page_size, bool bottom_up, std::optional<uint32_t> num_shards);
 
-DeviceAddr allocate_buffer(Allocator &allocator, DeviceAddr size, DeviceAddr page_size, const BufferType &buffer_type, bool bottom_up, std::optional<uint32_t> num_shards = std::nullopt);
+DeviceAddr allocate_buffer(Allocator &allocator, DeviceAddr size, Buffer *buffer);
 
 void mark_allocations_unsafe(Allocator &allocator);
 
 void mark_allocations_safe(Allocator &allocator);
 
-void deallocate_buffer(Allocator &allocator, DeviceAddr address, const BufferType &buffer_type);
+void deallocate_buffer(Allocator &allocator, Buffer *buffer);
 
 void deallocate_buffers(Allocator &allocator);
 
+const std::unordered_set<Buffer *> &get_allocated_buffers(const Allocator &allocator);
+
 void clear(Allocator &allocatator);
 
 }  // namespace allocator
@@ -127,6 +135,7 @@ struct Allocator {
     std::unordered_map<uint32_t, std::vector<uint32_t>> dram_channel_to_bank_ids;
     std::unordered_map<uint32_t, CoreCoord> bank_id_to_logical_core;
     std::unordered_map<BufferType, std::unordered_map<CoreCoord, std::vector<uint32_t>>> logical_core_to_bank_ids;
+    std::unordered_set<Buffer *> allocated_buffers;
 
     AllocatorConfig config;
     // Callbacks to invoke during initialization and allocation
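The new forward-declaration block is what lets allocator.hpp hold Buffer pointers without including buffer.hpp; the declaration sits inside inline namespace v0 so it names the same class. A sketch of the pattern, with Example as a hypothetical stand-in:

    // Sketch only: containers of pointers compile against an incomplete type,
    // so the header needs no buffer.hpp include and no header cycle is created.
    #include <unordered_set>
    namespace tt::tt_metal {
    inline namespace v0 { class Buffer; }
    struct Example {
        std::unordered_set<Buffer *> tracked;  // OK: Buffer is incomplete here
    };
    }  // namespace tt::tt_metal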
diff --git a/tt_metal/impl/buffers/buffer.cpp b/tt_metal/impl/buffers/buffer.cpp
index 0403a82af98..d4cfcf88be3 100644
--- a/tt_metal/impl/buffers/buffer.cpp
+++ b/tt_metal/impl/buffers/buffer.cpp
@@ -44,9 +44,15 @@ void validate_buffer_size_and_page_size(
         "Page size must be divisible by sizeof(uint32_t) because buffers hold uint32_t values");
 
     if (is_sharded(buffer_layout)) {
-        TT_FATAL(shard_parameters != std::nullopt, "Sharded buffers must have a core grid assigned");
-    } else if (buffer_layout == TensorMemoryLayout::SINGLE_BANK) {
-        TT_FATAL(page_size == size, "Contiguous buffer must be one contiguous page");
+        TT_FATAL(
+            shard_parameters != std::nullopt,
+            "Buffer was specified as sharded but does not have shard_parameters specified");
+    } else {
+        TT_FATAL(
+            shard_parameters == std::nullopt, "Buffer was specified as not sharded but has shard_parameters specified");
+        if (buffer_layout == TensorMemoryLayout::SINGLE_BANK) {
+            TT_FATAL(page_size == size, "Contiguous buffer must be one contiguous page");
+        }
     }
 }
 
@@ -125,7 +131,7 @@ BufferPageMapping generate_buffer_page_mapping(const Buffer& buffer) {
     auto shard_spec = buffer.shard_spec();
 
     bool row_major = shard_spec.orientation() == ShardOrientation::ROW_MAJOR;
-    uint32_t num_cores = buffer.num_cores();
+    uint32_t num_cores = buffer.num_cores().value();
 
     buffer_page_mapping.all_cores_ = corerange_to_cores(shard_spec.grid(), num_cores, row_major);
     TT_FATAL(num_cores == buffer_page_mapping.all_cores_.size(), "Buffer has {} cores, but page mapping expects {} cores", num_cores, buffer_page_mapping.all_cores_.size());
@@ -196,7 +202,7 @@ Buffer::Buffer(
     buffer_type_(buffer_type),
     buffer_layout_(buffer_layout),
     shard_parameters_(shard_parameters),
-    bottom_up_(bottom_up),
+    bottom_up_(bottom_up.value_or(this->is_dram())),
     buffer_page_mapping_(nullptr) {
     TT_FATAL(this->device_ != nullptr && this->device_->allocator_ != nullptr, "Device and allocator need to not be null.");
 
@@ -223,9 +229,7 @@ std::shared_ptr<Buffer> Buffer::create(
     }
 
     buffer->device_->push_work([buffer] {
-        bool bottom_up = buffer->bottom_up_.value_or(buffer->is_dram());
-        buffer->address_ = detail::AllocateBuffer(buffer.get(), bottom_up);
-        detail::BUFFER_MAP.insert({buffer->device_->id(), buffer->address_}, buffer.get());
+        buffer->address_ = detail::AllocateBuffer(buffer.get());
 
         std::unique_lock lock(buffer->allocation_mutex_);
         buffer->allocation_status_.store(AllocationStatus::ALLOCATED, std::memory_order::relaxed);
@@ -257,7 +261,6 @@ void Buffer::deallocate_impl() {
 
     if (device_->initialized_ && size_ != 0) {
         // address_ is only modified from this thread, no sync required
-        detail::BUFFER_MAP.erase({device_->id(), address_});
         detail::DeallocateBuffer(this);
     }
 
@@ -306,7 +309,7 @@ uint32_t Buffer::num_dev_pages() const {
         return this->num_pages();
     }
 
-    return this->shard_spec().size() * this->num_cores();
+    return this->shard_spec().size() * this->num_cores().value();
 }
 
 CoreType Buffer::core_type() const {
@@ -399,9 +402,9 @@ void Buffer::set_shard_spec(const ShardSpecBuffer& shard_spec) {
     this->buffer_page_mapping_ = nullptr;
 }
 
-uint32_t Buffer::num_cores() const {
+std::optional<uint32_t> Buffer::num_cores() const {
     if (!is_sharded(this->buffer_layout_))
-        return 1;
+        return std::nullopt;
 
     return this->shard_spec().tensor_shard_spec.grid.num_cores();
 }
@@ -433,10 +436,6 @@ DeviceAddr ShardSpecBuffer::size() const {
     return shape_in_pages_[0] * shape_in_pages_[1];
 }
 
-namespace detail {
-buffer_map_t BUFFER_MAP = {};
-}
-
 }  // namespace tt_metal
 
 }  // namespace tt
diff --git a/tt_metal/impl/buffers/buffer.hpp b/tt_metal/impl/buffers/buffer.hpp
index 8c4332de0cb..ec3cbb22aec 100644
--- a/tt_metal/impl/buffers/buffer.hpp
+++ b/tt_metal/impl/buffers/buffer.hpp
@@ -176,6 +176,8 @@ class Buffer final {
 
     TensorMemoryLayout buffer_layout() const { return buffer_layout_; }
 
+    bool bottom_up() const { return bottom_up_; }
+
     uint32_t dram_channel_from_bank_id(uint32_t bank_id) const;
 
     CoreCoord logical_core_from_bank_id(uint32_t bank_id) const;
@@ -199,7 +201,7 @@ class Buffer final {
     ShardSpecBuffer shard_spec() const;
     void set_shard_spec(const ShardSpecBuffer& shard_spec);
 
-    uint32_t num_cores() const;
+    std::optional<uint32_t> num_cores() const;
 
     const std::shared_ptr<const BufferPageMapping>& get_buffer_page_mapping();
 
@@ -231,7 +233,7 @@ class Buffer final {
     const DeviceAddr size_;  // Size in bytes
     const BufferType buffer_type_;
     const TensorMemoryLayout buffer_layout_;
-    const std::optional<bool> bottom_up_;
+    const bool bottom_up_;
 
     std::atomic<AllocationStatus> allocation_status_ = AllocationStatus::ALLOCATION_REQUESTED;
     DeviceAddr address_ = 0;
@@ -252,36 +254,6 @@ class Buffer final {
 
 BufferPageMapping generate_buffer_page_mapping(const Buffer &buffer);
 
-namespace detail {
-using Deviceid = uint32_t;
-
-class buffer_map_t {
-   public:
-    void insert(std::tuple<Deviceid, DeviceAddr> buf_attr, Buffer *buffer) {
-        std::scoped_lock lock(this->map_mutex);
-        this->map.insert({buf_attr, buffer});
-    }
-
-    void erase(std::tuple<Deviceid, DeviceAddr> buf_attr) {
-        std::scoped_lock lock(this->map_mutex);
-        this->map.erase(buf_attr);
-    }
-
-    std::map<std::tuple<Deviceid, DeviceAddr>, Buffer *> value() {
-        std::scoped_lock lock(this->map_mutex);
-        return this->map;
-    }
-
-    ~buffer_map_t() { TT_ASSERT(this->map.empty(), "Not all buffers deallocated by runtime!"); }
-
-   private:
-    std::mutex map_mutex;
-    std::map<std::tuple<Deviceid, DeviceAddr>, Buffer *> map = {};
-};
-
-extern buffer_map_t BUFFER_MAP;
-}  // namespace detail
-
 inline namespace v0 {
 
 using HostDataType = std::variant<
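Since num_cores() now returns std::optional<uint32_t> (std::nullopt for non-sharded layouts, where it used to return 1), call sites distinguish the two cases explicitly. A sketch of the migration; both helpers are hypothetical:

    // Sketch only: adapting a call site to the optional return.
    if (auto num_cores = buffer->num_cores()) {
        handle_sharded(*num_cores);  // hypothetical helper for sharded buffers
    } else {
        handle_interleaved();        // hypothetical helper for the non-sharded path
    }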
diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp
index 0479807018f..8afff15103b 100644
--- a/tt_metal/impl/device/device.cpp
+++ b/tt_metal/impl/device/device.cpp
@@ -2949,10 +2949,8 @@ bool Device::close() {
     tt::Cluster::instance().l1_barrier(id_);
     allocator::clear(*this->allocator_);
     // After device close, no buffers on this device should be used
-    for (const auto &[buf_attr, buf] : detail::BUFFER_MAP.value()) {
-        if (std::get<0>(buf_attr) == this->id()) {
-            DeallocateBuffer(*buf);
-        }
+    for (const auto &buf : this->get_allocated_buffers()) {
+        DeallocateBuffer(*buf);
     }
 
     this->compute_cores_.clear();
@@ -3174,6 +3172,11 @@ void Device::dump_memory_blocks(const BufferType &buffer_type, std::ofstream &ou
     return allocator::dump_memory_blocks(*this->allocator_, buffer_type, out);
 }
 
+const std::unordered_set<Buffer *> &Device::get_allocated_buffers() const {
+    this->check_allocator_is_initialized();
+    return allocator::get_allocated_buffers(*this->allocator_);
+}
+
 void Device::deallocate_buffers(){
     allocator::deallocate_buffers(*allocator_);
 }
diff --git a/tt_metal/impl/device/device.hpp b/tt_metal/impl/device/device.hpp
index 7beb58f3ea8..dce53a1eae8 100644
--- a/tt_metal/impl/device/device.hpp
+++ b/tt_metal/impl/device/device.hpp
@@ -197,6 +197,8 @@ class Device {
     uint32_t get_noc_unicast_encoding(uint8_t noc_index, const CoreCoord& physical_core) const;
     uint32_t get_noc_multicast_encoding(uint8_t noc_index, const CoreRange& physical_cores) const;
 
+    const std::unordered_set<Buffer *> &get_allocated_buffers() const;
+
     void deallocate_buffers();
 
     // machine epsilon
diff --git a/tt_metal/impl/dispatch/command_queue.hpp b/tt_metal/impl/dispatch/command_queue.hpp
index 64f6c5407b7..a840fd19b8a 100644
--- a/tt_metal/impl/dispatch/command_queue.hpp
+++ b/tt_metal/impl/dispatch/command_queue.hpp
@@ -478,9 +478,6 @@ using CompletionReaderQueue = LockFreeQueue<CompletionReaderVariant>;
 struct AllocBufferMetadata {
     Buffer* buffer;
     std::reference_wrapper<Allocator> allocator;
-    BufferType buffer_type;
-    uint32_t device_address;
-    bool bottom_up;
 };
 
 struct RuntimeArgsMetadata {
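Device::get_allocated_buffers() gives callers the per-device view that scanning detail::BUFFER_MAP used to provide. A sketch of a caller, assuming device is an open Device*:

    // Sketch only: enumerate this device's live buffers via its allocator.
    #include <iostream>
    const std::unordered_set<tt::tt_metal::Buffer *> &live = device->get_allocated_buffers();
    std::cout << live.size() << " buffers allocated on device " << device->id() << std::endl;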
}
@@ -831,7 +827,7 @@ void DeallocateBuffer(Buffer *buffer) {
         return;
     }
 
-    allocator::deallocate_buffer(*buffer->device()->allocator_, buffer->address(), buffer->buffer_type());
+    allocator::deallocate_buffer(*buffer->device()->allocator_, buffer);
 }
 
 }  // namespace detail
diff --git a/ttnn/cpp/ttnn/graph/graph_processor.cpp b/ttnn/cpp/ttnn/graph/graph_processor.cpp
index bebeadebd9d..882f9588a22 100644
--- a/ttnn/cpp/ttnn/graph/graph_processor.cpp
+++ b/ttnn/cpp/ttnn/graph/graph_processor.cpp
@@ -90,7 +90,7 @@ GraphProcessor::GraphProcessor(RunMode mode) : run_mode(mode) {
     end_function_any_map[typeid(std::reference_wrapper<Tensor>)] = [ptr = this] (const std::any& val) mutable {ptr->end_function_process_tensor(val);};
 }
 
-void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer, bool bottom_up) {
+void GraphProcessor::track_allocate(const tt::tt_metal::Buffer* buffer) {
     const std::lock_guard<std::mutex> lock(mutex);
     auto buf_id = add_buffer(buffer);
 
@@ -478,7 +478,7 @@ nlohmann::json GraphProcessor::end_graph_capture() {
     return res;
 }
 
-bool ProcessorHooks::hook_allocate(const tt::tt_metal::Buffer* buffer, bool bottom_up) {
+bool ProcessorHooks::hook_allocate(const tt::tt_metal::Buffer* buffer) {
     return do_block;
 }
 
diff --git a/ttnn/cpp/ttnn/graph/graph_processor.hpp b/ttnn/cpp/ttnn/graph/graph_processor.hpp
index 4f7d6f1b6e7..83179dabe59 100644
--- a/ttnn/cpp/ttnn/graph/graph_processor.hpp
+++ b/ttnn/cpp/ttnn/graph/graph_processor.hpp
@@ -22,7 +22,7 @@ namespace ttnn::graph {
    public:
     ProcessorHooks() = default;
 
-    bool hook_allocate(const tt::tt_metal::Buffer* buffer, bool bottom_up) override;
+    bool hook_allocate(const tt::tt_metal::Buffer* buffer) override;
 
     bool hook_deallocate(tt::tt_metal::Buffer* buffer) override;
 
@@ -40,7 +40,7 @@ namespace ttnn::graph {
     GraphProcessor(tt::tt_metal::IGraphProcessor::RunMode mode);
 
     ~GraphProcessor() override;
 
-    void track_allocate(const tt::tt_metal::Buffer* buffer, bool bottom_up) override;
+    void track_allocate(const tt::tt_metal::Buffer* buffer) override;
 
     void track_deallocate(tt::tt_metal::Buffer* buffer) override;
 
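For reference, the request size the new AllocateBuffer derives before delegating to allocator::allocate_buffer reduces to the expression below; this restates the tt_metal.cpp hunk above rather than adding behavior:

    // Sketch only: total allocation size for sharded vs. non-sharded buffers.
    DeviceAddr size = is_sharded(buffer->buffer_layout())
        ? buffer->shard_spec().size() * buffer->num_cores().value() * buffer->page_size()
        : buffer->size();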
diff --git a/ttnn/cpp/ttnn/reports.hpp b/ttnn/cpp/ttnn/reports.hpp
index 9392f8eda7c..0eee2efedbc 100644
--- a/ttnn/cpp/ttnn/reports.hpp
+++ b/ttnn/cpp/ttnn/reports.hpp
@@ -7,6 +7,7 @@
 #include <vector>
 
 #include "tt_metal/impl/buffers/buffer.hpp"
+#include "tt_metal/impl/device/device_pool.hpp"
 
 namespace ttnn {
@@ -64,49 +65,52 @@ struct BufferInfo {
 std::vector<BufferInfo> get_buffers() {
     std::vector<BufferInfo> buffer_infos;
-    for (const auto &[key, buffer] : tt::tt_metal::detail::BUFFER_MAP.value()) {
-        auto [device_id, address] = key;
-        auto device = buffer->device();
-
-        auto num_pages = buffer->num_pages();
-        auto page_size = buffer->page_size();
-        auto num_banks = device->num_banks(buffer->buffer_type());
-
-        std::map<uint32_t, uint32_t> bank_to_num_pages;
-        if (buffer->buffer_layout() == tt::tt_metal::TensorMemoryLayout::INTERLEAVED) {
-            uint32_t bank_id = 0;
-            for (int page_index = 0; page_index < num_pages; page_index++) {
-                if (bank_to_num_pages.find(bank_id) == bank_to_num_pages.end()) {
-                    bank_to_num_pages[bank_id] = 0;
+    for (const auto &device : tt::DevicePool::instance().get_all_active_devices()) {
+        for (const auto &buffer : device->get_allocated_buffers()) {
+            auto device_id = device->id();
+            auto address = buffer->address();
+
+            auto num_pages = buffer->num_pages();
+            auto page_size = buffer->page_size();
+            auto num_banks = device->num_banks(buffer->buffer_type());
+
+            std::map<uint32_t, uint32_t> bank_to_num_pages;
+            if (buffer->buffer_layout() == tt::tt_metal::TensorMemoryLayout::INTERLEAVED) {
+                uint32_t bank_id = 0;
+                for (int page_index = 0; page_index < num_pages; page_index++) {
+                    if (bank_to_num_pages.find(bank_id) == bank_to_num_pages.end()) {
+                        bank_to_num_pages[bank_id] = 0;
+                    }
+                    bank_to_num_pages[bank_id]++;
+                    bank_id = (bank_id + 1) % num_banks;
                 }
-                bank_to_num_pages[bank_id]++;
-                bank_id = (bank_id + 1) % num_banks;
-            }
-        } else {
-            const auto& buffer_page_mapping = *buffer->get_buffer_page_mapping();
-            for (int page_index = 0; page_index < num_pages; page_index++) {
-                auto dev_page_index = buffer_page_mapping.host_page_to_dev_page_mapping_[page_index];
-                auto core = buffer_page_mapping.all_cores_[buffer_page_mapping.dev_page_to_core_mapping_[dev_page_index]];
-                auto bank_id = device->bank_ids_from_logical_core(buffer->buffer_type(), core)[0];
-
-                if (bank_to_num_pages.find(bank_id) == bank_to_num_pages.end()) {
-                    bank_to_num_pages[bank_id] = 0;
+            } else {
+                const auto &buffer_page_mapping = *buffer->get_buffer_page_mapping();
+                for (int page_index = 0; page_index < num_pages; page_index++) {
+                    auto dev_page_index = buffer_page_mapping.host_page_to_dev_page_mapping_[page_index];
+                    auto core =
+                        buffer_page_mapping.all_cores_[buffer_page_mapping.dev_page_to_core_mapping_[dev_page_index]];
+                    auto bank_id = device->bank_ids_from_logical_core(buffer->buffer_type(), core)[0];
+
+                    if (bank_to_num_pages.find(bank_id) == bank_to_num_pages.end()) {
+                        bank_to_num_pages[bank_id] = 0;
+                    }
+                    bank_to_num_pages[bank_id]++;
                 }
-                bank_to_num_pages[bank_id]++;
             }
-        }
 
-        auto max_num_pages =
-            std::max_element(bank_to_num_pages.begin(), bank_to_num_pages.end(), [](const auto &a, const auto &b) {
-                return a.second < b.second;
-            });
-
-        BufferInfo buffer_info = {};
-        buffer_info.device_id = device_id;
-        buffer_info.address = address;
-        buffer_info.max_size_per_bank = (*max_num_pages).second * page_size;
-        buffer_info.buffer_type = buffer->buffer_type();
-        buffer_infos.push_back(buffer_info);
+            auto max_num_pages =
+                std::max_element(bank_to_num_pages.begin(), bank_to_num_pages.end(), [](const auto &a, const auto &b) {
+                    return a.second < b.second;
+                });
+
+            BufferInfo buffer_info = {};
+            buffer_info.device_id = device_id;
+            buffer_info.address = address;
+            buffer_info.max_size_per_bank = (*max_num_pages).second * page_size;
+            buffer_info.buffer_type = buffer->buffer_type();
+            buffer_infos.push_back(buffer_info);
+        }
     }
     return buffer_infos;
 }
@@ -125,23 +129,35 @@ struct BufferPageInfo {
 
 std::vector<BufferPageInfo> get_buffer_pages() {
     std::vector<BufferPageInfo> buffer_page_infos;
-    for (const auto &[key, buffer] : tt::tt_metal::detail::BUFFER_MAP.value()) {
-        if (not buffer->is_l1()) {
-            continue;
-        }
+    for (const auto &device : tt::DevicePool::instance().get_all_active_devices()) {
+        for (const auto &buffer : device->get_allocated_buffers()) {
+            if (not buffer->is_l1()) {
+                continue;
+            }
 
-        auto [device_id, address] = key;
-        auto device = buffer->device();
+            auto device_id = device->id();
+            auto address = buffer->address();
 
-        uint32_t page_size = buffer->page_size();
-        auto num_pages = buffer->num_pages();
-        auto num_banks = device->num_banks(buffer->buffer_type());
+            auto page_size = buffer->page_size();
+            auto num_pages = buffer->num_pages();
+            auto num_banks = device->num_banks(buffer->buffer_type());
 
-        if (buffer->buffer_layout() == tt::tt_metal::TensorMemoryLayout::INTERLEAVED) {
             uint32_t bank_id = 0;
             for (int page_index = 0; page_index < num_pages; page_index++) {
-                auto page_address = buffer->page_address(bank_id, page_index);
-                auto core = buffer->logical_core_from_bank_id(bank_id);
+                CoreCoord core;
+                DeviceAddr page_address = 0;
+
+                if (buffer->buffer_layout() == tt::tt_metal::TensorMemoryLayout::INTERLEAVED) {
+                    page_address = buffer->page_address(bank_id, page_index);
+                    core = buffer->logical_core_from_bank_id(bank_id);
+                    bank_id = (bank_id + 1) % num_banks;
+                } else {
+                    const auto &buffer_page_mapping = *buffer->get_buffer_page_mapping();
+                    auto dev_page_index = buffer_page_mapping.host_page_to_dev_page_mapping_[page_index];
+                    core = buffer_page_mapping.all_cores_[buffer_page_mapping.dev_page_to_core_mapping_[dev_page_index]];
+                    bank_id = device->bank_ids_from_logical_core(buffer->buffer_type(), core)[0];
+                    page_address = buffer->sharded_page_address(bank_id, dev_page_index);
+                }
 
                 BufferPageInfo buffer_page_info = {};
                 buffer_page_info.device_id = device_id;
                 buffer_page_info.address = address;
                 buffer_page_info.core_y = core.y;
                 buffer_page_info.core_x = core.x;
                 buffer_page_info.bank_id = bank_id;
                 buffer_page_info.page_index = page_index;
                 buffer_page_info.page_address = page_address;
                 buffer_page_info.page_size = page_size;
                 buffer_page_info.buffer_type = buffer->buffer_type();
@@ -153,28 +169,7 @@ std::vector<BufferPageInfo> get_buffer_pages() {
-                buffer_page_infos.push_back(buffer_page_info);
-                bank_id = (bank_id + 1) % num_banks;
-            }
-        } else {
-            const auto& buffer_page_mapping = *buffer->get_buffer_page_mapping();
-            for (int page_index = 0; page_index < num_pages; page_index++) {
-                auto dev_page_index = buffer_page_mapping.host_page_to_dev_page_mapping_[page_index];
-                auto core = buffer_page_mapping.all_cores_[buffer_page_mapping.dev_page_to_core_mapping_[dev_page_index]];
-                auto bank_id = device->bank_ids_from_logical_core(buffer->buffer_type(), core)[0];
-                auto page_address = buffer->sharded_page_address(bank_id, dev_page_index);
-
-                BufferPageInfo buffer_page_info = {};
-                buffer_page_info.device_id = device_id;
-                buffer_page_info.address = address;
-                buffer_page_info.core_y = core.y;
-                buffer_page_info.core_x = core.x;
-                buffer_page_info.bank_id = bank_id;
-                buffer_page_info.page_index = page_index;
-                buffer_page_info.page_address = page_address;
-                buffer_page_info.page_size = page_size;
-                buffer_page_info.buffer_type = buffer->buffer_type();
                 buffer_page_infos.push_back(buffer_page_info);
             }
         }
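The reporting traversal that replaces the global map scan, shown in isolation (a sketch, assuming the headers included by reports.hpp above):

    // Sketch only: DevicePool -> device -> allocator-tracked buffers.
    for (tt::tt_metal::Device *device : tt::DevicePool::instance().get_all_active_devices()) {
        for (tt::tt_metal::Buffer *buffer : device->get_allocated_buffers()) {
            // inspect buffer->address(), buffer->buffer_type(), ...
        }
    }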