From ea957ac5e465b464b6ef3026256873c2a034c671 Mon Sep 17 00:00:00 2001
From: Kyle Mabee
Date: Thu, 29 Aug 2024 21:37:46 +0000
Subject: [PATCH] #12046: Populate/use Buffer ctor "allocate" field in
 CreateBuffer() & deallocate()

- The Buffer constructor already had an "allocate" parameter that prevented
  Buffer::allocate() from being called and skipped the BUFFER_MAP insert.
- It is now driven by CreateBuffer(): added to the Sharded/Interleaved
  configs, stored as state in the Buffer class, and respected by
  Buffer::deallocate(), which exits early if !allocate.
- Fixes a segfault on metal Device::close() in tt-mlir, where a Buffer is
  created as a view over memory whose lifetime (alloc/dealloc) is managed by
  tt-mlir but was deallocated again during teardown; such buffers can now be
  created with allocate=false.
---
 tt_metal/impl/buffers/buffer.cpp | 22 ++++++++++++++++------
 tt_metal/impl/buffers/buffer.hpp |  6 +++++-
 tt_metal/tt_metal.cpp            |  6 ++++--
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/tt_metal/impl/buffers/buffer.cpp b/tt_metal/impl/buffers/buffer.cpp
index d4a926416b7..402a90df85c 100644
--- a/tt_metal/impl/buffers/buffer.cpp
+++ b/tt_metal/impl/buffers/buffer.cpp
@@ -129,7 +129,8 @@ Buffer::Buffer(
     buffer_layout_(buffer_layout),
     shard_parameters_(shard_parameters),
     bottom_up_(bottom_up),
-    buffer_page_mapping_(nullptr) {
+    buffer_page_mapping_(nullptr),
+    allocate_(allocate) {
     TT_FATAL(this->device_ != nullptr and this->device_->allocator_ != nullptr);
     validate_buffer_size_and_page_size(size, page_size, buffer_type, buffer_layout, shard_parameters);
     if (allocate) {
@@ -205,8 +206,11 @@ Buffer::Buffer(const Buffer &other) :
     buffer_layout_(other.buffer_layout_),
     shard_parameters_(other.shard_parameters_),
     bottom_up_(other.bottom_up_),
-    buffer_page_mapping_(other.buffer_page_mapping_) {
-    this->allocate();
+    buffer_page_mapping_(other.buffer_page_mapping_),
+    allocate_(other.allocate_) {
+    if (this->allocate_) {
+        this->allocate();
+    }
 }
 
 Buffer &Buffer::operator=(const Buffer &other) {
@@ -219,7 +223,10 @@ Buffer &Buffer::operator=(const Buffer &other) {
         this->shard_parameters_ = other.shard_parameters_;
         this->bottom_up_ = other.bottom_up_;
         this->buffer_page_mapping_ = other.buffer_page_mapping_;
-        this->allocate();
+        this->allocate_ = other.allocate_;
+        if (this->allocate_) {
+            this->allocate();
+        }
     }
     return *this;
 }
@@ -233,7 +240,8 @@ Buffer::Buffer(Buffer &&other) :
     buffer_layout_(other.buffer_layout_),
     shard_parameters_(std::move(other.shard_parameters_)),
     bottom_up_(other.bottom_up_),
-    buffer_page_mapping_(std::move(other.buffer_page_mapping_)) {
+    buffer_page_mapping_(std::move(other.buffer_page_mapping_)),
+    allocate_(other.allocate_) {
     // Set `other.device_` to be nullptr so destroying other does not deallocate reserved address space that is
     // transferred to `this`
     other.device_ = nullptr;
@@ -250,6 +258,7 @@ Buffer &Buffer::operator=(Buffer &&other) {
         this->shard_parameters_ = std::move(other.shard_parameters_);
         this->bottom_up_ = other.bottom_up_;
         this->buffer_page_mapping_ = std::move(other.buffer_page_mapping_);
+        this->allocate_ = other.allocate_;
         // Set `other.device_` to be nullptr so destroying other does not deallocate reserved address space that is
         // transferred to `this`
         other.device_ = nullptr;
@@ -325,7 +334,8 @@ const std::shared_ptr<const BufferPageMapping>& Buffer::get_buffer_page_mapping(
 }
 
 void Buffer::deallocate() {
-    if (this->device_ == nullptr or not this->device_->initialized_ or this->size_ == 0) {
+
+    if (this->device_ == nullptr or not this->device_->initialized_ or this->size_ == 0 or not this->allocate_) {
         return;
     }
     // Mark as deallocated
diff --git a/tt_metal/impl/buffers/buffer.hpp b/tt_metal/impl/buffers/buffer.hpp
index d02142fc4df..0eeaa459c92 100644
--- a/tt_metal/impl/buffers/buffer.hpp
+++ b/tt_metal/impl/buffers/buffer.hpp
@@ -114,6 +114,7 @@ struct BufferConfig {
     uint64_t page_size;  // Size of unit being interleaved. For non-interleaved buffers: size == page_size
     BufferType buffer_type;
     TensorMemoryLayout buffer_layout = TensorMemoryLayout::INTERLEAVED;
+    bool allocate = true;
 };
 
 typedef BufferConfig InterleavedBufferConfig;
@@ -127,6 +128,7 @@ struct ShardedBufferConfig {
     BufferType buffer_type = BufferType::L1;
     TensorMemoryLayout buffer_layout = TensorMemoryLayout::HEIGHT_SHARDED;
     ShardSpecBuffer shard_parameters;
+    bool allocate = true;
 };
 
 bool is_sharded(const TensorMemoryLayout &layout);
@@ -152,7 +154,8 @@ class Buffer {
         buffer_type_(BufferType::DRAM),
         buffer_layout_(TensorMemoryLayout::INTERLEAVED),
         shard_parameters_(std::nullopt),
-        bottom_up_(std::nullopt) {}
+        bottom_up_(std::nullopt),
+        allocate_(true) {}
 
     Buffer(
         Device *device,
@@ -277,6 +280,7 @@ class Buffer {
     TensorMemoryLayout buffer_layout_;
     std::optional<ShardSpecBuffer> shard_parameters_;
     std::shared_ptr<const BufferPageMapping> buffer_page_mapping_;
+    bool allocate_ = true;
   protected:
    std::optional<bool> bottom_up_;
 };
diff --git a/tt_metal/tt_metal.cpp b/tt_metal/tt_metal.cpp
index 3812ae082cb..4cffba7db5a 100644
--- a/tt_metal/tt_metal.cpp
+++ b/tt_metal/tt_metal.cpp
@@ -976,7 +976,7 @@ uint32_t CreateSemaphore(
 
 std::shared_ptr<Buffer> CreateBuffer(const InterleavedBufferConfig &config) {
     return std::make_shared<Buffer>(
-        config.device, config.size, config.page_size, config.buffer_type, config.buffer_layout);
+        config.device, config.size, config.page_size, config.buffer_type, config.buffer_layout, std::nullopt, std::nullopt, config.allocate);
 }
 
 std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config) {
@@ -986,7 +986,9 @@ std::shared_ptr<Buffer> CreateBuffer(const ShardedBufferConfig &config) {
         config.page_size,
         config.buffer_type,
         config.buffer_layout,
-        config.shard_parameters);
+        config.shard_parameters,
+        std::nullopt,
+        config.allocate);
 }
 
 void DeallocateBuffer(Buffer &buffer) { buffer.deallocate(); }
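
Usage note (not part of the patch): the sketch below shows how a caller such as tt-mlir could use the new allocate field to wrap memory whose lifetime it manages itself. It relies only on names visible in the diff (CreateBuffer, InterleavedBufferConfig, BufferType, TensorMemoryLayout, the allocate field); the include paths, namespace, and the helper function are illustrative assumptions rather than part of this change.

    // Hypothetical sketch: construct a Buffer as a view over externally managed
    // memory, so tt-metal never allocates or frees the underlying storage.
    #include <cstdint>
    #include <memory>
    #include "tt_metal/host_api.hpp"  // assumed include path for the host API

    using namespace tt::tt_metal;     // assumed namespace

    std::shared_ptr<Buffer> make_buffer_view(Device *device, uint64_t size_bytes, uint64_t page_size) {
        InterleavedBufferConfig config;
        config.device = device;
        config.size = size_bytes;
        config.page_size = page_size;
        config.buffer_type = BufferType::DRAM;
        config.buffer_layout = TensorMemoryLayout::INTERLEAVED;
        config.allocate = false;  // new field: skip Buffer::allocate() and the BUFFER_MAP insert

        // No device memory is reserved here; the external owner handles alloc/dealloc.
        return CreateBuffer(config);
    }

Because allocate_ is stored on the Buffer, Buffer::deallocate() now returns early for such a buffer when DeallocateBuffer() or Device::close() teardown runs, avoiding the double deallocation that caused the segfault described in the commit message.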