Skip to content

Commit

Permalink
#0: Avoid using std::atomic::wait, it seems to have perf implications
Browse files Browse the repository at this point in the history
  • Loading branch information
sminakov-tt committed Oct 24, 2024
1 parent 16946d0 commit 0c159c7
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 9 deletions.
22 changes: 13 additions & 9 deletions tt_metal/impl/buffers/buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,10 @@ std::shared_ptr<Buffer> Buffer::create(
buffer->address_ = detail::AllocateBuffer(buffer.get(), bottom_up);
detail::BUFFER_MAP.insert({buffer->device_->id(), buffer->address_}, buffer.get());

buffer->allocation_status_.store(AllocationStatus::ALLOCATED, std::memory_order::release);
buffer->allocation_status_.notify_all();
std::unique_lock lock(buffer->allocation_mutex_);
buffer->allocation_status_.store(AllocationStatus::ALLOCATED, std::memory_order::relaxed);
lock.unlock();
buffer->allocation_cv_.notify_all();
});

return buffer;
Expand Down Expand Up @@ -263,23 +265,25 @@ void Buffer::deallocate_impl() {
}

bool Buffer::is_allocated() const {
if (deallocation_requested_.load(std::memory_order::relaxed)) {
return false;
}

auto allocation_status = allocation_status_.load(std::memory_order::relaxed);

if (device_->can_use_passthrough_scheduling()) {
return allocation_status == AllocationStatus::ALLOCATED;
}

// For calls from different threads we consider buffer to be allocated even if it's just ALLOCATION_REQUESTED,
// because once the caller will try to access it, the buffer will already be fully allocated
return allocation_status == AllocationStatus::ALLOCATION_REQUESTED || allocation_status == AllocationStatus::ALLOCATED;
// because once the caller will try to access it, the buffer will already be fully allocated. For the same reason we need to check deallocation_requested_ too.
bool deallocation_requested = deallocation_requested_.load(std::memory_order::relaxed);
return (allocation_status == AllocationStatus::ALLOCATION_REQUESTED || allocation_status == AllocationStatus::ALLOCATED) && !deallocation_requested;
}

uint32_t Buffer::address() const {
allocation_status_.wait(AllocationStatus::ALLOCATION_REQUESTED, std::memory_order::acquire);
if (device_->can_use_passthrough_scheduling()) {
return address_;
}

std::unique_lock lock(allocation_mutex_);
allocation_cv_.wait(lock, [this] { return this->allocation_status_.load(std::memory_order::relaxed) != AllocationStatus::ALLOCATION_REQUESTED; });
return address_;
}

Expand Down
3 changes: 3 additions & 0 deletions tt_metal/impl/buffers/buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <map>
#include <mutex>
#include <condition_variable>
#include <optional>

#include "common/bfloat16.hpp"
Expand Down Expand Up @@ -234,6 +235,8 @@ class Buffer final {

std::atomic<AllocationStatus> allocation_status_ = AllocationStatus::ALLOCATION_REQUESTED;
DeviceAddr address_ = 0;
mutable std::mutex allocation_mutex_;
mutable std::condition_variable allocation_cv_;
// Used exclusively for is_allocated() method
std::atomic<bool> deallocation_requested_ = false;

Expand Down

0 comments on commit 0c159c7

Please sign in to comment.