From 25a35e333ace1772d2c0ab1854162df78e9d66d3 Mon Sep 17 00:00:00 2001
From: Aditya Saigal <129097327+tt-asaigal@users.noreply.github.com>
Date: Mon, 28 Oct 2024 19:12:01 -0400
Subject: [PATCH] #0: Move SynchronizeWorkerThreads to tt_metal::detail
 namespace (#14322)

- This is a core tt_metal API and shouldn't be in tensor_ops
- Also account for worker thread deadlock when calling this API
---
 tt_metal/detail/tt_metal.hpp        |  2 ++
 tt_metal/tt_metal.cpp               | 17 +++++++++++++++++
 ttnn/cpp/ttnn/tensor/tensor_ops.cpp | 16 +---------------
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/tt_metal/detail/tt_metal.hpp b/tt_metal/detail/tt_metal.hpp
index e5464e721a6..a4e84ef12b4 100644
--- a/tt_metal/detail/tt_metal.hpp
+++ b/tt_metal/detail/tt_metal.hpp
@@ -279,5 +279,7 @@ inline namespace v0 {
     DeviceAddr AllocateBuffer(Buffer* buffer);
 
     void DeallocateBuffer(Buffer *buffer);
+
+    void SynchronizeWorkerThreads(const std::vector<Device*>& workers);
 } // namespace detail
 } // namespace tt::tt_metal
diff --git a/tt_metal/tt_metal.cpp b/tt_metal/tt_metal.cpp
index 001cec165e1..5b56a11e844 100644
--- a/tt_metal/tt_metal.cpp
+++ b/tt_metal/tt_metal.cpp
@@ -830,6 +830,23 @@ void DeallocateBuffer(Buffer *buffer) {
     allocator::deallocate_buffer(*buffer->device()->allocator_, buffer);
 }
 
+void SynchronizeWorkerThreads(const std::vector<Device*>& workers) {
+    if (tt::tt_metal::detail::InWorkerThread()) {
+        // Early exit if in a worker thread, since waiting for the worker
+        // queue to become empty inside a worker thread leads to a deadlock.
+        // Synchronizing in a worker thread should be a nop by definition.
+        return;
+    }
+    // Push empty work to threads and ensure it's been picked up
+    for (auto target_device : workers) {
+        target_device->work_executor.push_work([](){});
+    }
+    // Block until work has been picked up, to flush the queue
+    for (auto target_device : workers) {
+        while(not target_device->work_executor.worker_queue.empty());
+    }
+}
+
 } // namespace detail
 
 inline namespace v0 {
diff --git a/ttnn/cpp/ttnn/tensor/tensor_ops.cpp b/ttnn/cpp/ttnn/tensor/tensor_ops.cpp
index c133a1aff71..c2460932205 100644
--- a/ttnn/cpp/ttnn/tensor/tensor_ops.cpp
+++ b/ttnn/cpp/ttnn/tensor/tensor_ops.cpp
@@ -23,20 +23,6 @@
 
 #include "ttnn/core.hpp"
 
-namespace{
-    inline void SynchronizeWorkerThreads(const std::vector<Device*>& workers) {
-        // Push empty work to threads and ensure it's been picked up
-        for (auto target_device : workers) {
-            target_device->work_executor.push_work([](){});
-        }
-        // Block until work has been picked up, to flush the queue
-        for (auto target_device : workers) {
-            while(not target_device->work_executor.worker_queue.empty());
-        }
-    }
-}
-
-
 namespace tt::tt_metal::tensor_ops {
 
 Tensor tensor_to(const Tensor& input_tensor, Device* target_device, const MemoryConfig& mem_config) {
@@ -147,7 +133,7 @@ Tensor tensor_cpu(const Tensor& input_tensor, bool blocking, uint8_t cq_id) {
     }
 
     if (blocking) {
-        SynchronizeWorkerThreads(workers);
+        tt::tt_metal::detail::SynchronizeWorkerThreads(workers);
     }
     // Update main_thread_ref_count for tensor after pushing to queue.
     input_tensor.tensor_attributes->update_main_thread_ref_count(workers.at(0), original_tensor_ref_count);
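
Usage note (not part of the patch): a minimal caller sketch for the relocated API is shown below. It assumes the tt_metal/host_api.hpp header and the CreateDevice/CloseDevice entry points of the surrounding tt_metal host API; only SynchronizeWorkerThreads itself comes from the diff above, and the single-device setup is purely illustrative.

    // Hypothetical caller sketch: drain outstanding worker-thread work from
    // the main thread before reading results on the host.
    #include <vector>

    #include "tt_metal/detail/tt_metal.hpp"
    #include "tt_metal/host_api.hpp"

    int main() {
        // Open one device; a multi-device caller would collect every Device*
        // whose worker queue should be flushed.
        tt::tt_metal::Device* device = tt::tt_metal::CreateDevice(/*device_id=*/0);
        std::vector<tt::tt_metal::Device*> workers = {device};

        // ... enqueue work that is dispatched through the worker threads ...

        // Blocks until each worker queue is empty. With the InWorkerThread()
        // early exit added by this patch, calling it from inside a worker
        // thread is a no-op instead of a deadlock.
        tt::tt_metal::detail::SynchronizeWorkerThreads(workers);

        tt::tt_metal::CloseDevice(device);
        return 0;
    }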