Skip to content

Commit

Permalink
#0: Move SynchronizeWorkerThreads to tt_metal::detail namespace (#14322)
Browse files Browse the repository at this point in the history
- This is a core tt_metal API and shouldn't be in tensor_ops
- Also account for worker thread deadlock when calling this API
  • Loading branch information
tt-asaigal authored Oct 28, 2024
1 parent cbcd5b3 commit 25a35e3
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 15 deletions.
2 changes: 2 additions & 0 deletions tt_metal/detail/tt_metal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,5 +279,7 @@ inline namespace v0 {
DeviceAddr AllocateBuffer(Buffer* buffer);

void DeallocateBuffer(Buffer *buffer);

void SynchronizeWorkerThreads(const std::vector<Device*>& workers);
} // namespace detail
} // namespace tt::tt_metal
17 changes: 17 additions & 0 deletions tt_metal/tt_metal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,23 @@ void DeallocateBuffer(Buffer *buffer) {
allocator::deallocate_buffer(*buffer->device()->allocator_, buffer);
}

// Flush the work queues of the given worker threads.
//
// Enqueues a no-op task on every worker's executor, then spins until each
// worker queue reports empty — at that point every task enqueued before the
// call (including the no-op) has been picked up. Invoking this from inside
// a worker thread would deadlock (the thread would wait on its own queue),
// so that case returns immediately as a no-op.
//
// NOTE(review): the wait is a raw busy-spin with no yield/backoff — it pegs
// a core while draining; presumably queues drain quickly, but confirm.
void SynchronizeWorkerThreads(const std::vector<Device*>& workers) {
    // Synchronizing from within a worker is a no-op by definition; waiting
    // here would deadlock on the caller's own queue.
    if (tt::tt_metal::detail::InWorkerThread()) {
        return;
    }
    // Stage 1: stage an empty task on every worker executor.
    for (auto device : workers) {
        device->work_executor.push_work([]() {});
    }
    // Stage 2: block until each queue has drained, flushing all prior work.
    for (auto device : workers) {
        while (not device->work_executor.worker_queue.empty()) {
        }
    }
}

} // namespace detail

inline namespace v0 {
Expand Down
16 changes: 1 addition & 15 deletions ttnn/cpp/ttnn/tensor/tensor_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,6 @@
#include "ttnn/core.hpp"


// File-local helper (anonymous namespace): flush the work queues of the
// given worker threads by staging a no-op on each and spinning until the
// queues drain.
// NOTE(review): unlike the tt_metal::detail version, this variant has no
// in-worker-thread guard, so calling it from a worker thread busy-waits on
// the caller's own queue and deadlocks.
namespace{
inline void SynchronizeWorkerThreads(const std::vector<Device*>& workers) {
// Push empty work to threads and ensure it's been picked up
for (auto target_device : workers) {
target_device->work_executor.push_work([](){});
}
// Block until work has been picked up, to flush the queue (raw busy-spin,
// no yield/backoff)
for (auto target_device : workers) {
while(not target_device->work_executor.worker_queue.empty());
}
}
}


namespace tt::tt_metal::tensor_ops {

Tensor tensor_to(const Tensor& input_tensor, Device* target_device, const MemoryConfig& mem_config) {
Expand Down Expand Up @@ -147,7 +133,7 @@ Tensor tensor_cpu(const Tensor& input_tensor, bool blocking, uint8_t cq_id) {
}

if (blocking) {
SynchronizeWorkerThreads(workers);
tt::tt_metal::detail::SynchronizeWorkerThreads(workers);
}
// Update main_thread_ref_count for tensor after pushing to queue.
input_tensor.tensor_attributes->update_main_thread_ref_count(workers.at(0), original_tensor_ref_count);
Expand Down

0 comments on commit 25a35e3

Please sign in to comment.