From c92c08bc4e36eb1e28a6fc8421d95c9aac0d3e0b Mon Sep 17 00:00:00 2001 From: asaigal Date: Thu, 27 Jun 2024 22:07:21 +0000 Subject: [PATCH] #0: Use CV to wait for cq_reader in production mode. Remove enqueue_record_event for NB calls --- .../tools/profiler/test_device_profiler.py | 8 +++---- .../command_queue/test_events.cpp | 6 ++--- tt_metal/impl/dispatch/command_queue.cpp | 22 +++++++------------ tt_metal/impl/dispatch/command_queue.hpp | 2 ++ 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/tests/tt_metal/tools/profiler/test_device_profiler.py b/tests/tt_metal/tools/profiler/test_device_profiler.py index 05071a862a6e..6a88acd164b3 100644 --- a/tests/tt_metal/tools/profiler/test_device_profiler.py +++ b/tests/tt_metal/tools/profiler/test_device_profiler.py @@ -145,12 +145,12 @@ def test_dispatch_cores(): ZONE_COUNT = 37 REF_COUNT_DICT = { "grayskull": { - "Tensix CQ Dispatch": 33, - "Tensix CQ Prefetch": 36, + "Tensix CQ Dispatch": 18, + "Tensix CQ Prefetch": 21, }, "wormhole_b0": { - "Tensix CQ Dispatch": 33, - "Tensix CQ Prefetch": 36, + "Tensix CQ Dispatch": 18, + "Tensix CQ Prefetch": 21, }, } diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_events.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_events.cpp index f0165ecd210d..689b6c74a11b 100644 --- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_events.cpp +++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_events.cpp @@ -40,9 +40,9 @@ TEST_F(CommandQueueFixture, TestEventsDataMovementWrittenToCompletionQueueInOrde buffers.push_back(std::make_shared(this->device_, page_size, page_size, BufferType::DRAM)); if (data_movement_mode == DataMovementMode::WRITE) { - EnqueueWriteBuffer(this->device_->command_queue(), buffers.back(), page, false); + EnqueueWriteBuffer(this->device_->command_queue(), buffers.back(), page, true); } else if (data_movement_mode == DataMovementMode::READ) { - EnqueueReadBuffer(this->device_->command_queue(), buffers.back(), page, false); + EnqueueReadBuffer(this->device_->command_queue(), buffers.back(), page, true); } } Finish(this->device_->command_queue()); @@ -280,7 +280,7 @@ TEST_F(CommandQueueFixture, TestEventsMixedWriteBufferRecordWaitSynchronize) { EXPECT_EQ(event->event_id, cmds_issued_per_cq); std::shared_ptr buf = std::make_shared(this->device_, page_size, page_size, BufferType::DRAM); - EnqueueWriteBuffer(this->device_->command_queue(), buf, page, false); + EnqueueWriteBuffer(this->device_->command_queue(), buf, page, true); EnqueueWaitForEvent(this->device_->command_queue(), event); if (i % 10 == 0) { diff --git a/tt_metal/impl/dispatch/command_queue.cpp b/tt_metal/impl/dispatch/command_queue.cpp index 89452692984d..0a2d94fcc170 100644 --- a/tt_metal/impl/dispatch/command_queue.cpp +++ b/tt_metal/impl/dispatch/command_queue.cpp @@ -1537,9 +1537,6 @@ void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blockin } if (blocking) { this->finish(); - } else { - std::shared_ptr event = std::make_shared(); - this->enqueue_record_event(event); } } else { // this is a streaming command so we don't need to break down to multiple @@ -1564,10 +1561,6 @@ void HWCommandQueue::enqueue_read_buffer(Buffer& buffer, void* dst, bool blockin src_page_index)); this->enqueue_command(command, blocking); this->increment_num_entries_in_completion_q(); - if (not blocking) { // should this be unconditional? - std::shared_ptr event = std::make_shared(); - this->enqueue_record_event(event); - } } } @@ -1797,9 +1790,6 @@ void HWCommandQueue::enqueue_write_buffer(const Buffer& buffer, const void* src, if (blocking) { this->finish(); - } else { - std::shared_ptr event = std::make_shared(); - this->enqueue_record_event(event); } } @@ -1927,9 +1917,6 @@ void HWCommandQueue::enqueue_trace(const uint32_t trace_id, bool blocking) { if (blocking) { this->finish(); - } else { - std::shared_ptr event = std::make_shared(); - this->enqueue_record_event(event); } } @@ -2184,6 +2171,10 @@ void HWCommandQueue::read_completion_queue() { read_descriptor); } this->num_completed_completion_q_reads += num_events_to_read; + { + std::unique_lock lock(this->reads_processed_cv_mutex); + this->reads_processed_cv.notify_one(); + } } else if (this->exit_condition) { return; } @@ -2210,7 +2201,10 @@ void HWCommandQueue::finish() { } } } else { - while (this->num_entries_in_completion_q > this->num_completed_completion_q_reads); + std::unique_lock lock(this->reads_processed_cv_mutex); + this->reads_processed_cv.wait(lock, [this] { + return this->num_entries_in_completion_q == this->num_completed_completion_q_reads; + }); } } diff --git a/tt_metal/impl/dispatch/command_queue.hpp b/tt_metal/impl/dispatch/command_queue.hpp index a371e8c8768f..9238e199c1ac 100644 --- a/tt_metal/impl/dispatch/command_queue.hpp +++ b/tt_metal/impl/dispatch/command_queue.hpp @@ -528,6 +528,8 @@ class HWCommandQueue { std::condition_variable reader_thread_cv; std::mutex reader_thread_cv_mutex; + std::condition_variable reads_processed_cv; + std::mutex reads_processed_cv_mutex; CoreType get_dispatch_core_type(); void copy_into_user_space(