diff --git a/conftest.py b/conftest.py index 6962e216c02..f872bce2998 100644 --- a/conftest.py +++ b/conftest.py @@ -274,7 +274,6 @@ def device(request, device_params): yield device ttl.device.DumpDeviceProfiler(device) - ttl.device.DeallocateBuffers(device) ttl.device.Synchronize(device) ttl.device.CloseDevice(device) @@ -293,7 +292,6 @@ def pcie_devices(request, device_params): for device in devices.values(): ttl.device.DumpDeviceProfiler(device) - ttl.device.DeallocateBuffers(device) ttl.device.CloseDevices(devices) @@ -311,7 +309,6 @@ def all_devices(request, device_params): for device in devices.values(): ttl.device.DumpDeviceProfiler(device) - ttl.device.DeallocateBuffers(device) ttl.device.CloseDevices(devices) @@ -337,7 +334,6 @@ def device_mesh(request, silicon_arch_name, silicon_arch_wormhole_b0, device_par for device in device_mesh.get_devices(): ttl.device.DumpDeviceProfiler(device) - ttl.device.DeallocateBuffers(device) ttnn.close_device_mesh(device_mesh) del device_mesh @@ -364,7 +360,6 @@ def pcie_device_mesh(request, silicon_arch_name, silicon_arch_wormhole_b0, devic for device in device_mesh.get_devices(): ttl.device.DumpDeviceProfiler(device) - ttl.device.DeallocateBuffers(device) ttnn.close_device_mesh(device_mesh) del device_mesh @@ -393,7 +388,6 @@ def t3k_device_mesh(request, silicon_arch_name, silicon_arch_wormhole_b0, device for device in device_mesh.get_devices(): ttl.device.DumpDeviceProfiler(device) - ttl.device.DeallocateBuffers(device) ttnn.close_device_mesh(device_mesh) del device_mesh diff --git a/tt_eager/tt_lib/csrc/tt_lib_bindings.cpp b/tt_eager/tt_lib/csrc/tt_lib_bindings.cpp index fd536c6d4dc..915276dfa39 100644 --- a/tt_eager/tt_lib/csrc/tt_lib_bindings.cpp +++ b/tt_eager/tt_lib/csrc/tt_lib_bindings.cpp @@ -211,7 +211,12 @@ void DeviceModule(py::module &m_device) { | last_dump | Last dump before process dies | bool | | No | +------------------+----------------------------------+-----------------------+-------------+----------+ )doc"); - m_device.def("DeallocateBuffers", &detail::DeallocateBuffers, R"doc( + m_device.def("DeallocateBuffers", + [] (Device* device) { + device->push_work([device] () mutable { + device->deallocate_buffers(); + }); + }, R"doc( Deallocate all buffers associated with Device handle )doc"); m_device.def("BeginTraceCapture", diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp index dcee0f81305..74d02a2f205 100644 --- a/tt_metal/impl/device/device.cpp +++ b/tt_metal/impl/device/device.cpp @@ -1651,7 +1651,7 @@ bool Device::close() { } hw_command_queue->terminate(); } - + this->work_executor.reset(); tt_metal::detail::DumpDeviceProfileResults(this, true); this->trace_buffer_pool_.clear(); @@ -1802,7 +1802,6 @@ bool Device::close() { this->sw_command_queues_.clear(); this->hw_command_queues_.clear(); this->sysmem_manager_.reset(); - this->work_executor.reset(); this->allocator_.reset(); this->initialized_ = false; diff --git a/tt_metal/impl/dispatch/work_executor.hpp b/tt_metal/impl/dispatch/work_executor.hpp index aaf00530402..9a7461569b4 100644 --- a/tt_metal/impl/dispatch/work_executor.hpp +++ b/tt_metal/impl/dispatch/work_executor.hpp @@ -116,6 +116,7 @@ class WorkExecutor { if (this->work_executor_mode == WorkExecutorMode::ASYNCHRONOUS) { stop_worker(); } + this->work_executor_mode = WorkExecutorMode::SYNCHRONOUS; } inline void run_worker() {