From 4235b0dd1cd2fd6ef2f3be8a3bc1b4785a7e299a Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Tue, 5 Sep 2023 14:06:33 -0400 Subject: [PATCH] Fix D3D12 Surface Leak (#4106) --- wgpu-core/src/device/global.rs | 26 +++++++++++++++++++------- wgpu-core/src/device/mod.rs | 3 +-- wgpu-core/src/present.rs | 14 +++++++++++++- wgpu-hal/src/dx12/device.rs | 5 ++++- wgpu-hal/src/dx12/mod.rs | 33 ++++++++++++++++++++++++--------- wgpu-hal/src/lib.rs | 16 ++++++++++++++++ wgpu-hal/src/vulkan/device.rs | 2 +- wgpu-hal/src/vulkan/instance.rs | 9 +++++---- 8 files changed, 83 insertions(+), 25 deletions(-) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 8fe5a6fcc9..632c83e37f 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -2134,7 +2134,7 @@ impl Global { let (mut surface_guard, mut token) = self.surfaces.write(&mut token); let (adapter_guard, mut token) = hub.adapters.read(&mut token); - let (device_guard, _token) = hub.devices.read(&mut token); + let (device_guard, mut token) = hub.devices.read(&mut token); let error = 'outer: loop { let device = match device_guard.get(device_id) { @@ -2207,6 +2207,24 @@ impl Global { break error; } + // Wait for all work to finish before configuring the surface. + if let Err(e) = device.maintain(hub, wgt::Maintain::Wait, &mut token) { + break e.into(); + } + + // All textures must be destroyed before the surface can be re-configured. + if let Some(present) = surface.presentation.take() { + if present.acquired_texture.is_some() { + break E::PreviousOutputExists; + } + } + + // TODO: Texture views may still be alive that point to the texture. + // this will allow the user to render to the surface texture, long after + // it has been removed. + // + // https://github.com/gfx-rs/wgpu/issues/4105 + match unsafe { A::get_surface_mut(surface) .unwrap() @@ -2226,12 +2244,6 @@ impl Global { } } - if let Some(present) = surface.presentation.take() { - if present.acquired_texture.is_some() { - break E::PreviousOutputExists; - } - } - surface.presentation = Some(present::Presentation { device_id: Stored { value: id::Valid(device_id), diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 0ae6d7a2dd..9a77bf9536 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -1,6 +1,5 @@ use crate::{ binding_model, - device::life::WaitIdleError, hal_api::HalApi, hub::Hub, id, @@ -24,7 +23,7 @@ pub mod queue; pub mod resource; #[cfg(any(feature = "trace", feature = "replay"))] pub mod trace; -pub use resource::Device; +pub use {life::WaitIdleError, resource::Device}; pub const SHADER_STAGE_COUNT: usize = 3; // Should be large enough for the largest possible texture row. This diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index 1303769d29..7366934d27 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -15,7 +15,7 @@ use std::borrow::Borrow; use crate::device::trace::Action; use crate::{ conv, - device::{DeviceError, MissingDownlevelFlags}, + device::{DeviceError, MissingDownlevelFlags, WaitIdleError}, global::Global, hal_api::HalApi, hub::Token, @@ -96,6 +96,18 @@ pub enum ConfigureSurfaceError { }, #[error("Requested usage is not supported")] UnsupportedUsage, + #[error("Gpu got stuck :(")] + StuckGpu, +} + +impl From for ConfigureSurfaceError { + fn from(e: WaitIdleError) -> Self { + match e { + WaitIdleError::Device(d) => ConfigureSurfaceError::Device(d), + WaitIdleError::WrongSubmissionIndex(..) => unreachable!(), + WaitIdleError::StuckGpu => ConfigureSurfaceError::StuckGpu, + } + } } #[repr(C)] diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 1a44d98f0d..4ad43cc165 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -181,7 +181,10 @@ impl super::Device { }) } - pub(super) unsafe fn wait_idle(&self) -> Result<(), crate::DeviceError> { + // Blocks until the dedicated present queue is finished with all of its work. + // + // Once this method completes, the surface is able to be resized or deleted. + pub(super) unsafe fn wait_for_present_queue_idle(&self) -> Result<(), crate::DeviceError> { let cur_value = self.idler.fence.get_value(); if cur_value == !0 { return Err(crate::DeviceError::Lost); diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 564bc349c6..a231619512 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -613,19 +613,23 @@ impl crate::Surface for Surface { let mut flags = dxgi::DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; // We always set ALLOW_TEARING on the swapchain no matter // what kind of swapchain we want because ResizeBuffers - // cannot change if ALLOW_TEARING is applied to the swapchain. + // cannot change the swapchain's ALLOW_TEARING flag. + // + // This does not change the behavior of the swapchain, just + // allow present calls to use tearing. if self.supports_allow_tearing { flags |= dxgi::DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; } + // While `configure`s contract ensures that no work on the GPU's main queues + // are in flight, we still need to wait for the present queue to be idle. + unsafe { device.wait_for_present_queue_idle() }?; + let non_srgb_format = auxil::dxgi::conv::map_texture_format_nosrgb(config.format); let swap_chain = match self.swap_chain.take() { //Note: this path doesn't properly re-initialize all of the things Some(sc) => { - // can't have image resources in flight used by GPU - let _ = unsafe { device.wait_idle() }; - let raw = unsafe { sc.release_resources() }; let result = unsafe { raw.ResizeBuffers( @@ -773,12 +777,16 @@ impl crate::Surface for Surface { } unsafe fn unconfigure(&mut self, device: &Device) { - if let Some(mut sc) = self.swap_chain.take() { + if let Some(sc) = self.swap_chain.take() { unsafe { - let _ = sc.wait(None); - //TODO: this shouldn't be needed, - // but it complains that the queue is still used otherwise - let _ = device.wait_idle(); + // While `unconfigure`s contract ensures that no work on the GPU's main queues + // are in flight, we still need to wait for the present queue to be idle. + + // The major failure mode of this function is device loss, + // which if we have lost the device, we should just continue + // cleaning up, without error. + let _ = device.wait_for_present_queue_idle(); + let _raw = sc.release_resources(); } } @@ -837,6 +845,13 @@ impl crate::Queue for Queue { .signal(&fence.raw, value) .into_device_result("Signal fence")?; } + + // Note the lack of synchronization here between the main Direct queue + // and the dedicated presentation queue. This is automatically handled + // by the D3D runtime by detecting uses of resources derived from the + // swapchain. This automatic detection is why you cannot use a swapchain + // as an UAV in D3D12. + Ok(()) } unsafe fn present( diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 4bff6b8d8f..f1f4b2109e 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -227,12 +227,28 @@ pub trait Instance: Sized + WasmNotSend + WasmNotSync { } pub trait Surface: WasmNotSend + WasmNotSync { + /// Configures the surface to use the given device. + /// + /// # Safety + /// + /// - All gpu work that uses the surface must have been completed. + /// - All [`AcquiredSurfaceTexture`]s must have been destroyed. + /// - All [`Api::TextureView`]s derived from the [`AcquiredSurfaceTexture`]s must have been destroyed. + /// - All surfaces created using other devices must have been unconfigured before this call. unsafe fn configure( &mut self, device: &A::Device, config: &SurfaceConfiguration, ) -> Result<(), SurfaceError>; + /// Unconfigures the surface on the given device. + /// + /// # Safety + /// + /// - All gpu work that uses the surface must have been completed. + /// - All [`AcquiredSurfaceTexture`]s must have been destroyed. + /// - All [`Api::TextureView`]s derived from the [`AcquiredSurfaceTexture`]s must have been destroyed. + /// - The surface must have been configured on the given device. unsafe fn unconfigure(&mut self, device: &A::Device); /// Returns the next texture to be presented by the swapchain for drawing diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 4f2a0feb8a..cb955e8318 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1143,7 +1143,7 @@ impl crate::Device for super::Device { } if desc.anisotropy_clamp != 1 { - // We only enable anisotropy if it is supported, and wgpu-hal interface guarentees + // We only enable anisotropy if it is supported, and wgpu-hal interface guarantees // the clamp is in the range [1, 16] which is always supported if anisotropy is. vk_info = vk_info .anisotropy_enable(true) diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs index 34a2c4f23c..18269fff77 100644 --- a/wgpu-hal/src/vulkan/instance.rs +++ b/wgpu-hal/src/vulkan/instance.rs @@ -152,12 +152,11 @@ unsafe extern "system" fn debug_utils_messenger_callback( } impl super::Swapchain { + /// # Safety + /// + /// - The device must have been made idle before calling this function. unsafe fn release_resources(self, device: &ash::Device) -> Self { profiling::scope!("Swapchain::release_resources"); - { - profiling::scope!("vkDeviceWaitIdle"); - let _ = unsafe { device.device_wait_idle() }; - }; unsafe { device.destroy_fence(self.fence, None) }; self } @@ -829,6 +828,7 @@ impl crate::Surface for super::Surface { device: &super::Device, config: &crate::SurfaceConfiguration, ) -> Result<(), crate::SurfaceError> { + // Safety: `configure`'s contract guarantees there are no resources derived from the swapchain in use. let old = self .swapchain .take() @@ -842,6 +842,7 @@ impl crate::Surface for super::Surface { unsafe fn unconfigure(&mut self, device: &super::Device) { if let Some(sc) = self.swapchain.take() { + // Safety: `unconfigure`'s contract guarantees there are no resources derived from the swapchain in use. let swapchain = unsafe { sc.release_resources(&device.shared.raw) }; unsafe { swapchain.functor.destroy_swapchain(swapchain.raw, None) }; }