From c83503a06f93e183a71f9e4707a4346002f4123e Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Thu, 18 Jul 2024 17:35:26 +0200 Subject: [PATCH] Expose GPU allocation reports in wgpu, wgpu-core and wgpu-hal --- wgpu-core/src/device/global.rs | 11 ++++ wgpu-core/src/device/resource.rs | 7 +++ wgpu-hal/src/dx12/device.rs | 37 +++++++++++++ wgpu-hal/src/lib.rs | 4 ++ wgpu-types/src/counters.rs | 90 +++++++++++++++++++++++++++++++- wgpu/src/backend/webgpu.rs | 8 +++ wgpu/src/backend/wgpu_core.rs | 8 +++ wgpu/src/context.rs | 22 ++++++++ wgpu/src/lib.rs | 9 ++++ 9 files changed, 195 insertions(+), 1 deletion(-) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 5ebd7c7de7..0942fda46c 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -2458,6 +2458,17 @@ impl Global { } } + pub fn device_generate_allocator_report( + &self, + device_id: DeviceId, + ) -> Option { + let hub = A::hub(self); + hub.devices + .get(device_id) + .ok() + .and_then(|device| device.generate_allocator_report()) + } + pub fn queue_drop(&self, queue_id: QueueId) { profiling::scope!("Queue::drop"); api_log!("Queue::drop {queue_id:?}"); diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index ee943d7fdc..25f95f8a2a 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -3598,6 +3598,13 @@ impl Device { .map(|raw| raw.get_internal_counters()) .unwrap_or_default() } + + pub fn generate_allocator_report(&self) -> Option { + self.raw + .as_ref() + .map(|raw| raw.generate_allocator_report()) + .unwrap_or_default() + } } impl Device { diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 27b3002431..e886e2fd04 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1801,4 +1801,41 @@ impl crate::Device for super::Device { fn get_internal_counters(&self) -> wgt::HalCounters { self.counters.clone() } + + #[cfg(feature = "windows_rs")] + fn 
generate_allocator_report(&self) -> Option { + let mut upstream = { + self.mem_allocator + .as_ref()? + .lock() + .allocator + .generate_report() + }; + + let allocations = upstream + .allocations + .iter_mut() + .map(|alloc| wgt::AllocationReport { + name: std::mem::take(&mut alloc.name), + offset: alloc.offset, + size: alloc.size, + }) + .collect(); + + let blocks = upstream + .blocks + .iter() + .map(|block| wgt::MemoryBlockReport { + size: block.size, + allocations: block.allocations.clone(), + }) + .collect(); + + Some(wgt::AllocatorReport { + allocations, + blocks, + total_allocated_bytes: upstream.total_allocated_bytes, + total_reserved_bytes: upstream.total_reserved_bytes, + }) + } } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index b28a005a7a..bd047b5ff6 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -894,6 +894,10 @@ pub trait Device: WasmNotSendSync { ); fn get_internal_counters(&self) -> wgt::HalCounters; + + fn generate_allocator_report(&self) -> Option { + None + } } pub trait Queue: WasmNotSendSync { diff --git a/wgpu-types/src/counters.rs b/wgpu-types/src/counters.rs index 9dfa739f8b..d0f9a5ea18 100644 --- a/wgpu-types/src/counters.rs +++ b/wgpu-types/src/counters.rs @@ -1,5 +1,6 @@ #[cfg(feature = "counters")] use std::sync::atomic::{AtomicIsize, Ordering}; +use std::{fmt, ops::Range}; /// An internal counter for debugging purposes /// @@ -128,7 +129,7 @@ pub struct HalCounters { /// `wgpu-core`'s internal counters. #[derive(Clone, Default)] pub struct CoreCounters { - // TODO + // TODO #[cfg(features=)] } /// All internal counters, exposed for debugging purposes. @@ -139,3 +140,90 @@ pub struct InternalCounters { /// `wgpu-hal` counters. pub hal: HalCounters, } + +/// Describes an allocation in the [`AllocatorReport`]. +#[derive(Clone)] +pub struct AllocationReport { + /// The name provided to the `allocate()` function. + pub name: String, + /// The offset in bytes of the allocation in its memory block. 
+ pub offset: u64, + /// The size in bytes of the allocation. + pub size: u64, +} + +/// Describes a memory block in the [`AllocatorReport`]. +#[derive(Clone)] +pub struct MemoryBlockReport { + /// The size in bytes of this memory block. + pub size: u64, + /// The range of allocations in [`AllocatorReport::allocations`] that are associated + /// to this memory block. + pub allocations: Range, +} + +/// A report that can be generated for informational purposes using `Allocator::generate_report()`. +#[derive(Clone)] +pub struct AllocatorReport { + /// All live allocations, sub-allocated from memory blocks. + pub allocations: Vec, + /// All memory blocks. + pub blocks: Vec, + /// Sum of the memory used by all allocations, in bytes. + pub total_allocated_bytes: u64, + /// Sum of the memory reserved by all memory blocks including unallocated regions, in bytes. + pub total_reserved_bytes: u64, +} + +impl fmt::Debug for AllocationReport { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let name = if !self.name.is_empty() { + self.name.as_str() + } else { + "--" + }; + write!(f, "{name:?}: {}", FmtBytes(self.size)) + } +} + +impl fmt::Debug for AllocatorReport { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut allocations = self.allocations.clone(); + allocations.sort_by_key(|alloc| std::cmp::Reverse(alloc.size)); + + let max_num_allocations_to_print = f.precision().unwrap_or(usize::MAX); + allocations.truncate(max_num_allocations_to_print); + + f.debug_struct("AllocatorReport") + .field( + "summary", + &std::format_args!( + "{} / {}", + FmtBytes(self.total_allocated_bytes), + FmtBytes(self.total_reserved_bytes) + ), + ) + .field("blocks", &self.blocks.len()) + .field("allocations", &self.allocations.len()) + .field("largest", &allocations.as_slice()) + .finish() + } +} + +struct FmtBytes(u64); + +impl fmt::Display for FmtBytes { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + const SUFFIX: [&str; 5] = ["B", "KB", "MB", 
"GB", "TB"]; + let mut idx = 0; + let mut amount = self.0 as f64; + loop { + if amount < 1024.0 || idx == SUFFIX.len() - 1 { + return write!(f, "{:.2} {}", amount, SUFFIX[idx]); + } + + amount /= 1024.0; + idx += 1; + } + } +} diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index 8e158359c2..be3d9b42cd 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -2986,6 +2986,14 @@ impl crate::context::Context for ContextWebGpu { Default::default() } + fn device_generate_allocator_report( + &self, + _device: &Self::DeviceId, + _device_data: &Self::DeviceData, + ) -> Option { + None + } + fn pipeline_cache_get_data( &self, _: &Self::PipelineCacheId, diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 91629d638c..88e0a9f503 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -2367,6 +2367,14 @@ impl crate::Context for ContextWgpuCore { wgc::gfx_select!(device => self.0.device_get_internal_counters(*device)) } + fn device_generate_allocator_report( + &self, + device: &Self::DeviceId, + _device_data: &Self::DeviceData, + ) -> Option { + wgc::gfx_select!(device => self.0.device_generate_allocator_report(*device)) + } + fn pipeline_cache_get_data( &self, cache: &Self::PipelineCacheId, diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index 7ff2adbaf7..2c2c82c4bc 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -618,6 +618,12 @@ pub trait Context: Debug + WasmNotSendSync + Sized { _device_data: &Self::DeviceData, ) -> wgt::InternalCounters; + fn device_generate_allocator_report( + &self, + device: &Self::DeviceId, + _device_data: &Self::DeviceData, + ) -> Option; + fn pipeline_cache_get_data( &self, cache: &Self::PipelineCacheId, @@ -1617,6 +1623,12 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { device_data: &crate::Data, ) -> wgt::InternalCounters; + fn generate_allocator_report( + &self, + device: &ObjectId, + device_data: &crate::Data, + ) -> 
Option; + fn pipeline_cache_get_data( &self, cache: &ObjectId, @@ -3101,6 +3113,16 @@ where Context::device_get_internal_counters(self, &device, device_data) } + fn generate_allocator_report( + &self, + device: &ObjectId, + device_data: &crate::Data, + ) -> Option { + let device = ::from(*device); + let device_data = downcast_ref(device_data); + Context::device_generate_allocator_report(self, &device, device_data) + } + fn pipeline_cache_get_data( &self, cache: &ObjectId, diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index d895b696cf..04ce09aa7d 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -3238,6 +3238,15 @@ impl Device { DynContext::device_get_internal_counters(&*self.context, &self.id, self.data.as_ref()) } + /// Generate a GPU memory allocation report if the underlying backend supports it. + /// + /// Backends that do not support producing these reports return `None`. A backend may + /// support it and still return `None` if it is not performing sub-allocation, + /// for example as a workaround for driver issues. + pub fn generate_allocator_report(&self) -> Option { + DynContext::generate_allocator_report(&*self.context, &self.id, self.data.as_ref()) + } + /// Apply a callback to this `Device`'s underlying backend device. /// /// If this `Device` is implemented by the backend API given by `A` (Vulkan,