Skip to content

Commit

Permalink
Expose GPU allocation reports in wgpu, wgpu-core and wgpu-hal
Browse files Browse the repository at this point in the history
  • Loading branch information
nical authored and teoxoy committed Jul 19, 2024
1 parent 20973d1 commit c83503a
Show file tree
Hide file tree
Showing 9 changed files with 195 additions and 1 deletion.
11 changes: 11 additions & 0 deletions wgpu-core/src/device/global.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2458,6 +2458,17 @@ impl Global {
}
}

/// Produces a memory allocator report for the device identified by `device_id`.
///
/// Returns `None` when the id cannot be resolved in the hub's device registry,
/// or when the device itself has no report to offer.
pub fn device_generate_allocator_report<A: HalApi>(
    &self,
    device_id: DeviceId,
) -> Option<wgt::AllocatorReport> {
    // A failed lookup is not reported as an error here; it simply yields `None`.
    let device = A::hub(self).devices.get(device_id).ok()?;
    device.generate_allocator_report()
}

pub fn queue_drop<A: HalApi>(&self, queue_id: QueueId) {
profiling::scope!("Queue::drop");
api_log!("Queue::drop {queue_id:?}");
Expand Down
7 changes: 7 additions & 0 deletions wgpu-core/src/device/resource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3598,6 +3598,13 @@ impl<A: HalApi> Device<A> {
.map(|raw| raw.get_internal_counters())
.unwrap_or_default()
}

/// Asks the underlying HAL device for a memory allocator report.
///
/// Returns `None` when `self.raw` holds no HAL device, or when the HAL device
/// does not produce a report.
pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
    // `and_then` flattens the nested `Option` directly instead of building an
    // `Option<Option<_>>` and collapsing it with `unwrap_or_default()`.
    self.raw
        .as_ref()
        .and_then(|raw| raw.generate_allocator_report())
}
}

impl<A: HalApi> Device<A> {
Expand Down
37 changes: 37 additions & 0 deletions wgpu-hal/src/dx12/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1801,4 +1801,41 @@ impl crate::Device for super::Device {
/// Returns a clone of this device's `counters` field.
fn get_internal_counters(&self) -> wgt::HalCounters {
self.counters.clone()
}

/// Converts the report produced by the device's memory allocator into a
/// `wgt::AllocatorReport`.
///
/// Returns `None` early when `self.mem_allocator` has no allocator to query.
#[cfg(feature = "windows_rs")]
fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
    // The lock guard is a temporary of this statement, so it is released
    // before the conversion below starts.
    let mut report = self
        .mem_allocator
        .as_ref()?
        .lock()
        .allocator
        .generate_report();

    let blocks = report
        .blocks
        .iter()
        .map(|entry| wgt::MemoryBlockReport {
            size: entry.size,
            allocations: entry.allocations.clone(),
        })
        .collect();

    // Allocation names are moved out of the upstream report rather than cloned.
    let allocations = report
        .allocations
        .iter_mut()
        .map(|entry| {
            let name = std::mem::take(&mut entry.name);
            wgt::AllocationReport {
                name,
                offset: entry.offset,
                size: entry.size,
            }
        })
        .collect();

    let total_allocated_bytes = report.total_allocated_bytes;
    let total_reserved_bytes = report.total_reserved_bytes;

    Some(wgt::AllocatorReport {
        allocations,
        blocks,
        total_allocated_bytes,
        total_reserved_bytes,
    })
}
}
4 changes: 4 additions & 0 deletions wgpu-hal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -894,6 +894,10 @@ pub trait Device: WasmNotSendSync {
);

fn get_internal_counters(&self) -> wgt::HalCounters;

/// Returns a report describing the device's memory allocator, if one is available.
///
/// The default implementation returns `None`; implementors that can produce a
/// report override this method.
fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
None
}
}

pub trait Queue: WasmNotSendSync {
Expand Down
90 changes: 89 additions & 1 deletion wgpu-types/src/counters.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#[cfg(feature = "counters")]
use std::sync::atomic::{AtomicIsize, Ordering};
use std::{fmt, ops::Range};

/// An internal counter for debugging purposes
///
Expand Down Expand Up @@ -128,7 +129,7 @@ pub struct HalCounters {
/// `wgpu-core`'s internal counters.
///
/// Currently a placeholder: the struct declares no fields yet.
#[derive(Clone, Default)]
pub struct CoreCounters {
// TODO
// TODO #[cfg(features=)]
}

/// All internal counters, exposed for debugging purposes.
Expand All @@ -139,3 +140,90 @@ pub struct InternalCounters {
/// `wgpu-hal` counters.
pub hal: HalCounters,
}

/// Describes an allocation in the [`AllocatorReport`].
///
/// The `Debug` output is a compact one-liner containing only the name and a
/// human-readable size; an empty name is printed as `"--"`.
#[derive(Clone)]
pub struct AllocationReport {
/// The name provided to the `allocate()` function.
pub name: String,
/// The offset in bytes of the allocation in its memory block.
pub offset: u64,
/// The size in bytes of the allocation.
pub size: u64,
}

/// Describes a memory block in the [`AllocatorReport`].
///
/// Derives `Debug` so that every public report type can be debug-printed.
#[derive(Clone, Debug)]
pub struct MemoryBlockReport {
    /// The size in bytes of this memory block.
    pub size: u64,
    /// The range of allocations in [`AllocatorReport::allocations`] that are associated
    /// to this memory block.
    pub allocations: Range<usize>,
}

/// A report that can be generated for informational purposes using `Allocator::generate_report()`.
///
/// In `wgpu` this report is obtained through `Device::generate_allocator_report()`.
///
/// The `Debug` output is a summary (allocated / reserved bytes, block count,
/// allocation count) followed by the allocations sorted from largest to smallest.
/// The formatter's precision limits how many allocations are listed, e.g.
/// `{:.10?}` prints at most the ten largest; without a precision all are printed.
#[derive(Clone)]
pub struct AllocatorReport {
/// All live allocations, sub-allocated from memory blocks.
pub allocations: Vec<AllocationReport>,
/// All memory blocks.
pub blocks: Vec<MemoryBlockReport>,
/// Sum of the memory used by all allocations, in bytes.
pub total_allocated_bytes: u64,
/// Sum of the memory reserved by all memory blocks including unallocated regions, in bytes.
pub total_reserved_bytes: u64,
}

impl fmt::Debug for AllocationReport {
    /// Writes `"<name>": <size>`, substituting `"--"` for an empty name.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label: &str = if self.name.is_empty() {
            "--"
        } else {
            &self.name
        };
        let size = FmtBytes(self.size);
        write!(f, "{label:?}: {}", size)
    }
}

impl fmt::Debug for AllocatorReport {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut allocations = self.allocations.clone();
allocations.sort_by_key(|alloc| std::cmp::Reverse(alloc.size));

let max_num_allocations_to_print = f.precision().unwrap_or(usize::MAX);
allocations.truncate(max_num_allocations_to_print);

f.debug_struct("AllocatorReport")
.field(
"summary",
&std::format_args!(
"{} / {}",
FmtBytes(self.total_allocated_bytes),
FmtBytes(self.total_reserved_bytes)
),
)
.field("blocks", &self.blocks.len())
.field("allocations", &self.allocations.len())
.field("largest", &allocations.as_slice())
.finish()
}
}

/// Wrapper whose `Display` impl renders a byte count with a unit suffix,
/// scaling by 1024 per step and printing two decimals.
struct FmtBytes(u64);

impl fmt::Display for FmtBytes {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const SUFFIX: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
        let last_unit = SUFFIX.len() - 1;

        let mut value = self.0 as f64;
        let mut unit = 0;
        // Step up a unit while the value is still >= 1024 and a larger unit exists;
        // anything beyond the last suffix stays expressed in that suffix.
        while unit < last_unit && value >= 1024.0 {
            value /= 1024.0;
            unit += 1;
        }

        write!(f, "{:.2} {}", value, SUFFIX[unit])
    }
}
8 changes: 8 additions & 0 deletions wgpu/src/backend/webgpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2986,6 +2986,14 @@ impl crate::context::Context for ContextWebGpu {
Default::default()
}

/// Always returns `None`: this backend does not produce an allocator report.
fn device_generate_allocator_report(
&self,
_device: &Self::DeviceId,
_device_data: &Self::DeviceData,
) -> Option<wgt::AllocatorReport> {
None
}

fn pipeline_cache_get_data(
&self,
_: &Self::PipelineCacheId,
Expand Down
8 changes: 8 additions & 0 deletions wgpu/src/backend/wgpu_core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2367,6 +2367,14 @@ impl crate::Context for ContextWgpuCore {
wgc::gfx_select!(device => self.0.device_get_internal_counters(*device))
}

/// Forwards the request to `device_generate_allocator_report` on the wrapped
/// `wgpu-core` global for the given device id. `_device_data` is unused.
fn device_generate_allocator_report(
&self,
device: &Self::DeviceId,
_device_data: &Self::DeviceData,
) -> Option<wgt::AllocatorReport> {
// NOTE(review): `gfx_select!` presumably picks the backend type parameter
// from the id — confirm against the macro definition.
wgc::gfx_select!(device => self.0.device_generate_allocator_report(*device))
}

fn pipeline_cache_get_data(
&self,
cache: &Self::PipelineCacheId,
Expand Down
22 changes: 22 additions & 0 deletions wgpu/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,12 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
_device_data: &Self::DeviceData,
) -> wgt::InternalCounters;

/// Generates a memory allocator report for `device`, or `None` if no report
/// is available.
fn device_generate_allocator_report(
&self,
device: &Self::DeviceId,
_device_data: &Self::DeviceData,
) -> Option<wgt::AllocatorReport>;

fn pipeline_cache_get_data(
&self,
cache: &Self::PipelineCacheId,
Expand Down Expand Up @@ -1617,6 +1623,12 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
device_data: &crate::Data,
) -> wgt::InternalCounters;

/// Type-erased counterpart of `Context::device_generate_allocator_report`.
// NOTE(review): the sibling method is named `device_get_internal_counters`;
// this one lacks the `device_` prefix — consider aligning the names.
fn generate_allocator_report(
&self,
device: &ObjectId,
device_data: &crate::Data,
) -> Option<wgt::AllocatorReport>;

fn pipeline_cache_get_data(
&self,
cache: &ObjectId,
Expand Down Expand Up @@ -3101,6 +3113,16 @@ where
Context::device_get_internal_counters(self, &device, device_data)
}

/// Converts the type-erased id and data back to the concrete context types
/// and delegates to `Context::device_generate_allocator_report`.
fn generate_allocator_report(
    &self,
    device: &ObjectId,
    device_data: &crate::Data,
) -> Option<wgt::AllocatorReport> {
    let typed_id = <T::DeviceId>::from(*device);
    Context::device_generate_allocator_report(self, &typed_id, downcast_ref(device_data))
}

fn pipeline_cache_get_data(
&self,
cache: &ObjectId,
Expand Down
9 changes: 9 additions & 0 deletions wgpu/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3238,6 +3238,15 @@ impl Device {
DynContext::device_get_internal_counters(&*self.context, &self.id, self.data.as_ref())
}

/// Generate a GPU memory allocation report if the underlying backend supports it.
///
/// Backends that do not support producing these reports return `None`. A backend may
/// support it and still return `None` if it is not performing sub-allocation,
/// for example as a workaround for driver issues.
pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
DynContext::generate_allocator_report(&*self.context, &self.id, self.data.as_ref())
}

/// Apply a callback to this `Device`'s underlying backend device.
///
/// If this `Device` is implemented by the backend API given by `A` (Vulkan,
Expand Down

0 comments on commit c83503a

Please sign in to comment.