From 7be0e51fb3b4d4e213bbfd7b07d41bdc02da4699 Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Mon, 16 Dec 2024 00:50:29 -0500 Subject: [PATCH] Convert wgpu-hal to Use Argument Buffers for Binding Arrays --- Cargo.lock | 3 +- Cargo.toml | 2 +- wgpu-hal/src/metal/adapter.rs | 14 +- wgpu-hal/src/metal/command.rs | 15 +- wgpu-hal/src/metal/conv.rs | 28 ++++ wgpu-hal/src/metal/device.rs | 293 ++++++++++++++++++++++------------ wgpu-hal/src/metal/mod.rs | 34 ++++ 7 files changed, 268 insertions(+), 121 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9170790dc2..1e6047d442 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1982,8 +1982,7 @@ dependencies = [ [[package]] name = "metal" version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3572083504c43e14aec05447f8a3d57cce0f66d7a3c1b9058572eca4d70ab9" +source = "git+https://github.com/gfx-rs/metal-rs.git?rev=ef768ff9d7#ef768ff9d742ae6a0f4e83ddc8031264e7d460c4" dependencies = [ "bitflags 2.6.0", "block", diff --git a/Cargo.toml b/Cargo.toml index 6092f2c818..b082e8abff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -135,9 +135,9 @@ wgpu-types = { version = "23.0.0", path = "./wgpu-types" } winit = { version = "0.29", features = ["android-native-activity"] } # Metal dependencies +metal = { version = "0.30.0", git = "https://github.com/gfx-rs/metal-rs.git", rev = "ef768ff9d7" } block = "0.1" core-graphics-types = "0.1" -metal = { version = "0.30.0" } objc = "0.2.5" # Vulkan dependencies diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index d343d8881a..9b2c584705 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -905,18 +905,10 @@ impl super::PrivateCapabilities { features.set( F::TEXTURE_BINDING_ARRAY | F::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING - | F::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING, - self.msl_version >= MTLLanguageVersion::V2_0 && self.supports_arrays_of_textures, + | F::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING + | F::PARTIALLY_BOUND_BINDING_ARRAY, + self.msl_version >= MTLLanguageVersion::V3_0 && self.supports_arrays_of_textures, ); - //// XXX: this is technically not true, as read-only storage images can be used in arrays - //// on precisely the same conditions that sampled textures can. But texel fetch from a - //// sampled texture is a thing; should we bother introducing another feature flag? - if self.msl_version >= MTLLanguageVersion::V2_2 - && self.supports_arrays_of_textures - && self.supports_arrays_of_textures_write - { - features.insert(F::STORAGE_RESOURCE_BINDING_ARRAY); - } features.set( F::SHADER_INT64, self.int64 && self.msl_version >= MTLLanguageVersion::V2_3, diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index c0b8331fb5..f4f848bd5b 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -661,7 +661,7 @@ impl crate::CommandEncoder for super::CommandEncoder { offset += dynamic_offsets[dyn_index as usize] as wgt::BufferAddress; } encoder.set_vertex_buffer( - (bg_info.base_resource_indices.vs.buffers + index) as u64, + dbg!(bg_info.base_resource_indices.vs.buffers + index) as u64, Some(buf.ptr.as_native()), offset, ); @@ -750,6 +750,11 @@ impl crate::CommandEncoder for super::CommandEncoder { Some(res.as_native()), ); } + + // Call useResource on all textures and buffers used indirectly so they are alive + for (resource, use_info) in group.resources_to_use.iter() { + encoder.use_resource_at(resource.as_native(), use_info.uses, use_info.stages); + } } if let Some(ref encoder) = self.state.compute { @@ -807,6 +812,14 @@ impl crate::CommandEncoder for super::CommandEncoder { Some(res.as_native()), ); } + + // Call useResource on all textures and buffers used indirectly so they are alive + for (resource, use_info) in group.resources_to_use.iter() { + if !use_info.visible_in_compute { + continue; + } + encoder.use_resource(resource.as_native(), use_info.uses); + } } } diff --git a/wgpu-hal/src/metal/conv.rs b/wgpu-hal/src/metal/conv.rs index f56141d5a7..3ec1296b92 100644 --- a/wgpu-hal/src/metal/conv.rs +++ b/wgpu-hal/src/metal/conv.rs @@ -321,3 +321,31 @@ pub fn get_blit_option( metal::MTLBlitOption::None } } + +pub fn map_render_stages(stage: wgt::ShaderStages) -> metal::MTLRenderStages { + let mut raw_stages = metal::MTLRenderStages::empty(); + + if stage.contains(wgt::ShaderStages::VERTEX) { + raw_stages |= metal::MTLRenderStages::Vertex; + } + if stage.contains(wgt::ShaderStages::FRAGMENT) { + raw_stages |= metal::MTLRenderStages::Fragment; + } + + raw_stages +} + +pub fn map_resource_usage(ty: &wgt::BindingType) -> metal::MTLResourceUsage { + match ty { + wgt::BindingType::Texture { .. } => metal::MTLResourceUsage::Sample, + wgt::BindingType::StorageTexture { access, .. } => match access { + wgt::StorageTextureAccess::WriteOnly => metal::MTLResourceUsage::Write, + wgt::StorageTextureAccess::ReadOnly => metal::MTLResourceUsage::Read, + wgt::StorageTextureAccess::ReadWrite => { + metal::MTLResourceUsage::Read | metal::MTLResourceUsage::Write + } + }, + wgt::BindingType::Sampler(..) => metal::MTLResourceUsage::empty(), + _ => unreachable!(), + } +} diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index c281317099..603b86bc92 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1,7 +1,6 @@ use parking_lot::Mutex; use std::{ - num::NonZeroU32, - ptr, + ptr::{self, NonNull}, sync::{atomic, Arc}, thread, time, }; @@ -10,6 +9,8 @@ use super::conv; use crate::auxil::map_naga_stage; use crate::TlasInstance; +use metal::foreign_types::ForeignType; + type DeviceResult = Result; struct CompiledShader { @@ -563,6 +564,9 @@ impl crate::Device for super::Device { if let Some(label) = desc.label { descriptor.set_label(label); } + if self.features.contains(wgt::Features::TEXTURE_BINDING_ARRAY) { + descriptor.set_support_argument_buffers(true); + } let raw = self.shared.device.lock().new_sampler(&descriptor); self.counters.samplers.add(1); @@ -681,36 +685,41 @@ impl crate::Device for super::Device { } let mut target = naga::back::msl::BindTarget::default(); - let count = entry.count.map_or(1, NonZeroU32::get); - target.binding_array_size = entry.count.map(NonZeroU32::get); - match entry.ty { - wgt::BindingType::Buffer { ty, .. } => { - target.buffer = Some(info.counters.buffers as _); - info.counters.buffers += count; - if let wgt::BufferBindingType::Storage { read_only } = ty { - target.mutable = !read_only; + // Bindless path + if let Some(_) = entry.count { + target.buffer = Some(info.counters.buffers as _); + info.counters.buffers += 1; + } else { + match entry.ty { + wgt::BindingType::Buffer { ty, .. } => { + target.buffer = Some(info.counters.buffers as _); + info.counters.buffers += 1; + if let wgt::BufferBindingType::Storage { read_only } = ty { + target.mutable = !read_only; + } } + wgt::BindingType::Sampler { .. } => { + target.sampler = + Some(naga::back::msl::BindSamplerTarget::Resource( + info.counters.samplers as _, + )); + info.counters.samplers += 1; + } + wgt::BindingType::Texture { .. } => { + target.texture = Some(info.counters.textures as _); + info.counters.textures += 1; + } + wgt::BindingType::StorageTexture { access, .. } => { + target.texture = Some(info.counters.textures as _); + info.counters.textures += 1; + target.mutable = match access { + wgt::StorageTextureAccess::ReadOnly => false, + wgt::StorageTextureAccess::WriteOnly => true, + wgt::StorageTextureAccess::ReadWrite => true, + }; + } + wgt::BindingType::AccelerationStructure => unimplemented!(), } - wgt::BindingType::Sampler { .. } => { - target.sampler = Some(naga::back::msl::BindSamplerTarget::Resource( - info.counters.samplers as _, - )); - info.counters.samplers += count; - } - wgt::BindingType::Texture { .. } => { - target.texture = Some(info.counters.textures as _); - info.counters.textures += count; - } - wgt::BindingType::StorageTexture { access, .. } => { - target.texture = Some(info.counters.textures as _); - info.counters.textures += count; - target.mutable = match access { - wgt::StorageTextureAccess::ReadOnly => false, - wgt::StorageTextureAccess::WriteOnly => true, - wgt::StorageTextureAccess::ReadWrite => true, - }; - } - wgt::BindingType::AccelerationStructure => unimplemented!(), } let br = naga::ResourceBinding { @@ -788,90 +797,162 @@ impl crate::Device for super::Device { super::AccelerationStructure, >, ) -> DeviceResult { - let mut bg = super::BindGroup::default(); - for (&stage, counter) in super::NAGA_STAGES.iter().zip(bg.counters.iter_mut()) { - let stage_bit = map_naga_stage(stage); - let mut dynamic_offsets_count = 0u32; - let layout_and_entry_iter = desc.entries.iter().map(|entry| { - let layout = desc - .layout - .entries - .iter() - .find(|layout_entry| layout_entry.binding == entry.binding) - .expect("internal error: no layout entry found with binding slot"); - (entry, layout) - }); - for (entry, layout) in layout_and_entry_iter { - let size = layout.count.map_or(1, |c| c.get()); - if let wgt::BindingType::Buffer { - has_dynamic_offset: true, - .. - } = layout.ty - { - dynamic_offsets_count += size; - } - if !layout.visibility.contains(stage_bit) { - continue; - } - match layout.ty { - wgt::BindingType::Buffer { - ty, - has_dynamic_offset, - .. - } => { - let start = entry.resource_index as usize; - let end = start + size as usize; - bg.buffers - .extend(desc.buffers[start..end].iter().map(|source| { - // Given the restrictions on `BufferBinding::offset`, - // this should never be `None`. - let remaining_size = - wgt::BufferSize::new(source.buffer.size - source.offset); - let binding_size = match ty { - wgt::BufferBindingType::Storage { .. } => { - source.size.or(remaining_size) - } - _ => None, - }; - super::BufferResource { - ptr: source.buffer.as_raw(), - offset: source.offset, - dynamic_index: if has_dynamic_offset { - Some(dynamic_offsets_count - 1) - } else { - None - }, - binding_size, - binding_location: layout.binding, + objc::rc::autoreleasepool(|| { + let mut bg = super::BindGroup::default(); + for (&stage, counter) in super::NAGA_STAGES.iter().zip(bg.counters.iter_mut()) { + let stage_bit = map_naga_stage(stage); + let mut dynamic_offsets_count = 0u32; + let layout_and_entry_iter = desc.entries.iter().map(|entry| { + let layout = desc + .layout + .entries + .iter() + .find(|layout_entry| layout_entry.binding == entry.binding) + .expect("internal error: no layout entry found with binding slot"); + (entry, layout) + }); + for (entry, layout) in layout_and_entry_iter { + // Bindless path + if layout.count.is_some() { + let count = entry.count; + + let stages = conv::map_render_stages(layout.visibility); + let uses = conv::map_resource_usage(&layout.ty); + + // Create argument buffer for this array + let buffer = self.shared.device.lock().new_buffer( + 8 * count as u64, + metal::MTLResourceOptions::HazardTrackingModeUntracked + | metal::MTLResourceOptions::StorageModeShared, + ); + + let contents: &mut [metal::MTLResourceID] = unsafe { + std::slice::from_raw_parts_mut(buffer.contents().cast(), count as usize) + }; + + match layout.ty { + wgt::BindingType::Texture { .. } + | wgt::BindingType::StorageTexture { .. } => { + let start = entry.resource_index as usize; + let end = start + count as usize; + let textures = &desc.textures[start..end]; + + for (idx, tex) in textures.iter().enumerate() { + contents[idx] = tex.view.raw.gpu_resource_id(); + + let use_info = bg + .resources_to_use + .entry(tex.view.as_raw().cast()) + .or_default(); + use_info.stages |= stages; + use_info.uses |= uses; + use_info.visible_in_compute |= + layout.visibility.contains(wgt::ShaderStages::COMPUTE); } - })); + } + wgt::BindingType::Sampler { .. } => { + let start = entry.resource_index as usize; + let end = start + count as usize; + let samplers = &desc.samplers[start..end]; + + for (idx, &sampler) in samplers.iter().enumerate() { + contents[idx] = sampler.raw.gpu_resource_id(); + // Samplers aren't resources like buffers and textures, so don't + // need to be passed to useResource + } + } + _ => { + unimplemented!(); + } + } + + bg.buffers.push(super::BufferResource { + ptr: unsafe { NonNull::new_unchecked(buffer.as_ptr()) }, + offset: 0, + dynamic_index: None, + binding_size: None, + binding_location: layout.binding, + }); counter.buffers += 1; + + bg.argument_buffers.push(buffer) } - wgt::BindingType::Sampler { .. } => { - let start = entry.resource_index as usize; - let end = start + size as usize; - bg.samplers - .extend(desc.samplers[start..end].iter().map(|samp| samp.as_raw())); - counter.samplers += size; - } - wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => { - let start = entry.resource_index as usize; - let end = start + size as usize; - bg.textures.extend( - desc.textures[start..end] - .iter() - .map(|tex| tex.view.as_raw()), - ); - counter.textures += size; + // Bindfull path + else { + if let wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } = layout.ty + { + dynamic_offsets_count += 1; + } + if !layout.visibility.contains(stage_bit) { + continue; + } + match layout.ty { + wgt::BindingType::Buffer { + ty, + has_dynamic_offset, + .. + } => { + let start = entry.resource_index as usize; + let end = start + 1; + bg.buffers + .extend(desc.buffers[start..end].iter().map(|source| { + // Given the restrictions on `BufferBinding::offset`, + // this should never be `None`. + let remaining_size = wgt::BufferSize::new( + source.buffer.size - source.offset, + ); + let binding_size = match ty { + wgt::BufferBindingType::Storage { .. } => { + source.size.or(remaining_size) + } + _ => None, + }; + super::BufferResource { + ptr: source.buffer.as_raw(), + offset: source.offset, + dynamic_index: if has_dynamic_offset { + Some(dynamic_offsets_count - 1) + } else { + None + }, + binding_size, + binding_location: layout.binding, + } + })); + counter.buffers += 1; + } + wgt::BindingType::Sampler { .. } => { + let start = entry.resource_index as usize; + let end = start + 1; + bg.samplers.extend( + desc.samplers[start..end].iter().map(|samp| samp.as_raw()), + ); + counter.samplers += 1; + } + wgt::BindingType::Texture { .. } + | wgt::BindingType::StorageTexture { .. } => { + let start = entry.resource_index as usize; + let end = start + 1; + bg.textures.extend( + desc.textures[start..end] + .iter() + .map(|tex| tex.view.as_raw()), + ); + counter.textures += 1; + } + wgt::BindingType::AccelerationStructure => unimplemented!(), + } } - wgt::BindingType::AccelerationStructure => unimplemented!(), } } - } - self.counters.bind_groups.add(1); + self.counters.bind_groups.add(1); - Ok(bg) + Ok(bg) + }) } unsafe fn destroy_bind_group(&self, _group: super::BindGroup) { diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 767216225a..39645f2bca 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -26,6 +26,7 @@ mod surface; mod time; use std::{ + collections::HashMap, fmt, iter, ops, ptr::NonNull, sync::{atomic, Arc}, @@ -651,10 +652,23 @@ trait AsNative { fn as_native(&self) -> &Self::Native; } +type ResourcePtr = NonNull; type BufferPtr = NonNull; type TexturePtr = NonNull; type SamplerPtr = NonNull; +impl AsNative for ResourcePtr { + type Native = metal::ResourceRef; + #[inline] + fn from(native: &Self::Native) -> Self { + unsafe { NonNull::new_unchecked(native.as_ptr()) } + } + #[inline] + fn as_native(&self) -> &Self::Native { + unsafe { Self::Native::from_ptr(self.as_ptr()) } + } +} + impl AsNative for BufferPtr { type Native = metal::BufferRef; #[inline] @@ -710,12 +724,32 @@ struct BufferResource { binding_location: u32, } +#[derive(Debug)] +struct UseResourceInfo { + uses: metal::MTLResourceUsage, + stages: metal::MTLRenderStages, + visible_in_compute: bool, +} + +impl Default for UseResourceInfo { + fn default() -> Self { + Self { + uses: metal::MTLResourceUsage::empty(), + stages: metal::MTLRenderStages::empty(), + visible_in_compute: false, + } + } +} + #[derive(Debug, Default)] pub struct BindGroup { counters: MultiStageResourceCounters, buffers: Vec, samplers: Vec, textures: Vec, + + argument_buffers: Vec, + resources_to_use: HashMap, } impl crate::DynBindGroup for BindGroup {}