diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000000..95d2a35175 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,7 @@ +[alias] +xtask = "run --manifest-path xtask/Cargo.toml --" + +[build] +rustflags = [ +"--cfg=web_sys_unstable_apis" +] diff --git a/.deny.toml b/.deny.toml index 5c214bbc28..f7c233c5d4 100644 --- a/.deny.toml +++ b/.deny.toml @@ -27,6 +27,7 @@ allow = [ [sources] allow-git = [ "https://github.com/grovesNL/glow", + "https://github.com/gfx-rs/metal-rs", ] unknown-registry = "deny" unknown-git = "deny" diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 7fefad320c..1d6e147803 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,2 +1,4 @@ -/cts_runner/ @crowlKats -/deno_webgpu/ @crowlKats +* @gfx-rs/wgpu + +/cts_runner/ @gfx-rs/deno @gfx-rs/wgpu +/deno_webgpu/ @gfx-rs/deno @gfx-rs/wgpu diff --git a/CHANGELOG.md b/CHANGELOG.md index db91b89718..08d20c09fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,6 +64,28 @@ let render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { By @Valaphee in [#3402](https://github.com/gfx-rs/wgpu/pull/3402) + +#### Render pass store operation is now an enum + +`wgpu::Operations::store` used to be an underdocumented boolean value, +causing misunderstandings of the effect of setting it to `false`. + +The API now more closely resembles WebGPU which distinguishes between `store` and `discard`, +see [WebGPU spec on GPUStoreOp](https://gpuweb.github.io/gpuweb/#enumdef-gpustoreop). + +```diff +// ... +depth_ops: Some(wgpu::Operations { + load: wgpu::LoadOp::Clear(1.0), +- store: false, ++ store: wgpu::StoreOp::Discard, +}), +// ... +``` + +By @wumpf in [#4147](https://github.com/gfx-rs/wgpu/pull/4147) + + ### Added/New Features - Add `gles_minor_version` field to `wgpu::InstanceDescriptor`. By @PJB3005 in [#3998](https://github.com/gfx-rs/wgpu/pull/3998) @@ -72,11 +94,13 @@ By @Valaphee in [#3402](https://github.com/gfx-rs/wgpu/pull/3402) #### General +- Update Naga to df8107b7 (2023-9-15). By @jimblandy in [#4149](https://github.com/gfx-rs/wgpu/pull/4149) - Omit texture store bound checks since they are no-ops if out of bounds on all APIs. By @teoxoy in [#3975](https://github.com/gfx-rs/wgpu/pull/3975) - Validate `DownlevelFlags::READ_ONLY_DEPTH_STENCIL`. By @teoxoy in [#4031](https://github.com/gfx-rs/wgpu/pull/4031) - Add validation in accordance with WebGPU `setViewport` valid usage for `x`, `y` and `this.[[attachment_size]]`. By @James2022-rgb in [#4058](https://github.com/gfx-rs/wgpu/pull/4058) -- `wgpu::CreateSurfaceError` now gives details of the failure, but no longer implements `PartialEq`. By @kpreid in [#4066](https://github.com/gfx-rs/wgpu/pull/4066) +- `wgpu::CreateSurfaceError` and `wgpu::RequestDeviceError` now give details of the failure, but no longer implement `PartialEq` and cannot be constructed. By @kpreid in [#4066](https://github.com/gfx-rs/wgpu/pull/4066) and [#4145](https://github.com/gfx-rs/wgpu/pull/4145) - Make `WGPU_POWER_PREF=none` a valid value. By @fornwall in [4076](https://github.com/gfx-rs/wgpu/pull/4076) +- Support dual source blending in OpenGL ES, Metal, Vulkan & DX12. By @freqmod in [4022](https://github.com/gfx-rs/wgpu/pull/4022) #### Vulkan @@ -86,7 +110,12 @@ By @Valaphee in [#3402](https://github.com/gfx-rs/wgpu/pull/3402) ### Documentation -- Use WGSL for VertexFormat example types. By @ScanMountGoat in [#4305](https://github.com/gfx-rs/wgpu/pull/4035) +- Use WGSL for VertexFormat example types. By @ScanMountGoat in [#4035](https://github.com/gfx-rs/wgpu/pull/4035) +- Fix description of `Features::TEXTURE_COMPRESSION_ASTC_HDR` in [#4157](https://github.com/gfx-rs/wgpu/pull/4157) + +#### Metal + +- Support for timestamp queries on encoders and passes. By @wumpf in [#4008](https://github.com/gfx-rs/wgpu/pull/4008) ### Bug Fixes @@ -95,6 +124,7 @@ By @Valaphee in [#3402](https://github.com/gfx-rs/wgpu/pull/3402) - Derive storage bindings via `naga::StorageAccess` instead of `naga::GlobalUse`. By @teoxoy in [#3985](https://github.com/gfx-rs/wgpu/pull/3985). - `Queue::on_submitted_work_done` callbacks will now always be called after all previous `BufferSlice::map_async` callbacks, even when there are no active submissions. By @cwfitzgerald in [#4036](https://github.com/gfx-rs/wgpu/pull/4036). - Fix `clear` texture views being leaked when `wgpu::SurfaceTexture` is dropped before it is presented. By @rajveermalviya in [#4057](https://github.com/gfx-rs/wgpu/pull/4057). +- Add `Feature::SHADER_UNUSED_VERTEX_OUTPUT` to allow unused vertex shader outputs. By @Aaron1011 in [#4116](https://github.com/gfx-rs/wgpu/pull/4116). #### Vulkan - Fix enabling `wgpu::Features::PARTIALLY_BOUND_BINDING_ARRAY` not being actually enabled in vulkan backend. By @39ali in[#3772](https://github.com/gfx-rs/wgpu/pull/3772). diff --git a/Cargo.lock b/Cargo.lock index ba1a403628..942cf10970 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,8 +1551,7 @@ dependencies = [ [[package]] name = "metal" version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "623b5e6cefd76e58f774bd3cc0c6f5c7615c58c03a97815245a25c3c9bdee318" +source = "git+https://github.com/gfx-rs/metal-rs/?rev=d24f1a4#d24f1a4ae92470bf87a0c65ecfe78c9299835505" dependencies = [ "bitflags 2.4.0", "block", @@ -1603,7 +1602,7 @@ dependencies = [ [[package]] name = "naga" version = "0.13.0" -source = "git+https://github.com/gfx-rs/naga?rev=cc87b8f9eb30bb55d0735b89d3df3e099e1a6e7c#cc87b8f9eb30bb55d0735b89d3df3e099e1a6e7c" +source = "git+https://github.com/gfx-rs/naga?rev=df8107b7#df8107b78812cc2b1e3d5de35279cedc1f0da3fb" dependencies = [ "bit-set", "bitflags 2.4.0", @@ -2127,9 +2126,9 @@ dependencies = [ [[package]] name = "profiling" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45f10e75d83c7aec79a6aa46f897075890e156b105eebe51cfa0abce51af025f" +checksum = "f89dff0959d98c9758c88826cc002e2c3d0b9dfac4139711d1f30de442f1139b" [[package]] name = "quote" @@ -2532,9 +2531,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" +checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" [[package]] name = "smithay-client-toolkit" @@ -2633,9 +2632,9 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" +checksum = "6093bad37da69aab9d123a8091e4be0aa4a03e4d601ec641c327398315f62b64" dependencies = [ "winapi-util", ] diff --git a/Cargo.toml b/Cargo.toml index 55c6048b86..10378fc231 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,7 +51,7 @@ version = "0.17" [workspace.dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "cc87b8f9eb30bb55d0735b89d3df3e099e1a6e7c" +rev = "df8107b7" version = "0.13.0" [workspace.dependencies] @@ -142,7 +142,7 @@ deno_web = "0.137.0" deno_webidl = "0.106.0" deno_webgpu = { path = "./deno_webgpu" } tokio = "1.32.0" -termcolor = "1.2.0" +termcolor = "1.3.0" [patch."https://github.com/gfx-rs/naga"] #naga = { path = "../naga" } @@ -158,6 +158,8 @@ termcolor = "1.2.0" #glow = { path = "../glow" } #d3d12 = { path = "../d3d12-rs" } #metal = { path = "../metal-rs" } +#metal = { path = "../metal-rs" } +metal = { git = "https://github.com/gfx-rs/metal-rs/", rev = "d24f1a4" } # More timer support via https://github.com/gfx-rs/metal-rs/pull/280 #web-sys = { path = "../wasm-bindgen/crates/web-sys" } #js-sys = { path = "../wasm-bindgen/crates/js-sys" } #wasm-bindgen = { path = "../wasm-bindgen" } diff --git a/deno_webgpu/lib.rs b/deno_webgpu/lib.rs index 92a6a51334..bf61e42517 100644 --- a/deno_webgpu/lib.rs +++ b/deno_webgpu/lib.rs @@ -365,6 +365,9 @@ fn deserialize_features(features: &wgpu_types::Features) -> Vec<&'static str> { if features.contains(wgpu_types::Features::SHADER_EARLY_DEPTH_TEST) { return_features.push("shader-early-depth-test"); } + if features.contains(wgpu_types::Features::SHADER_UNUSED_VERTEX_OUTPUT) { + return_features.push("shader-unused-vertex-output"); + } return_features } @@ -625,6 +628,10 @@ impl From for wgpu_types::Features { wgpu_types::Features::SHADER_EARLY_DEPTH_TEST, required_features.0.contains("shader-early-depth-test"), ); + features.set( + wgpu_types::Features::SHADER_UNUSED_VERTEX_OUTPUT, + required_features.0.contains("shader-unused-vertex-output"), + ); features } diff --git a/examples/boids/src/main.rs b/examples/boids/src/main.rs index e8aa2f71fd..eb5146f8bd 100644 --- a/examples/boids/src/main.rs +++ b/examples/boids/src/main.rs @@ -276,7 +276,7 @@ impl wgpu_example::framework::Example for Example { // Not clearing here in order to test wgpu's zero texture initialization on a surface texture. // Users should avoid loading uninitialized memory since this can cause additional overhead. load: wgpu::LoadOp::Load, - store: true, + store: wgpu::StoreOp::Store, }, })]; let render_pass_descriptor = wgpu::RenderPassDescriptor { diff --git a/examples/bunnymark/src/main.rs b/examples/bunnymark/src/main.rs index 256083eebb..ca8dbbee06 100644 --- a/examples/bunnymark/src/main.rs +++ b/examples/bunnymark/src/main.rs @@ -332,7 +332,7 @@ impl wgpu_example::framework::Example for Example { resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Clear(clear_color), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: None, diff --git a/examples/capture/src/main.rs b/examples/capture/src/main.rs index b783b3af80..47a453de6b 100644 --- a/examples/capture/src/main.rs +++ b/examples/capture/src/main.rs @@ -101,7 +101,7 @@ async fn create_red_image_with_dimensions( resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Clear(wgpu::Color::RED), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: None, diff --git a/examples/conservative-raster/src/main.rs b/examples/conservative-raster/src/main.rs index e5cfb4d775..093740a206 100644 --- a/examples/conservative-raster/src/main.rs +++ b/examples/conservative-raster/src/main.rs @@ -269,7 +269,7 @@ impl wgpu_example::framework::Example for Example { resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Clear(wgpu::Color::BLACK), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: None, @@ -290,7 +290,7 @@ impl wgpu_example::framework::Example for Example { resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Clear(wgpu::Color::BLACK), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: None, diff --git a/examples/cube/src/main.rs b/examples/cube/src/main.rs index a10dfd0fd0..b031e1004c 100644 --- a/examples/cube/src/main.rs +++ b/examples/cube/src/main.rs @@ -375,7 +375,7 @@ impl wgpu_example::framework::Example for Example { b: 0.3, a: 1.0, }), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: None, diff --git a/examples/hello-triangle/src/main.rs b/examples/hello-triangle/src/main.rs index c5432acd07..ebb8f6b736 100644 --- a/examples/hello-triangle/src/main.rs +++ b/examples/hello-triangle/src/main.rs @@ -118,7 +118,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) { resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Clear(wgpu::Color::GREEN), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: None, diff --git a/examples/hello-windows/src/main.rs b/examples/hello-windows/src/main.rs index f368804c36..ba28341395 100644 --- a/examples/hello-windows/src/main.rs +++ b/examples/hello-windows/src/main.rs @@ -131,7 +131,7 @@ async fn run(event_loop: EventLoop<()>, viewports: Vec<(Window, wgpu::Color)>) { resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Clear(viewport.desc.background), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: None, diff --git a/examples/mipmap/src/main.rs b/examples/mipmap/src/main.rs index a85110ff14..5536579b0b 100644 --- a/examples/mipmap/src/main.rs +++ b/examples/mipmap/src/main.rs @@ -163,7 +163,7 @@ impl Example { resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Clear(wgpu::Color::WHITE), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: None, @@ -490,7 +490,7 @@ impl wgpu_example::framework::Example for Example { resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Clear(clear_color), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: None, diff --git a/examples/msaa-line/src/main.rs b/examples/msaa-line/src/main.rs index aa7a277418..07cc4eaf57 100644 --- a/examples/msaa-line/src/main.rs +++ b/examples/msaa-line/src/main.rs @@ -282,7 +282,7 @@ impl wgpu_example::framework::Example for Example { resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Clear(wgpu::Color::BLACK), - store: true, + store: wgpu::StoreOp::Store, }, } } else { @@ -293,7 +293,7 @@ impl wgpu_example::framework::Example for Example { load: wgpu::LoadOp::Clear(wgpu::Color::BLACK), // Storing pre-resolve MSAA data is unnecessary if it isn't used later. // On tile-based GPU, avoid store can reduce your app's memory footprint. - store: false, + store: wgpu::StoreOp::Discard, }, } }; diff --git a/examples/shadow/src/main.rs b/examples/shadow/src/main.rs index 3f963d0c53..c63076e6ac 100644 --- a/examples/shadow/src/main.rs +++ b/examples/shadow/src/main.rs @@ -773,7 +773,7 @@ impl wgpu_example::framework::Example for Example { view: &light.target_view, depth_ops: Some(wgpu::Operations { load: wgpu::LoadOp::Clear(1.0), - store: true, + store: wgpu::StoreOp::Store, }), stencil_ops: None, }), @@ -810,14 +810,14 @@ impl wgpu_example::framework::Example for Example { b: 0.3, a: 1.0, }), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { view: &self.forward_depth, depth_ops: Some(wgpu::Operations { load: wgpu::LoadOp::Clear(1.0), - store: false, + store: wgpu::StoreOp::Discard, }), stencil_ops: None, }), diff --git a/examples/skybox/src/main.rs b/examples/skybox/src/main.rs index d09622f53c..5d91c62865 100644 --- a/examples/skybox/src/main.rs +++ b/examples/skybox/src/main.rs @@ -428,14 +428,14 @@ impl wgpu_example::framework::Example for Skybox { b: 0.3, a: 1.0, }), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { view: &self.depth_view, depth_ops: Some(wgpu::Operations { load: wgpu::LoadOp::Clear(1.0), - store: false, + store: wgpu::StoreOp::Discard, }), stencil_ops: None, }), diff --git a/examples/stencil-triangles/src/main.rs b/examples/stencil-triangles/src/main.rs index 55aad9c9ba..9d918500d5 100644 --- a/examples/stencil-triangles/src/main.rs +++ b/examples/stencil-triangles/src/main.rs @@ -200,7 +200,7 @@ impl wgpu_example::framework::Example for Triangles { b: 0.3, a: 1.0, }), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { @@ -208,7 +208,7 @@ impl wgpu_example::framework::Example for Triangles { depth_ops: None, stencil_ops: Some(wgpu::Operations { load: wgpu::LoadOp::Clear(0), - store: true, + store: wgpu::StoreOp::Store, }), }), timestamp_writes: None, diff --git a/examples/texture-arrays/src/main.rs b/examples/texture-arrays/src/main.rs index 373c2396ae..af9cfa56ff 100644 --- a/examples/texture-arrays/src/main.rs +++ b/examples/texture-arrays/src/main.rs @@ -379,7 +379,7 @@ impl wgpu_example::framework::Example for Example { resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Clear(wgpu::Color::BLACK), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: None, diff --git a/examples/timestamp-queries/src/main.rs b/examples/timestamp-queries/src/main.rs index 3479122c79..d463ea6579 100644 --- a/examples/timestamp-queries/src/main.rs +++ b/examples/timestamp-queries/src/main.rs @@ -47,6 +47,7 @@ impl QueryResults { // * compute end const NUM_QUERIES: u64 = 8; + #[allow(clippy::redundant_closure)] // False positive fn from_raw_results(timestamps: Vec, timestamps_inside_passes: bool) -> Self { assert_eq!(timestamps.len(), Self::NUM_QUERIES as usize); @@ -60,9 +61,9 @@ impl QueryResults { let mut encoder_timestamps = [0, 0]; encoder_timestamps[0] = get_next_slot(); let render_start_end_timestamps = [get_next_slot(), get_next_slot()]; - let render_inside_timestamp = timestamps_inside_passes.then_some(get_next_slot()); + let render_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot()); let compute_start_end_timestamps = [get_next_slot(), get_next_slot()]; - let compute_inside_timestamp = timestamps_inside_passes.then_some(get_next_slot()); + let compute_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot()); encoder_timestamps[1] = get_next_slot(); QueryResults { @@ -79,8 +80,8 @@ impl QueryResults { let elapsed_us = |start, end: u64| end.wrapping_sub(start) as f64 * period as f64 / 1000.0; println!( - "Elapsed time render + compute: {:.2} μs", - elapsed_us(self.encoder_timestamps[0], self.encoder_timestamps[1]) + "Elapsed time before render until after compute: {:.2} μs", + elapsed_us(self.encoder_timestamps[0], self.encoder_timestamps[1]), ); println!( "Elapsed time render pass: {:.2} μs", @@ -374,7 +375,7 @@ fn render_pass( resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Clear(wgpu::Color::GREEN), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: None, @@ -464,13 +465,10 @@ mod tests { render_start_end_timestamps[1].wrapping_sub(render_start_end_timestamps[0]); let compute_delta = compute_start_end_timestamps[1].wrapping_sub(compute_start_end_timestamps[0]); + let encoder_delta = encoder_timestamps[1].wrapping_sub(encoder_timestamps[0]); - // TODO: Metal encoder timestamps aren't implemented yet. - if ctx.adapter.get_info().backend != wgpu::Backend::Metal { - let encoder_delta = encoder_timestamps[1].wrapping_sub(encoder_timestamps[0]); - assert!(encoder_delta > 0); - assert!(encoder_delta >= render_delta + compute_delta); - } + assert!(encoder_delta > 0); + assert!(encoder_delta >= render_delta + compute_delta); if let Some(render_inside_timestamp) = render_inside_timestamp { assert!(render_inside_timestamp >= render_start_end_timestamps[0]); diff --git a/examples/water/src/main.rs b/examples/water/src/main.rs index 5d5daa1f59..da9f1238ab 100644 --- a/examples/water/src/main.rs +++ b/examples/water/src/main.rs @@ -739,7 +739,7 @@ impl wgpu_example::framework::Example for Example { resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Clear(back_color), - store: true, + store: wgpu::StoreOp::Store, }, })], // We still need to use the depth buffer here @@ -748,7 +748,7 @@ impl wgpu_example::framework::Example for Example { view: &self.depth_buffer, depth_ops: Some(wgpu::Operations { load: wgpu::LoadOp::Clear(1.0), - store: true, + store: wgpu::StoreOp::Store, }), stencil_ops: None, }), @@ -768,14 +768,14 @@ impl wgpu_example::framework::Example for Example { resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Clear(back_color), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { view: &self.depth_buffer, depth_ops: Some(wgpu::Operations { load: wgpu::LoadOp::Clear(1.0), - store: true, + store: wgpu::StoreOp::Store, }), stencil_ops: None, }), @@ -797,7 +797,7 @@ impl wgpu_example::framework::Example for Example { resolve_target: None, ops: wgpu::Operations { load: wgpu::LoadOp::Load, - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { diff --git a/tests/src/lib.rs b/tests/src/lib.rs index 236b353386..c506126708 100644 --- a/tests/src/lib.rs +++ b/tests/src/lib.rs @@ -449,7 +449,7 @@ pub fn initialize_test(parameters: TestParameters, test_function: impl FnOnce(Te } } -fn initialize_adapter() -> (Adapter, Option) { +pub fn initialize_adapter() -> (Adapter, Option) { let instance = initialize_instance(); let surface_guard: Option; let compatible_surface; diff --git a/tests/tests/device.rs b/tests/tests/device.rs index f43791f86e..7964f2afdb 100644 --- a/tests/tests/device.rs +++ b/tests/tests/device.rs @@ -40,3 +40,54 @@ fn device_mismatch() { }, ); } + +#[cfg(not(all(target_arch = "wasm32", not(target_os = "emscripten"))))] +#[test] +fn request_device_error_on_native() { + pollster::block_on(request_device_error_message()); +} + +/// Check that `RequestDeviceError`s produced have some diagnostic information. +/// +/// Note: this is a wasm *and* native test. On wasm it is run directly; on native, indirectly +#[wasm_bindgen_test::wasm_bindgen_test] +async fn request_device_error_message() { + // Not using initialize_test() because that doesn't let us catch the error + // nor .await anything + let (adapter, _surface_guard) = wgpu_test::initialize_adapter(); + + let device_error = adapter + .request_device( + &wgpu::DeviceDescriptor { + // Force a failure by requesting absurd limits. + features: wgpu::Features::all(), + limits: wgpu::Limits { + max_texture_dimension_1d: u32::MAX, + max_texture_dimension_2d: u32::MAX, + max_texture_dimension_3d: u32::MAX, + max_bind_groups: u32::MAX, + max_push_constant_size: u32::MAX, + ..Default::default() + }, + ..Default::default() + }, + None, + ) + .await + .unwrap_err(); + + let device_error = device_error.to_string(); + cfg_if::cfg_if! { + if #[cfg(all(target_arch = "wasm32", not(feature = "webgl")))] { + // On WebGPU, so the error we get will be from the browser WebGPU API. + // Per the WebGPU specification this should be a `TypeError` when features are not + // available, , + // and the stringification it goes through for Rust should put that in the message. + let expected = "TypeError"; + } else { + // This message appears whenever wgpu-core is used as the implementation. + let expected = "Unsupported features were requested: Features("; + } + } + assert!(device_error.contains(expected), "{device_error}"); +} diff --git a/tests/tests/occlusion_query/mod.rs b/tests/tests/occlusion_query/mod.rs index eab0828e41..7747eaa624 100644 --- a/tests/tests/occlusion_query/mod.rs +++ b/tests/tests/occlusion_query/mod.rs @@ -69,7 +69,7 @@ fn occlusion_query() { view: &depth_texture_view, depth_ops: Some(wgpu::Operations { load: wgpu::LoadOp::Clear(1.0), - store: true, + store: wgpu::StoreOp::Store, }), stencil_ops: None, }), diff --git a/tests/tests/query_set.rs b/tests/tests/query_set.rs new file mode 100644 index 0000000000..16e5094089 --- /dev/null +++ b/tests/tests/query_set.rs @@ -0,0 +1,26 @@ +use wgpu_test::{initialize_test, FailureCase, TestParameters}; + +#[test] +fn drop_failed_timestamp_query_set() { + let parameters = TestParameters::default() + // https://github.com/gfx-rs/wgpu/issues/4139 + .expect_fail(FailureCase::always()); + initialize_test(parameters, |ctx| { + // Enter an error scope, so the validation catch-all doesn't + // report the error too early. + ctx.device.push_error_scope(wgpu::ErrorFilter::Validation); + + // Creating this query set should fail, since we didn't include + // TIMESTAMP_QUERY in our required features. + let bad_query_set = ctx.device.create_query_set(&wgpu::QuerySetDescriptor { + label: Some("doomed query set"), + ty: wgpu::QueryType::Timestamp, + count: 1, + }); + + // Dropping this should not panic. + drop(bad_query_set); + + assert!(pollster::block_on(ctx.device.pop_error_scope()).is_some()); + }); +} diff --git a/tests/tests/regression/issue_3457.rs b/tests/tests/regression/issue_3457.rs index 2dccd3d427..0d2c086ed5 100644 --- a/tests/tests/regression/issue_3457.rs +++ b/tests/tests/regression/issue_3457.rs @@ -140,7 +140,7 @@ fn pass_reset_vertex_buffer() { resolve_target: None, ops: Operations { load: LoadOp::Clear(Color::BLACK), - store: false, + store: StoreOp::Discard, }, })], depth_stencil_attachment: None, @@ -175,7 +175,7 @@ fn pass_reset_vertex_buffer() { resolve_target: None, ops: Operations { load: LoadOp::Clear(Color::BLACK), - store: false, + store: StoreOp::Discard, }, })], depth_stencil_attachment: None, diff --git a/tests/tests/root.rs b/tests/tests/root.rs index 85901ae491..b2695fd827 100644 --- a/tests/tests/root.rs +++ b/tests/tests/root.rs @@ -20,6 +20,7 @@ mod instance; mod occlusion_query; mod partially_bounded_arrays; mod poll; +mod query_set; mod queue_transfer; mod resource_descriptor_accessor; mod resource_error; diff --git a/tests/tests/scissor_tests/mod.rs b/tests/tests/scissor_tests/mod.rs index da050cb61f..a921827e0d 100644 --- a/tests/tests/scissor_tests/mod.rs +++ b/tests/tests/scissor_tests/mod.rs @@ -75,7 +75,7 @@ fn scissor_test_impl(ctx: &TestingContext, scissor_rect: Rect, expected_data: [u b: 0.0, a: 0.0, }), - store: true, + store: wgpu::StoreOp::Store, }, })], depth_stencil_attachment: None, diff --git a/tests/tests/shader_primitive_index/mod.rs b/tests/tests/shader_primitive_index/mod.rs index a05d1cd5f0..2739b2e77d 100644 --- a/tests/tests/shader_primitive_index/mod.rs +++ b/tests/tests/shader_primitive_index/mod.rs @@ -180,7 +180,7 @@ fn pulling_common( color_attachments: &[Some(wgpu::RenderPassColorAttachment { ops: wgpu::Operations { load: wgpu::LoadOp::Clear(wgpu::Color::WHITE), - store: true, + store: wgpu::StoreOp::Store, }, resolve_target: None, view: &color_view, diff --git a/tests/tests/zero_init_texture_after_discard.rs b/tests/tests/zero_init_texture_after_discard.rs index 2b757e069a..e47f1aa0fa 100644 --- a/tests/tests/zero_init_texture_after_discard.rs +++ b/tests/tests/zero_init_texture_after_discard.rs @@ -155,11 +155,11 @@ impl<'ctx> TestCase<'ctx> { view: &texture.create_view(&TextureViewDescriptor::default()), depth_ops: format.has_depth_aspect().then_some(Operations { load: LoadOp::Clear(1.0), - store: true, + store: StoreOp::Store, }), stencil_ops: format.has_stencil_aspect().then_some(Operations { load: LoadOp::Clear(0xFFFFFFFF), - store: true, + store: StoreOp::Store, }), }), timestamp_writes: None, @@ -230,7 +230,7 @@ impl<'ctx> TestCase<'ctx> { resolve_target: None, ops: Operations { load: LoadOp::Load, - store: false, // discard! + store: StoreOp::Discard, }, }, )], @@ -239,11 +239,11 @@ impl<'ctx> TestCase<'ctx> { view: &self.texture.create_view(&TextureViewDescriptor::default()), depth_ops: self.format.has_depth_aspect().then_some(Operations { load: LoadOp::Load, - store: false, // discard! + store: StoreOp::Discard, }), stencil_ops: self.format.has_stencil_aspect().then_some(Operations { load: LoadOp::Load, - store: false, // discard! + store: StoreOp::Discard, }), }, ), @@ -264,11 +264,11 @@ impl<'ctx> TestCase<'ctx> { view: &self.texture.create_view(&TextureViewDescriptor::default()), depth_ops: Some(Operations { load: LoadOp::Load, - store: false, // discard! + store: StoreOp::Discard, }), stencil_ops: self.format.has_stencil_aspect().then_some(Operations { load: LoadOp::Clear(0), - store: true, + store: StoreOp::Store, }), }, ), @@ -289,11 +289,11 @@ impl<'ctx> TestCase<'ctx> { view: &self.texture.create_view(&TextureViewDescriptor::default()), depth_ops: self.format.has_depth_aspect().then_some(Operations { load: LoadOp::Clear(0.0), - store: true, + store: StoreOp::Store, }), stencil_ops: Some(Operations { load: LoadOp::Load, - store: false, // discard! + store: StoreOp::Discard, }), }, ), diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml index 5487a8bdc0..19bc3ad64d 100644 --- a/wgpu-core/Cargo.toml +++ b/wgpu-core/Cargo.toml @@ -72,7 +72,7 @@ thiserror = "1" [dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "cc87b8f9eb30bb55d0735b89d3df3e099e1a6e7c" +rev = "df8107b7" version = "0.13.0" features = ["clone", "span", "validate"] diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 73f1887e10..8acc34acf4 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -1276,6 +1276,10 @@ impl Device { .flags .contains(wgt::DownlevelFlags::MULTISAMPLED_SHADING), ); + caps.set( + Caps::DUAL_SOURCE_BLENDING, + self.features.contains(wgt::Features::DUAL_SOURCE_BLENDING), + ); let info = naga::valid::Validator::new(naga::valid::ValidationFlags::all(), caps) .validate(&module) @@ -1286,7 +1290,8 @@ impl Device { inner: Box::new(inner), }) })?; - let interface = validation::Interface::new(&module, &info, self.limits.clone()); + let interface = + validation::Interface::new(&module, &info, self.limits.clone(), self.features); let hal_shader = hal::ShaderInput::Naga(hal::NagaShader { module, info }); let hal_desc = hal::ShaderModuleDescriptor { @@ -2560,6 +2565,8 @@ impl Device { let mut vertex_steps = Vec::with_capacity(desc.vertex.buffers.len()); let mut vertex_buffers = Vec::with_capacity(desc.vertex.buffers.len()); let mut total_attributes = 0; + let mut shader_expects_dual_source_blending = false; + let mut pipeline_expects_dual_source_blending = false; for (i, vb_state) in desc.vertex.buffers.iter().enumerate() { vertex_steps.push(pipeline::VertexStep { stride: vb_state.array_stride, @@ -2700,7 +2707,25 @@ impl Device { { break Some(pipeline::ColorStateError::FormatNotMultisampled(cs.format)); } - + if let Some(blend_mode) = cs.blend { + for factor in [ + blend_mode.color.src_factor, + blend_mode.color.dst_factor, + blend_mode.alpha.src_factor, + blend_mode.alpha.dst_factor, + ] { + if factor.ref_second_blend_source() { + self.require_features(wgt::Features::DUAL_SOURCE_BLENDING)?; + if i == 0 { + pipeline_expects_dual_source_blending = true; + break; + } else { + return Err(crate::pipeline::CreateRenderPipelineError + ::BlendFactorOnUnsupportedTarget { factor, target: i as u32 }); + } + } + } + } break None; }; if let Some(e) = error { @@ -2857,6 +2882,15 @@ impl Device { } } + if let Some(ref interface) = shader_module.interface { + shader_expects_dual_source_blending = interface + .fragment_uses_dual_source_blending(&fragment.stage.entry_point) + .map_err(|error| pipeline::CreateRenderPipelineError::Stage { + stage: flag, + error, + })?; + } + Some(hal::ProgrammableStage { module: &shader_module.raw, entry_point: fragment.stage.entry_point.as_ref(), @@ -2865,6 +2899,17 @@ impl Device { None => None, }; + if !pipeline_expects_dual_source_blending && shader_expects_dual_source_blending { + return Err( + pipeline::CreateRenderPipelineError::ShaderExpectsPipelineToUseDualSourceBlending, + ); + } + if pipeline_expects_dual_source_blending && !shader_expects_dual_source_blending { + return Err( + pipeline::CreateRenderPipelineError::PipelineExpectsShaderToUseDualSourceBlending, + ); + } + if validated_stages.contains(wgt::ShaderStages::FRAGMENT) { for (i, output) in io.iter() { match color_targets.get(*i as usize) { diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index da06b652ea..c78a79820d 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -384,6 +384,15 @@ pub enum CreateRenderPipelineError { }, #[error("In the provided shader, the type given for group {group} binding {binding} has a size of {size}. As the device does not support `DownlevelFlags::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED`, the type must have a size that is a multiple of 16 bytes.")] UnalignedShader { group: u32, binding: u32, size: u64 }, + #[error("Using the blend factor {factor:?} for render target {target} is not possible. Only the first render target may be used when dual-source blending.")] + BlendFactorOnUnsupportedTarget { + factor: wgt::BlendFactor, + target: u32, + }, + #[error("Pipeline expects the shader entry point to make use of dual-source blending.")] + PipelineExpectsShaderToUseDualSourceBlending, + #[error("Shader entry point expects the pipeline to make use of dual-source blending.")] + ShaderExpectsPipelineToUseDualSourceBlending, } bitflags::bitflags! { diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs index e3ecb916d3..ef5c65ed00 100644 --- a/wgpu-core/src/validation.rs +++ b/wgpu-core/src/validation.rs @@ -116,11 +116,13 @@ struct EntryPoint { spec_constants: Vec, sampling_pairs: FastHashSet<(naga::Handle, naga::Handle)>, workgroup_size: [u32; 3], + dual_source_blending: bool, } #[derive(Debug)] pub struct Interface { limits: wgt::Limits, + features: wgt::Features, resources: naga::Arena, entry_points: FastHashMap<(naga::ShaderStage, String), EntryPoint>, } @@ -830,7 +832,12 @@ impl Interface { list.push(varying); } - pub fn new(module: &naga::Module, info: &naga::valid::ModuleInfo, limits: wgt::Limits) -> Self { + pub fn new( + module: &naga::Module, + info: &naga::valid::ModuleInfo, + limits: wgt::Limits, + features: wgt::Features, + ) -> Self { let mut resources = naga::Arena::new(); let mut resource_mapping = FastHashMap::default(); for (var_handle, var) in module.global_variables.iter() { @@ -903,7 +910,7 @@ impl Interface { ep.sampling_pairs .insert((resource_mapping[&key.image], resource_mapping[&key.sampler])); } - + ep.dual_source_blending = info.dual_source_blending; ep.workgroup_size = entry_point.workgroup_size; entry_points.insert((entry_point.stage, entry_point.name.clone()), ep); @@ -911,6 +918,7 @@ impl Interface { Self { limits, + features, resources, entry_points, } @@ -1120,7 +1128,12 @@ impl Interface { } // Check all vertex outputs and make sure the fragment shader consumes them. - if shader_stage == naga::ShaderStage::Fragment { + // This requirement is removed if the `SHADER_UNUSED_VERTEX_OUTPUT` feature is enabled. + if shader_stage == naga::ShaderStage::Fragment + && !self + .features + .contains(wgt::Features::SHADER_UNUSED_VERTEX_OUTPUT) + { for &index in inputs.keys() { // This is a linear scan, but the count should be low enough // that this should be fine. @@ -1177,4 +1190,15 @@ impl Interface { .collect(); Ok(outputs) } + + pub fn fragment_uses_dual_source_blending( + &self, + entry_point_name: &str, + ) -> Result { + let pair = (naga::ShaderStage::Fragment, entry_point_name.to_string()); + self.entry_points + .get(&pair) + .ok_or(StageError::MissingEntryPoint(pair.1)) + .map(|ep| ep.dual_source_blending) + } } diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 225f18256a..3db7363616 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -120,14 +120,14 @@ android_system_properties = "0.1.1" [dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "cc87b8f9eb30bb55d0735b89d3df3e099e1a6e7c" +rev = "df8107b7" version = "0.13.0" features = ["clone"] # DEV dependencies [dev-dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "cc87b8f9eb30bb55d0735b89d3df3e099e1a6e7c" +rev = "df8107b7" version = "0.13.0" features = ["wgsl-in"] diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 2e4dff09fb..10c067efb5 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -250,7 +250,9 @@ impl super::Adapter { | wgt::Features::TEXTURE_FORMAT_16BIT_NORM | wgt::Features::PUSH_CONSTANTS | wgt::Features::SHADER_PRIMITIVE_INDEX - | wgt::Features::RG11B10UFLOAT_RENDERABLE; + | wgt::Features::RG11B10UFLOAT_RENDERABLE + | wgt::Features::DUAL_SOURCE_BLENDING; + //TODO: in order to expose this, we need to run a compute shader // that extract the necessary statistics out of the D3D12 result. // Alternatively, we could allocate a buffer for the query set, diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index 8b44ae9c4b..f484d1a9e2 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -222,6 +222,10 @@ pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12_ty::D3D12_FILL_MODE { } } +/// D3D12 doesn't support passing factors ending in `_COLOR` for alpha blending +/// (see https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_render_target_blend_desc). +/// Therefore this function takes an additional `is_alpha` argument +/// which if set will return an equivalent `_ALPHA` factor. fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) -> d3d12_ty::D3D12_BLEND { use wgt::BlendFactor as Bf; match factor { @@ -242,12 +246,12 @@ fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) -> d3d12_ty::D3D12 Bf::Constant => d3d12_ty::D3D12_BLEND_BLEND_FACTOR, Bf::OneMinusConstant => d3d12_ty::D3D12_BLEND_INV_BLEND_FACTOR, Bf::SrcAlphaSaturated => d3d12_ty::D3D12_BLEND_SRC_ALPHA_SAT, - //Bf::Src1Color if is_alpha => d3d12_ty::D3D12_BLEND_SRC1_ALPHA, - //Bf::Src1Color => d3d12_ty::D3D12_BLEND_SRC1_COLOR, - //Bf::OneMinusSrc1Color if is_alpha => d3d12_ty::D3D12_BLEND_INV_SRC1_ALPHA, - //Bf::OneMinusSrc1Color => d3d12_ty::D3D12_BLEND_INV_SRC1_COLOR, - //Bf::Src1Alpha => d3d12_ty::D3D12_BLEND_SRC1_ALPHA, - //Bf::OneMinusSrc1Alpha => d3d12_ty::D3D12_BLEND_INV_SRC1_ALPHA, + Bf::Src1 if is_alpha => d3d12_ty::D3D12_BLEND_SRC1_ALPHA, + Bf::Src1 => d3d12_ty::D3D12_BLEND_SRC1_COLOR, + Bf::OneMinusSrc1 if is_alpha => d3d12_ty::D3D12_BLEND_INV_SRC1_ALPHA, + Bf::OneMinusSrc1 => d3d12_ty::D3D12_BLEND_INV_SRC1_COLOR, + Bf::Src1Alpha => d3d12_ty::D3D12_BLEND_SRC1_ALPHA, + Bf::OneMinusSrc1Alpha => d3d12_ty::D3D12_BLEND_INV_SRC1_ALPHA, } } diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 80558ba5a9..96215ecbd9 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -363,11 +363,16 @@ impl super::Adapter { wgt::Features::MULTIVIEW, extensions.contains("OVR_multiview2"), ); + features.set( + wgt::Features::DUAL_SOURCE_BLENDING, + extensions.contains("GL_EXT_blend_func_extended"), + ); features.set( wgt::Features::SHADER_PRIMITIVE_INDEX, ver >= (3, 2) || extensions.contains("OES_geometry_shader"), ); features.set(wgt::Features::SHADER_EARLY_DEPTH_TEST, ver >= (3, 1)); + features.set(wgt::Features::SHADER_UNUSED_VERTEX_OUTPUT, true); let gles_bcn_exts = [ "GL_EXT_texture_compression_s3tc_srgb", "GL_EXT_texture_compression_rgtc", diff --git a/wgpu-hal/src/gles/conv.rs b/wgpu-hal/src/gles/conv.rs index dd5d764c6a..9bfac022a1 100644 --- a/wgpu-hal/src/gles/conv.rs +++ b/wgpu-hal/src/gles/conv.rs @@ -376,6 +376,10 @@ fn map_blend_factor(factor: wgt::BlendFactor) -> u32 { Bf::Constant => glow::CONSTANT_COLOR, Bf::OneMinusConstant => glow::ONE_MINUS_CONSTANT_COLOR, Bf::SrcAlphaSaturated => glow::SRC_ALPHA_SATURATE, + Bf::Src1 => glow::SRC1_COLOR, + Bf::OneMinusSrc1 => glow::ONE_MINUS_SRC1_COLOR, + Bf::Src1Alpha => glow::SRC1_ALPHA, + Bf::OneMinusSrc1Alpha => glow::ONE_MINUS_SRC1_ALPHA, } } diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index e0c9040a01..dfcde3b240 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -5,6 +5,8 @@ use wgt::{AstcBlock, AstcChannel}; use std::{sync::Arc, thread}; +use super::TimestampQuerySupport; + const MAX_COMMAND_BUFFERS: u64 = 2048; unsafe impl Send for super::Adapter {} @@ -543,6 +545,26 @@ impl super::PrivateCapabilities { MTLReadWriteTextureTier::TierNone }; + let mut timestamp_query_support = TimestampQuerySupport::empty(); + if version.at_least((11, 0), (14, 0), os_is_mac) + && device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtStageBoundary) + { + // If we don't support at stage boundary, don't support anything else. + timestamp_query_support.insert(TimestampQuerySupport::STAGE_BOUNDARIES); + + if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDrawBoundary) { + timestamp_query_support.insert(TimestampQuerySupport::ON_RENDER_ENCODER); + } + if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDispatchBoundary) + { + timestamp_query_support.insert(TimestampQuerySupport::ON_COMPUTE_ENCODER); + } + if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtBlitBoundary) { + timestamp_query_support.insert(TimestampQuerySupport::ON_BLIT_ENCODER); + } + // `TimestampQuerySupport::INSIDE_WGPU_PASSES` emerges from the other flags. + } + Self { family_check, msl_version: if os_is_xr || version.at_least((12, 0), (15, 0), os_is_mac) { @@ -780,13 +802,7 @@ impl super::PrivateCapabilities { } else { None }, - support_timestamp_query: version.at_least((11, 0), (14, 0), os_is_mac) - && device - .supports_counter_sampling(metal::MTLCounterSamplingPoint::AtStageBoundary), - support_timestamp_query_in_passes: version.at_least((11, 0), (14, 0), os_is_mac) - && device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDrawBoundary) - && device - .supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDispatchBoundary), + timestamp_query_support, } } @@ -814,12 +830,20 @@ impl super::PrivateCapabilities { | F::DEPTH32FLOAT_STENCIL8 | F::MULTI_DRAW_INDIRECT; - features.set(F::TIMESTAMP_QUERY, self.support_timestamp_query); - // TODO: Not yet implemented. - // features.set( - // F::TIMESTAMP_QUERY_INSIDE_PASSES, - // self.support_timestamp_query_in_passes, - // ); + features.set( + F::TIMESTAMP_QUERY, + self.timestamp_query_support + .contains(TimestampQuerySupport::STAGE_BOUNDARIES), + ); + features.set( + F::TIMESTAMP_QUERY_INSIDE_PASSES, + self.timestamp_query_support + .contains(TimestampQuerySupport::INSIDE_WGPU_PASSES), + ); + features.set( + F::DUAL_SOURCE_BLENDING, + self.msl_version >= MTLLanguageVersion::V1_2 && self.dual_source_blending, + ); features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc); features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr); features.set(F::TEXTURE_COMPRESSION_BC, self.format_bc); @@ -854,6 +878,7 @@ impl super::PrivateCapabilities { features.set(F::ADDRESS_MODE_CLAMP_TO_ZERO, true); features.set(F::RG11B10UFLOAT_RENDERABLE, self.format_rg11b10_all); + features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); features } diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index cc737fd228..c4b37f9932 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -1,4 +1,4 @@ -use super::{conv, AsNative}; +use super::{conv, AsNative, TimestampQuerySupport}; use crate::CommandEncoder as _; use std::{borrow::Cow, mem, ops::Range}; @@ -18,6 +18,7 @@ impl Default for super::CommandState { storage_buffer_length_map: Default::default(), work_group_memory_sizes: Vec::new(), push_constants: Vec::new(), + pending_timer_queries: Vec::new(), } } } @@ -26,10 +27,85 @@ impl super::CommandEncoder { fn enter_blit(&mut self) -> &metal::BlitCommandEncoderRef { if self.state.blit.is_none() { debug_assert!(self.state.render.is_none() && self.state.compute.is_none()); + let cmd_buf = self.raw_cmd_buf.as_ref().unwrap(); + + // Take care of pending timer queries. + // If we can't use `sample_counters_in_buffer` we have to create a dummy blit encoder! + // + // There is a known bug in Metal where blit encoders won't write timestamps if they don't have a blit operation. + // See https://github.com/gpuweb/gpuweb/issues/2046#issuecomment-1205793680 & https://source.chromium.org/chromium/chromium/src/+/006c4eb70c96229834bbaf271290f40418144cd3:third_party/dawn/src/dawn/native/metal/BackendMTL.mm;l=350 + // + // To make things worse: + // * what counts as a blit operation is a bit unclear, experimenting seemed to indicate that resolve_counters doesn't count. + // * in some cases (when?) using `set_start_of_encoder_sample_index` doesn't work, so we have to use `set_end_of_encoder_sample_index` instead + // + // All this means that pretty much the only *reliable* thing as of writing is to: + // * create a dummy blit encoder using set_end_of_encoder_sample_index + // * do a dummy write that is known to be not optimized out. + // * close the encoder since we used set_end_of_encoder_sample_index and don't want to get any extra stuff in there. + // * create another encoder for whatever we actually had in mind. + let supports_sample_counters_in_buffer = self + .shared + .private_caps + .timestamp_query_support + .contains(TimestampQuerySupport::ON_BLIT_ENCODER); + + if !self.state.pending_timer_queries.is_empty() && !supports_sample_counters_in_buffer { + objc::rc::autoreleasepool(|| { + let descriptor = metal::BlitPassDescriptor::new(); + let mut last_query = None; + for (i, (set, index)) in self.state.pending_timer_queries.drain(..).enumerate() + { + let sba_descriptor = descriptor + .sample_buffer_attachments() + .object_at(i as _) + .unwrap(); + sba_descriptor + .set_sample_buffer(set.counter_sample_buffer.as_ref().unwrap()); + + // Here be dragons: + // As mentioned above, for some reasons using the start of the encoder won't yield any results sometimes! + sba_descriptor + .set_start_of_encoder_sample_index(metal::COUNTER_DONT_SAMPLE); + sba_descriptor.set_end_of_encoder_sample_index(index as _); + + last_query = Some((set, index)); + } + let encoder = cmd_buf.blit_command_encoder_with_descriptor(descriptor); + + // As explained above, we need to do some write: + // Conveniently, we have a buffer with every query set, that we can use for this for a dummy write, + // since we know that it is going to be overwritten again on timer resolve and HAL doesn't define its state before that. + let raw_range = metal::NSRange { + location: last_query.as_ref().unwrap().1 as u64 * crate::QUERY_SIZE, + length: 1, + }; + encoder.fill_buffer( + &last_query.as_ref().unwrap().0.raw_buffer, + raw_range, + 255, // Don't write 0, so it's easier to identify if something went wrong. + ); + + encoder.end_encoding(); + }); + } + objc::rc::autoreleasepool(|| { - let cmd_buf = self.raw_cmd_buf.as_ref().unwrap(); self.state.blit = Some(cmd_buf.new_blit_command_encoder().to_owned()); }); + + let encoder = self.state.blit.as_ref().unwrap(); + + // UNTESTED: + // If the above described issue with empty blit encoder applies to `sample_counters_in_buffer` as well, we should use the same workaround instead! + for (set, index) in self.state.pending_timer_queries.drain(..) { + debug_assert!(supports_sample_counters_in_buffer); + encoder.sample_counters_in_buffer( + set.counter_sample_buffer.as_ref().unwrap(), + index as _, + true, + ) + } } self.state.blit.as_ref().unwrap() } @@ -40,7 +116,7 @@ impl super::CommandEncoder { } } - fn enter_any(&mut self) -> Option<&metal::CommandEncoderRef> { + fn active_encoder(&mut self) -> Option<&metal::CommandEncoderRef> { if let Some(ref encoder) = self.state.render { Some(encoder) } else if let Some(ref encoder) = self.state.compute { @@ -127,9 +203,17 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn end_encoding(&mut self) -> Result { + // Handle pending timer query if any. + if !self.state.pending_timer_queries.is_empty() { + self.leave_blit(); + self.enter_blit(); + } + self.leave_blit(); debug_assert!(self.state.render.is_none()); debug_assert!(self.state.compute.is_none()); + debug_assert!(self.state.pending_timer_queries.is_empty()); + Ok(super::CommandBuffer { raw: self.raw_cmd_buf.take().unwrap(), }) @@ -322,16 +406,43 @@ impl crate::CommandEncoder for super::CommandEncoder { _ => {} } } - unsafe fn write_timestamp(&mut self, _set: &super::QuerySet, _index: u32) { - // TODO: If MTLCounterSamplingPoint::AtDrawBoundary/AtBlitBoundary/AtDispatchBoundary is supported, - // we don't need to insert a new encoder, but can instead use respective current one. - //let encoder = self.enter_any().unwrap_or_else(|| self.enter_blit()); + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + let support = self.shared.private_caps.timestamp_query_support; + debug_assert!( + support.contains(TimestampQuerySupport::STAGE_BOUNDARIES), + "Timestamp queries are not supported" + ); + let sample_buffer = set.counter_sample_buffer.as_ref().unwrap(); + let with_barrier = true; + + // Try to use an existing encoder for timestamp query if possible. + // This works only if it's supported for the active encoder. + if let (true, Some(encoder)) = ( + support.contains(TimestampQuerySupport::ON_BLIT_ENCODER), + self.state.blit.as_ref(), + ) { + encoder.sample_counters_in_buffer(sample_buffer, index as _, with_barrier); + } else if let (true, Some(encoder)) = ( + support.contains(TimestampQuerySupport::ON_RENDER_ENCODER), + self.state.render.as_ref(), + ) { + encoder.sample_counters_in_buffer(sample_buffer, index as _, with_barrier); + } else if let (true, Some(encoder)) = ( + support.contains(TimestampQuerySupport::ON_COMPUTE_ENCODER), + self.state.compute.as_ref(), + ) { + encoder.sample_counters_in_buffer(sample_buffer, index as _, with_barrier); + } else { + // If we're here it means we either have no encoder open, or it's not supported to sample within them. + // If this happens with render/compute open, this is an invalid usage! + debug_assert!(self.state.render.is_none() && self.state.compute.is_none()); - // TODO: Otherwise, we need to create a new blit command encoder with a descriptor that inserts the timestamps. - // Note that as of writing creating a new encoder is not exposed by the metal crate. - // https://developer.apple.com/documentation/metal/mtlcommandbuffer/3564431-makeblitcommandencoder + // But otherwise it means we'll put defer this to the next created encoder. + self.state.pending_timer_queries.push((set.clone(), index)); - // TODO: Enable respective test in `examples/timestamp-queries/src/tests.rs`. + // Ensure we didn't already have a blit open. + self.leave_blit(); + }; } unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range) { @@ -342,6 +453,7 @@ impl crate::CommandEncoder for super::CommandEncoder { }; encoder.fill_buffer(&set.raw_buffer, raw_range, 0); } + unsafe fn copy_query_results( &mut self, set: &super::QuerySet, @@ -454,8 +566,29 @@ impl crate::CommandEncoder for super::CommandEncoder { } } + let mut sba_index = 0; + let mut next_sba_descriptor = || { + let sba_descriptor = descriptor + .sample_buffer_attachments() + .object_at(sba_index) + .unwrap(); + + sba_descriptor.set_end_of_vertex_sample_index(metal::COUNTER_DONT_SAMPLE); + sba_descriptor.set_start_of_fragment_sample_index(metal::COUNTER_DONT_SAMPLE); + + sba_index += 1; + sba_descriptor + }; + + for (set, index) in self.state.pending_timer_queries.drain(..) { + let sba_descriptor = next_sba_descriptor(); + sba_descriptor.set_sample_buffer(set.counter_sample_buffer.as_ref().unwrap()); + sba_descriptor.set_start_of_vertex_sample_index(index as _); + sba_descriptor.set_end_of_fragment_sample_index(metal::COUNTER_DONT_SAMPLE); + } + if let Some(ref timestamp_writes) = desc.timestamp_writes { - let sba_descriptor = descriptor.sample_buffer_attachments().object_at(0).unwrap(); + let sba_descriptor = next_sba_descriptor(); sba_descriptor.set_sample_buffer( timestamp_writes .query_set @@ -464,12 +597,16 @@ impl crate::CommandEncoder for super::CommandEncoder { .unwrap(), ); - if let Some(start_index) = timestamp_writes.beginning_of_pass_write_index { - sba_descriptor.set_start_of_vertex_sample_index(start_index as _); - } - if let Some(end_index) = timestamp_writes.end_of_pass_write_index { - sba_descriptor.set_end_of_fragment_sample_index(end_index as _); - } + sba_descriptor.set_start_of_vertex_sample_index( + timestamp_writes + .beginning_of_pass_write_index + .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + ); + sba_descriptor.set_end_of_fragment_sample_index( + timestamp_writes + .end_of_pass_write_index + .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + ); } if let Some(occlusion_query_set) = desc.occlusion_query_set { @@ -697,19 +834,19 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn insert_debug_marker(&mut self, label: &str) { - if let Some(encoder) = self.enter_any() { + if let Some(encoder) = self.active_encoder() { encoder.insert_debug_signpost(label); } } unsafe fn begin_debug_marker(&mut self, group_label: &str) { - if let Some(encoder) = self.enter_any() { + if let Some(encoder) = self.active_encoder() { encoder.push_debug_group(group_label); } else if let Some(ref buf) = self.raw_cmd_buf { buf.push_debug_group(group_label); } } unsafe fn end_debug_marker(&mut self) { - if let Some(encoder) = self.enter_any() { + if let Some(encoder) = self.active_encoder() { encoder.pop_debug_group(); } else if let Some(ref buf) = self.raw_cmd_buf { buf.pop_debug_group(); @@ -969,11 +1106,25 @@ impl crate::CommandEncoder for super::CommandEncoder { objc::rc::autoreleasepool(|| { let descriptor = metal::ComputePassDescriptor::new(); - if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { + let mut sba_index = 0; + let mut next_sba_descriptor = || { let sba_descriptor = descriptor .sample_buffer_attachments() - .object_at(0 as _) + .object_at(sba_index) .unwrap(); + sba_index += 1; + sba_descriptor + }; + + for (set, index) in self.state.pending_timer_queries.drain(..) { + let sba_descriptor = next_sba_descriptor(); + sba_descriptor.set_sample_buffer(set.counter_sample_buffer.as_ref().unwrap()); + sba_descriptor.set_start_of_encoder_sample_index(index as _); + sba_descriptor.set_end_of_encoder_sample_index(metal::COUNTER_DONT_SAMPLE); + } + + if let Some(timestamp_writes) = desc.timestamp_writes.as_ref() { + let sba_descriptor = next_sba_descriptor(); sba_descriptor.set_sample_buffer( timestamp_writes .query_set @@ -982,12 +1133,16 @@ impl crate::CommandEncoder for super::CommandEncoder { .unwrap(), ); - if let Some(start_index) = timestamp_writes.beginning_of_pass_write_index { - sba_descriptor.set_start_of_encoder_sample_index(start_index as _); - } - if let Some(end_index) = timestamp_writes.end_of_pass_write_index { - sba_descriptor.set_end_of_encoder_sample_index(end_index as _); - } + sba_descriptor.set_start_of_encoder_sample_index( + timestamp_writes + .beginning_of_pass_write_index + .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + ); + sba_descriptor.set_end_of_encoder_sample_index( + timestamp_writes + .end_of_pass_write_index + .map_or(metal::COUNTER_DONT_SAMPLE, |i| i as _), + ); } let encoder = raw.compute_command_encoder_with_descriptor(descriptor); diff --git a/wgpu-hal/src/metal/conv.rs b/wgpu-hal/src/metal/conv.rs index a1ceb287ab..8f6439b50b 100644 --- a/wgpu-hal/src/metal/conv.rs +++ b/wgpu-hal/src/metal/conv.rs @@ -152,13 +152,11 @@ pub fn map_blend_factor(factor: wgt::BlendFactor) -> metal::MTLBlendFactor { Bf::OneMinusDstAlpha => OneMinusDestinationAlpha, Bf::Constant => BlendColor, Bf::OneMinusConstant => OneMinusBlendColor, - //Bf::ConstantAlpha => BlendAlpha, - //Bf::OneMinusConstantAlpha => OneMinusBlendAlpha, Bf::SrcAlphaSaturated => SourceAlphaSaturated, - //Bf::Src1 => Source1Color, - //Bf::OneMinusSrc1 => OneMinusSource1Color, - //Bf::Src1Alpha => Source1Alpha, - //Bf::OneMinusSrc1Alpha => OneMinusSource1Alpha, + Bf::Src1 => Source1Color, + Bf::OneMinusSrc1 => OneMinusSource1Color, + Bf::Src1Alpha => Source1Alpha, + Bf::OneMinusSrc1Alpha => OneMinusSource1Alpha, } } diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index a5cfa80edf..1f729211f3 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -33,6 +33,7 @@ use std::{ }; use arrayvec::ArrayVec; +use bitflags::bitflags; use metal::foreign_types::ForeignTypeRef as _; use parking_lot::Mutex; @@ -145,6 +146,24 @@ impl crate::Instance for Instance { } } +bitflags!( + /// Similar to `MTLCounterSamplingPoint`, but a bit higher abstracted for our purposes. + #[derive(Debug, Copy, Clone)] + pub struct TimestampQuerySupport: u32 { + /// On creating Metal encoders. + const STAGE_BOUNDARIES = 1 << 1; + /// Within existing draw encoders. + const ON_RENDER_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 2); + /// Within existing dispatch encoders. + const ON_COMPUTE_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 3); + /// Within existing blit encoders. + const ON_BLIT_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 4); + + /// Within any wgpu render/compute pass. + const INSIDE_WGPU_PASSES = Self::ON_RENDER_ENCODER.bits() | Self::ON_COMPUTE_ENCODER.bits(); + } +); + #[allow(dead_code)] #[derive(Clone, Debug)] struct PrivateCapabilities { @@ -241,8 +260,7 @@ struct PrivateCapabilities { supports_preserve_invariance: bool, supports_shader_primitive_index: bool, has_unified_memory: Option, - support_timestamp_query: bool, - support_timestamp_query_in_passes: bool, + timestamp_query_support: TimestampQuerySupport, } #[derive(Clone, Debug)] @@ -706,7 +724,7 @@ pub struct ComputePipeline { unsafe impl Send for ComputePipeline {} unsafe impl Sync for ComputePipeline {} -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct QuerySet { raw_buffer: metal::Buffer, //Metal has a custom buffer for counters. @@ -789,6 +807,9 @@ struct CommandState { work_group_memory_sizes: Vec, push_constants: Vec, + + /// Timer query that should be executed when the next pass starts. + pending_timer_queries: Vec<(QuerySet, u32)>, } pub struct CommandEncoder { diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 6d78e6901d..2de41cc3c9 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -177,6 +177,7 @@ impl PhysicalDeviceFeatures { //.shader_resource_residency(requested_features.contains(wgt::Features::SHADER_RESOURCE_RESIDENCY)) .geometry_shader(requested_features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX)) .depth_clamp(requested_features.contains(wgt::Features::DEPTH_CLIP_CONTROL)) + .dual_src_blend(requested_features.contains(wgt::Features::DUAL_SOURCE_BLENDING)) .build(), descriptor_indexing: if requested_features.intersects(indexing_features()) { Some( @@ -460,6 +461,7 @@ impl PhysicalDeviceFeatures { } features.set(F::DEPTH_CLIP_CONTROL, self.core.depth_clamp != 0); + features.set(F::DUAL_SOURCE_BLENDING, self.core.dual_src_blend != 0); if let Some(ref multiview) = self.multiview { features.set(F::MULTIVIEW, multiview.multiview != 0); @@ -520,6 +522,7 @@ impl PhysicalDeviceFeatures { | vk::FormatFeatureFlags::COLOR_ATTACHMENT_BLEND, ); features.set(F::RG11B10UFLOAT_RENDERABLE, rg11b10ufloat_renderable); + features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); (features, dl_flags) } diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index e2398c2689..459b7f858f 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -792,6 +792,10 @@ fn map_blend_factor(factor: wgt::BlendFactor) -> vk::BlendFactor { Bf::SrcAlphaSaturated => vk::BlendFactor::SRC_ALPHA_SATURATE, Bf::Constant => vk::BlendFactor::CONSTANT_COLOR, Bf::OneMinusConstant => vk::BlendFactor::ONE_MINUS_CONSTANT_COLOR, + Bf::Src1 => vk::BlendFactor::SRC1_COLOR, + Bf::OneMinusSrc1 => vk::BlendFactor::ONE_MINUS_SRC1_COLOR, + Bf::Src1Alpha => vk::BlendFactor::SRC1_ALPHA, + Bf::OneMinusSrc1Alpha => vk::BlendFactor::ONE_MINUS_SRC1_ALPHA, } } diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index c892874afa..13603fc03f 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -270,7 +270,7 @@ bitflags::bitflags! { /// Supported Platforms: /// - Vulkan /// - DX12 - /// - Metal - TODO: Not yet supported on command encoder. + /// - Metal /// /// This is a web and native feature. const TIMESTAMP_QUERY = 1 << 1; @@ -371,7 +371,7 @@ bitflags::bitflags! { /// Compressed textures sacrifice some quality in exchange for significantly reduced /// bandwidth usage. /// - /// Support for this feature guarantees availability of [`TextureUsages::COPY_SRC | TextureUsages::COPY_DST | TextureUsages::TEXTURE_BINDING`] for ASTC formats. + /// Support for this feature guarantees availability of [`TextureUsages::COPY_SRC | TextureUsages::COPY_DST | TextureUsages::TEXTURE_BINDING`] for ASTC formats with Unorm/UnormSrgb channel type. /// [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] may enable additional usages. /// /// Supported Platforms: @@ -409,7 +409,7 @@ bitflags::bitflags! { /// Compressed textures sacrifice some quality in exchange for significantly reduced /// bandwidth usage. /// - /// Support for this feature guarantees availability of [`TextureUsages::COPY_SRC | TextureUsages::COPY_DST | TextureUsages::TEXTURE_BINDING`] for BCn formats. + /// Support for this feature guarantees availability of [`TextureUsages::COPY_SRC | TextureUsages::COPY_DST | TextureUsages::TEXTURE_BINDING`] for ASTC formats with the HDR channel type. /// [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] may enable additional usages. /// /// Supported Platforms: @@ -458,10 +458,9 @@ bitflags::bitflags! { /// Supported platforms: /// - Vulkan /// - DX12 + /// - Metal (AMD & Intel, not Apple GPUs) /// - /// This is currently unimplemented on Metal. - /// When implemented, it will be supported on Metal on AMD and Intel GPUs, but not Apple GPUs. - /// (This is a common limitation of tile-based rasterization GPUs) + /// This is generally not available on tile-based rasterization GPUs. /// /// This is a native only feature with a [proposal](https://github.com/gpuweb/gpuweb/blob/0008bd30da2366af88180b511a5d0d0c1dffbc36/proposals/timestamp-query-inside-passes.md) for the web. const TIMESTAMP_QUERY_INSIDE_PASSES = 1 << 33; @@ -738,6 +737,15 @@ bitflags::bitflags! { /// This is a native only feature. const VERTEX_ATTRIBUTE_64BIT = 1 << 53; + /// Allows vertex shaders to have outputs which are not consumed + /// by the fragment shader. + /// + /// Supported platforms: + /// - Vulkan + /// - Metal + /// - OpenGL + const SHADER_UNUSED_VERTEX_OUTPUT = 1 << 54; + // 54..59 available // Shader: @@ -782,7 +790,17 @@ bitflags::bitflags! { /// This is a native only feature. const SHADER_EARLY_DEPTH_TEST = 1 << 62; - // 62..64 available + /// Allows two outputs from a shader to be used for blending. + /// Note that dual-source blending doesn't support multiple render targets. + /// + /// For more info see the OpenGL ES extension GL_EXT_blend_func_extended. + /// + /// Supported platforms: + /// - OpenGL ES (with GL_EXT_blend_func_extended) + /// - Metal (with MSL 1.2+) + /// - Vulkan (with dualSrcBlend) + /// - DX12 + const DUAL_SOURCE_BLENDING = 1 << 63; } } @@ -1550,6 +1568,8 @@ impl TextureViewDimension { /// /// Corresponds to [WebGPU `GPUBlendFactor`]( /// https://gpuweb.github.io/gpuweb/#enumdef-gpublendfactor). +/// Values using S1 requires [`Features::DUAL_SOURCE_BLENDING`] and can only be +/// used with the first render target. #[repr(C)] #[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)] #[cfg_attr(feature = "trace", derive(Serialize))] @@ -1582,6 +1602,29 @@ pub enum BlendFactor { Constant = 11, /// 1.0 - Constant OneMinusConstant = 12, + /// S1.component + Src1 = 13, + /// 1.0 - S1.component + OneMinusSrc1 = 14, + /// S1.alpha + Src1Alpha = 15, + /// 1.0 - S1.alpha + OneMinusSrc1Alpha = 16, +} + +impl BlendFactor { + /// Returns `true` if the blend factor references the second blend source. + /// + /// Note that the usage of those blend factors require [`Features::DUAL_SOURCE_BLENDING`]. + pub fn ref_second_blend_source(&self) -> bool { + match self { + BlendFactor::Src1 + | BlendFactor::OneMinusSrc1 + | BlendFactor::Src1Alpha + | BlendFactor::OneMinusSrc1Alpha => true, + _ => false, + } + } } /// Alpha blend operation. diff --git a/wgpu/src/backend/direct.rs b/wgpu/src/backend/direct.rs index ca38a42ec8..6a70b002fe 100644 --- a/wgpu/src/backend/direct.rs +++ b/wgpu/src/backend/direct.rs @@ -4,7 +4,7 @@ use crate::{ BufferDescriptor, CommandEncoderDescriptor, ComputePassDescriptor, ComputePipelineDescriptor, DownlevelCapabilities, Features, Label, Limits, LoadOp, MapMode, Operations, PipelineLayoutDescriptor, RenderBundleEncoderDescriptor, RenderPipelineDescriptor, - SamplerDescriptor, ShaderModuleDescriptor, ShaderModuleDescriptorSpirV, ShaderSource, + SamplerDescriptor, ShaderModuleDescriptor, ShaderModuleDescriptorSpirV, ShaderSource, StoreOp, SurfaceStatus, TextureDescriptor, TextureViewDescriptor, UncapturedErrorHandler, }; @@ -441,6 +441,13 @@ fn map_texture_tagged_copy_view( } } +fn map_store_op(op: StoreOp) -> wgc::command::StoreOp { + match op { + StoreOp::Store => wgc::command::StoreOp::Store, + StoreOp::Discard => wgc::command::StoreOp::Discard, + } +} + fn map_pass_channel( ops: Option<&Operations>, ) -> wgc::command::PassChannel { @@ -450,11 +457,7 @@ fn map_pass_channel( store, }) => wgc::command::PassChannel { load_op: wgc::command::LoadOp::Clear, - store_op: if store { - wgc::command::StoreOp::Store - } else { - wgc::command::StoreOp::Discard - }, + store_op: map_store_op(store), clear_value, read_only: false, }, @@ -463,11 +466,7 @@ fn map_pass_channel( store, }) => wgc::command::PassChannel { load_op: wgc::command::LoadOp::Load, - store_op: if store { - wgc::command::StoreOp::Store - } else { - wgc::command::StoreOp::Discard - }, + store_op: map_store_op(store), clear_value: V::default(), read_only: false, }, @@ -671,8 +670,7 @@ impl crate::Context for Context { () )); if let Some(err) = error { - log::error!("Error in Adapter::request_device: {}", err); - return ready(Err(crate::RequestDeviceError)); + return ready(Err(err.into())); } let error_sink = Arc::new(Mutex::new(ErrorSinkRaw::new())); let device = Device { diff --git a/wgpu/src/backend/web.rs b/wgpu/src/backend/web.rs index d64bd8bcb1..b649d41ebb 100644 --- a/wgpu/src/backend/web.rs +++ b/wgpu/src/backend/web.rs @@ -421,6 +421,15 @@ fn map_blend_factor(factor: wgt::BlendFactor) -> web_sys::GpuBlendFactor { BlendFactor::SrcAlphaSaturated => bf::SrcAlphaSaturated, BlendFactor::Constant => bf::Constant, BlendFactor::OneMinusConstant => bf::OneMinusConstant, + BlendFactor::Src1 + | BlendFactor::OneMinusSrc1 + | BlendFactor::Src1Alpha + | BlendFactor::OneMinusSrc1Alpha => { + panic!( + "{:?} is not enabled for this backend", + wgt::Features::DUAL_SOURCE_BLENDING + ) + } } } @@ -621,11 +630,10 @@ fn map_color(color: wgt::Color) -> web_sys::GpuColorDict { web_sys::GpuColorDict::new(color.a, color.b, color.g, color.r) } -fn map_store_op(store: bool) -> web_sys::GpuStoreOp { - if store { - web_sys::GpuStoreOp::Store - } else { - web_sys::GpuStoreOp::Discard +fn map_store_op(store: crate::StoreOp) -> web_sys::GpuStoreOp { + match store { + crate::StoreOp::Store => web_sys::GpuStoreOp::Store, + crate::StoreOp::Discard => web_sys::GpuStoreOp::Discard, } } @@ -812,7 +820,9 @@ fn future_request_device( (device_id, device_data, queue_id, queue_data) }) - .map_err(|_| crate::RequestDeviceError) + .map_err(|error_value| crate::RequestDeviceError { + inner: crate::RequestDeviceErrorKind::Web(error_value), + }) } fn future_pop_error_scope(result: JsFutureResult) -> Option { diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index b482f303a1..3fbabe74da 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1006,16 +1006,24 @@ static_assertions::assert_impl_all!(BufferBinding: Send, Sync); /// Operation to perform to the output attachment at the start of a render pass. /// -/// The render target must be cleared at least once before its content is loaded. -/// -/// Corresponds to [WebGPU `GPULoadOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpuloadop). +/// Corresponds to [WebGPU `GPULoadOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpuloadop), +/// plus the corresponding clearValue. #[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)] #[cfg_attr(feature = "trace", derive(serde::Serialize))] #[cfg_attr(feature = "replay", derive(serde::Deserialize))] pub enum LoadOp { - /// Clear with a specified value. + /// Loads the specified value for this attachment into the render pass. + /// + /// On some GPU hardware (primarily mobile), "clear" is significantly cheaper + /// because it avoids loading data from main memory into tile-local memory. + /// + /// On other GPU hardware, there isn’t a significant difference. + /// + /// As a result, it is recommended to use "clear" rather than "load" in cases + /// where the initial value doesn’t matter + /// (e.g. the render target will be cleared using a skybox). Clear(V), - /// Load from memory. + /// Loads the existing value for this attachment into the render pass. Load, } @@ -1025,6 +1033,28 @@ impl Default for LoadOp { } } +/// Operation to perform to the output attachment at the end of a render pass. +/// +/// Corresponds to [WebGPU `GPUStoreOp`](https://gpuweb.github.io/gpuweb/#enumdef-gpustoreop). +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Default)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub enum StoreOp { + /// Stores the resulting value of the render pass for this attachment. + #[default] + Store, + /// Discards the resulting value of the render pass for this attachment. + /// + /// The attachment will be treated as uninitialized afterwards. + /// (If only either Depth or Stencil texture-aspects is set to `Discard`, + /// the respective other texture-aspect will be preserved.) + /// + /// This can be significantly faster on tile-based render hardware. + /// + /// Prefer this if the attachment is not read by subsequent passes. + Discard, +} + /// Pair of load and store operations for an attachment aspect. /// /// This type is unique to the Rust API of `wgpu`. In the WebGPU specification, @@ -1036,14 +1066,18 @@ pub struct Operations { /// How data should be read through this attachment. pub load: LoadOp, /// Whether data will be written to through this attachment. - pub store: bool, + /// + /// Note that resolve textures (if specified) are always written to, + /// regardless of this setting. + pub store: StoreOp, } impl Default for Operations { + #[inline] fn default() -> Self { Self { - load: Default::default(), - store: true, + load: LoadOp::::default(), + store: StoreOp::default(), } } } @@ -1084,6 +1118,8 @@ pub struct RenderPassColorAttachment<'tex> { /// The view to use as an attachment. pub view: &'tex TextureView, /// The view that will receive the resolved output if multisampling is used. + /// + /// If set, it is always written to, regardless of how [`Self::ops`] is configured. pub resolve_target: Option<&'tex TextureView>, /// What operations will be performed on this color attachment. pub ops: Operations, @@ -2752,18 +2788,103 @@ impl Drop for Device { } } -/// Requesting a device failed. -#[derive(Clone, PartialEq, Eq, Debug)] -pub struct RequestDeviceError; +/// Requesting a device from an [`Adapter`] failed. +#[derive(Clone, Debug)] +pub struct RequestDeviceError { + inner: RequestDeviceErrorKind, +} +#[derive(Clone, Debug)] +enum RequestDeviceErrorKind { + /// Error from [`wgpu_core`]. + // must match dependency cfg + #[cfg(any( + not(target_arch = "wasm32"), + feature = "webgl", + target_os = "emscripten" + ))] + Core(core::instance::RequestDeviceError), + + /// Error from web API that was called by `wgpu` to request a device. + /// + /// (This is currently never used by the webgl backend, but it could be.) + #[cfg(all( + target_arch = "wasm32", + not(any(target_os = "emscripten", feature = "webgl")) + ))] + Web(wasm_bindgen::JsValue), +} + +#[cfg(all( + feature = "fragile-send-sync-non-atomic-wasm", + not(target_feature = "atomics") +))] +unsafe impl Send for RequestDeviceErrorKind {} +#[cfg(all( + feature = "fragile-send-sync-non-atomic-wasm", + not(target_feature = "atomics") +))] +unsafe impl Sync for RequestDeviceErrorKind {} + +#[cfg(any( + not(target_arch = "wasm32"), + all( + feature = "fragile-send-sync-non-atomic-wasm", + not(target_feature = "atomics") + ) +))] static_assertions::assert_impl_all!(RequestDeviceError: Send, Sync); impl fmt::Display for RequestDeviceError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "Requesting a device failed") + match &self.inner { + #[cfg(any( + not(target_arch = "wasm32"), + feature = "webgl", + target_os = "emscripten" + ))] + RequestDeviceErrorKind::Core(error) => error.fmt(f), + #[cfg(all( + target_arch = "wasm32", + not(any(target_os = "emscripten", feature = "webgl")) + ))] + RequestDeviceErrorKind::Web(error_js_value) => { + // wasm-bindgen provides a reasonable error stringification via `Debug` impl + write!(f, "{error_js_value:?}") + } + } } } -impl error::Error for RequestDeviceError {} +impl error::Error for RequestDeviceError { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + match &self.inner { + #[cfg(any( + not(target_arch = "wasm32"), + feature = "webgl", + target_os = "emscripten" + ))] + RequestDeviceErrorKind::Core(error) => error.source(), + #[cfg(all( + target_arch = "wasm32", + not(any(target_os = "emscripten", feature = "webgl")) + ))] + RequestDeviceErrorKind::Web(_) => None, + } + } +} + +#[cfg(any( + not(target_arch = "wasm32"), + feature = "webgl", + target_os = "emscripten" +))] +impl From for RequestDeviceError { + fn from(error: core::instance::RequestDeviceError) -> Self { + Self { + inner: RequestDeviceErrorKind::Core(error), + } + } +} /// [`Instance::create_surface()`] or a related function failed. #[derive(Clone, Debug)]