From ae7514f361779780bcdaac924ffefed264f691fe Mon Sep 17 00:00:00 2001 From: Imbris Date: Sun, 14 Apr 2024 13:34:01 -0400 Subject: [PATCH 1/2] Allow unconsumed inputs in fragment shaders by removing them from vertex outputs when generating HLSL. Fixes https://github.com/gfx-rs/wgpu/issues/3748 * Add naga::back::hlsl::FragmentEntryPoint for providing information about the fragment entry point when generating vertex entry points via naga::back::hlsl::Writer::write. Vertex outputs not consumed by the fragment entry point are omitted in the final output struct. * Add naga snapshot test for this new feature, * Remove Features::SHADER_UNUSED_VERTEX_OUTPUT, StageError::InputNotConsumed, and associated validation logic. * Make wgpu dx12 backend pass fragment shader info when generating vertex HLSL. * Add wgpu regression test for allowing unconsumed inputs. --- CHANGELOG.md | 8 +++ benches/benches/shader.rs | 1 + deno_webgpu/lib.rs | 7 -- naga-cli/src/bin/naga.rs | 2 +- naga/src/back/hlsl/mod.rs | 29 ++++++++ naga/src/back/hlsl/writer.rs | 72 +++++++++++++++++-- .../unconsumed_vertex_outputs_frag.param.ron | 2 + .../in/unconsumed_vertex_outputs_frag.wgsl | 13 ++++ .../unconsumed_vertex_outputs_vert.param.ron | 2 + .../in/unconsumed_vertex_outputs_vert.wgsl | 13 ++++ .../hlsl/unconsumed_vertex_outputs_frag.hlsl | 17 +++++ .../hlsl/unconsumed_vertex_outputs_frag.ron | 12 ++++ .../hlsl/unconsumed_vertex_outputs_vert.hlsl | 30 ++++++++ .../hlsl/unconsumed_vertex_outputs_vert.ron | 12 ++++ naga/tests/snapshots.rs | 50 +++++++++++-- tests/tests/regression/issue_3748.rs | 53 ++++++++++++++ tests/tests/regression/issue_3748.wgsl | 23 ++++++ tests/tests/root.rs | 1 + wgpu-core/src/device/resource.rs | 3 +- wgpu-core/src/validation.rs | 49 +++++-------- wgpu-hal/src/dx12/device.rs | 28 ++++++-- wgpu-hal/src/gles/adapter.rs | 1 - wgpu-hal/src/metal/adapter.rs | 1 - wgpu-hal/src/vulkan/adapter.rs | 1 - wgpu-types/src/lib.rs | 8 --- 25 files changed, 369 insertions(+), 69 deletions(-) create mode 100644 naga/tests/in/unconsumed_vertex_outputs_frag.param.ron create mode 100644 naga/tests/in/unconsumed_vertex_outputs_frag.wgsl create mode 100644 naga/tests/in/unconsumed_vertex_outputs_vert.param.ron create mode 100644 naga/tests/in/unconsumed_vertex_outputs_vert.wgsl create mode 100644 naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.hlsl create mode 100644 naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.ron create mode 100644 naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.hlsl create mode 100644 naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.ron create mode 100644 tests/tests/regression/issue_3748.rs create mode 100644 tests/tests/regression/issue_3748.wgsl diff --git a/CHANGELOG.md b/CHANGELOG.md index 710fb2d6d5..42a7f76cfa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -137,6 +137,7 @@ By @atlv24 in [#5383](https://github.com/gfx-rs/wgpu/pull/5383) #### General - Added `as_hal` for `Buffer` to access wgpu created buffers form wgpu-hal. By @JasondeWolff in [#5724](https://github.com/gfx-rs/wgpu/pull/5724) +- Unconsumed vertex outputs are now always allowed. Removed `StageError::InputNotConsumed`, `Features::SHADER_UNUSED_VERTEX_OUTPUT`, and associated validation. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531) #### Naga @@ -144,6 +145,13 @@ By @atlv24 in [#5383](https://github.com/gfx-rs/wgpu/pull/5383) - Added type upgrades to SPIR-V atomic support. Added related infrastructure. Tracking issue is [here](https://github.com/gfx-rs/wgpu/issues/4489). By @schell in [#5775](https://github.com/gfx-rs/wgpu/pull/5775). - Implement `WGSL`'s `unpack4xI8`,`unpack4xU8`,`pack4xI8` and `pack4xU8`. By @VlaDexa in [#5424](https://github.com/gfx-rs/wgpu/pull/5424) - Began work adding support for atomics to the SPIR-V frontend. Tracking issue is [here](https://github.com/gfx-rs/wgpu/issues/4489). By @schell in [#5702](https://github.com/gfx-rs/wgpu/pull/5702). +- In hlsl-out, allow passing information about the fragment entry point to omit vertex outputs that are not in the fragment inputs. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531) + + ```diff + let writer: naga::back::hlsl::Writer = /* ... */; + -writer.write(&module, &module_info); + +writer.write(&module, &module_info, None); + ``` #### WebGPU diff --git a/benches/benches/shader.rs b/benches/benches/shader.rs index 6d20b6029f..c6aa631d9b 100644 --- a/benches/benches/shader.rs +++ b/benches/benches/shader.rs @@ -308,6 +308,7 @@ fn backends(c: &mut Criterion) { let _ = writer.write( input.module.as_ref().unwrap(), input.module_info.as_ref().unwrap(), + None, ); // may fail on unimplemented things string.clear(); } diff --git a/deno_webgpu/lib.rs b/deno_webgpu/lib.rs index a9d36afdca..d77c60cac0 100644 --- a/deno_webgpu/lib.rs +++ b/deno_webgpu/lib.rs @@ -360,9 +360,6 @@ fn deserialize_features(features: &wgpu_types::Features) -> Vec<&'static str> { if features.contains(wgpu_types::Features::SHADER_EARLY_DEPTH_TEST) { return_features.push("shader-early-depth-test"); } - if features.contains(wgpu_types::Features::SHADER_UNUSED_VERTEX_OUTPUT) { - return_features.push("shader-unused-vertex-output"); - } return_features } @@ -648,10 +645,6 @@ impl From for wgpu_types::Features { wgpu_types::Features::SHADER_EARLY_DEPTH_TEST, required_features.0.contains("shader-early-depth-test"), ); - features.set( - wgpu_types::Features::SHADER_UNUSED_VERTEX_OUTPUT, - required_features.0.contains("shader-unused-vertex-output"), - ); features } diff --git a/naga-cli/src/bin/naga.rs b/naga-cli/src/bin/naga.rs index 4072d2d8a6..97d947973e 100644 --- a/naga-cli/src/bin/naga.rs +++ b/naga-cli/src/bin/naga.rs @@ -811,7 +811,7 @@ fn write_output( let mut buffer = String::new(); let mut writer = hlsl::Writer::new(&mut buffer, ¶ms.hlsl); - writer.write(&module, &info).unwrap_pretty(); + writer.write(&module, &info, None).unwrap_pretty(); fs::write(output_path, buffer)?; } "wgsl" => { diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs index 28edbf70e1..5cbfffc474 100644 --- a/naga/src/back/hlsl/mod.rs +++ b/naga/src/back/hlsl/mod.rs @@ -287,6 +287,35 @@ impl Wrapped { } } +/// A fragment entry point to be considered when generating HLSL for the output interface of vertex +/// entry points. +/// +/// This is provided as an optional paramter to [`Writer::write`]. +/// +/// If this is provided, vertex outputs will be removed if they are not inputs of this fragment +/// entry point. This is necessary for generating correct HLSL when some of the vertex shader +/// outputs are not consumed by the fragment shader. +pub struct FragmentEntryPoint<'a> { + module: &'a crate::Module, + func: &'a crate::Function, +} + +impl<'a> FragmentEntryPoint<'a> { + /// Returns `None` if the entry point with the provided name can't be found or isn't a fragment + /// entry point. + pub fn new(module: &'a crate::Module, ep_name: &'a str) -> Option { + module + .entry_points + .iter() + .find(|ep| ep.name == ep_name) + .filter(|ep| ep.stage == crate::ShaderStage::Fragment) + .map(|ep| Self { + module, + func: &ep.function, + }) + } +} + pub struct Writer<'a, W> { out: W, names: crate::FastHashMap, diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index e06951b05a..1ecbb1070e 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -4,7 +4,7 @@ use super::{ WrappedZeroValue, }, storage::StoreValue, - BackendResult, Error, Options, + BackendResult, Error, FragmentEntryPoint, Options, }; use crate::{ back::{self, Baked}, @@ -28,6 +28,7 @@ pub(crate) const INSERT_BITS_FUNCTION: &str = "naga_insertBits"; struct EpStructMember { name: String, ty: Handle, + // TODO: log error if binding is none? // technically, this should always be `Some` binding: Option, index: u32, @@ -200,6 +201,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { &mut self, module: &Module, module_info: &valid::ModuleInfo, + fragment_entry_point: Option<&FragmentEntryPoint<'_>>, ) -> Result { if !module.overrides.is_empty() { return Err(Error::Override); @@ -300,7 +302,13 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // Write all entry points wrapped structs for (index, ep) in module.entry_points.iter().enumerate() { let ep_name = self.names[&NameKey::EntryPoint(index as u16)].clone(); - let ep_io = self.write_ep_interface(module, &ep.function, ep.stage, &ep_name)?; + let ep_io = self.write_ep_interface( + module, + &ep.function, + ep.stage, + &ep_name, + fragment_entry_point, + )?; self.entry_point_io.push(ep_io); } @@ -508,6 +516,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { writeln!(self.out, "}};")?; writeln!(self.out)?; + // See ordering notes on EntryPointInterface fields match shader_stage.1 { Io::Input => { // bring back the original order @@ -541,6 +550,8 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { for arg in func.arguments.iter() { match module.types[arg.ty].inner { TypeInner::Struct { ref members, .. } => { + // TODO: what about nested structs? Is that possible? Maybe try an unwrap on + // the binding? for member in members.iter() { let name = self.namer.call_or(&member.name, "member"); let index = fake_members.len() as u32; @@ -577,10 +588,10 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { result: &crate::FunctionResult, stage: ShaderStage, entry_point_name: &str, + frag_ep: Option<&FragmentEntryPoint<'_>>, ) -> Result { let struct_name = format!("{stage:?}Output_{entry_point_name}"); - let mut fake_members = Vec::new(); let empty = []; let members = match module.types[result.ty].inner { TypeInner::Struct { ref members, .. } => members, @@ -590,14 +601,60 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } }; - for member in members.iter() { + // Gather list of fragment input locations. We use this below to remove user-defined + // varyings from VS outputs that aren't in the FS inputs. This makes the VS interface match + // as long as the FS inputs are a subset of the VS outputs. This is only applied if the + // writer is supplied with information about the fragment entry point. + let fs_input_locs = if let (Some(frag_ep), ShaderStage::Vertex) = (frag_ep, stage) { + let mut fs_input_locs = Vec::new(); + for arg in frag_ep.func.arguments.iter() { + let mut push_if_location = |binding: &Option| { + match *binding { + Some(crate::Binding::Location { location, .. }) => { + fs_input_locs.push(location) + } + Some(crate::Binding::BuiltIn(_)) => {} + // Log error? + None => {} + } + }; + match frag_ep.module.types[arg.ty].inner { + TypeInner::Struct { ref members, .. } => { + // TODO: nesting? + for member in members.iter() { + push_if_location(&member.binding); + } + } + _ => push_if_location(&arg.binding), + } + } + fs_input_locs.sort(); + Some(fs_input_locs) + } else { + None + }; + + let mut fake_members = Vec::new(); + for (index, member) in members.iter().enumerate() { + if let Some(ref fs_input_locs) = fs_input_locs { + match member.binding { + Some(crate::Binding::Location { location, .. }) => { + if fs_input_locs.binary_search(&location).is_err() { + continue; + } + } + Some(crate::Binding::BuiltIn(_)) => {} + // Log error? + None => {} + } + } + let member_name = self.namer.call_or(&member.name, "member"); - let index = fake_members.len() as u32; fake_members.push(EpStructMember { name: member_name, ty: member.ty, binding: member.binding.clone(), - index, + index: index as u32, }); } @@ -613,6 +670,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { func: &crate::Function, stage: ShaderStage, ep_name: &str, + frag_ep: Option<&FragmentEntryPoint<'_>>, ) -> Result { Ok(EntryPointInterface { input: if !func.arguments.is_empty() @@ -628,7 +686,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { }, output: match func.result { Some(ref fr) if fr.binding.is_none() && stage == ShaderStage::Vertex => { - Some(self.write_ep_output_struct(module, fr, stage, ep_name)?) + Some(self.write_ep_output_struct(module, fr, stage, ep_name, frag_ep)?) } _ => None, }, diff --git a/naga/tests/in/unconsumed_vertex_outputs_frag.param.ron b/naga/tests/in/unconsumed_vertex_outputs_frag.param.ron new file mode 100644 index 0000000000..72873dd667 --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_frag.param.ron @@ -0,0 +1,2 @@ +( +) diff --git a/naga/tests/in/unconsumed_vertex_outputs_frag.wgsl b/naga/tests/in/unconsumed_vertex_outputs_frag.wgsl new file mode 100644 index 0000000000..3a656c9696 --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_frag.wgsl @@ -0,0 +1,13 @@ +// Out of order to test sorting. +struct FragmentIn { + @location(1) value: f32, + @location(3) value2: f32, + @builtin(position) position: vec4, + // @location(0) unused_value: f32, + // @location(2) unused_value2: vec4, +} + +@fragment +fn fs_main(v_out: FragmentIn) -> @location(0) vec4 { + return vec4(v_out.value, v_out.value, v_out.value2, v_out.value2); +} diff --git a/naga/tests/in/unconsumed_vertex_outputs_vert.param.ron b/naga/tests/in/unconsumed_vertex_outputs_vert.param.ron new file mode 100644 index 0000000000..72873dd667 --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_vert.param.ron @@ -0,0 +1,2 @@ +( +) diff --git a/naga/tests/in/unconsumed_vertex_outputs_vert.wgsl b/naga/tests/in/unconsumed_vertex_outputs_vert.wgsl new file mode 100644 index 0000000000..46c39ea930 --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_vert.wgsl @@ -0,0 +1,13 @@ +// Out of order to test sorting. +struct VertexOut { + @builtin(position) position: vec4, + @location(1) value: f32, + @location(2) unused_value2: vec4, + @location(0) unused_value: f32, + @location(3) value2: f32, +} + +@vertex +fn vs_main() -> VertexOut { + return VertexOut(vec4(1.0), 1.0, vec4(2.0), 1.0, 0.5); +} diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.hlsl b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.hlsl new file mode 100644 index 0000000000..4005e43538 --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.hlsl @@ -0,0 +1,17 @@ +struct FragmentIn { + float value : LOC1; + float value2_ : LOC3; + float4 position : SV_Position; +}; + +struct FragmentInput_fs_main { + float value : LOC1; + float value2_ : LOC3; + float4 position : SV_Position; +}; + +float4 fs_main(FragmentInput_fs_main fragmentinput_fs_main) : SV_Target0 +{ + FragmentIn v_out = { fragmentinput_fs_main.value, fragmentinput_fs_main.value2_, fragmentinput_fs_main.position }; + return float4(v_out.value, v_out.value, v_out.value2_, v_out.value2_); +} diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.ron b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.ron new file mode 100644 index 0000000000..eac1b945d2 --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ( + entry_point:"fs_main", + target_profile:"ps_5_1", + ), + ], + compute:[ + ], +) diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.hlsl b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.hlsl new file mode 100644 index 0000000000..ea75d63877 --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.hlsl @@ -0,0 +1,30 @@ +struct VertexOut { + float4 position : SV_Position; + float value : LOC1; + float4 unused_value2_ : LOC2; + float unused_value : LOC0; + float value2_ : LOC3; +}; + +struct VertexOutput_vs_main { + float value : LOC1; + float value2_ : LOC3; + float4 position : SV_Position; +}; + +VertexOut ConstructVertexOut(float4 arg0, float arg1, float4 arg2, float arg3, float arg4) { + VertexOut ret = (VertexOut)0; + ret.position = arg0; + ret.value = arg1; + ret.unused_value2_ = arg2; + ret.unused_value = arg3; + ret.value2_ = arg4; + return ret; +} + +VertexOutput_vs_main vs_main() +{ + const VertexOut vertexout = ConstructVertexOut((1.0).xxxx, 1.0, (2.0).xxxx, 1.0, 0.5); + const VertexOutput_vs_main vertexout_1 = { vertexout.value, vertexout.value2_, vertexout.position }; + return vertexout_1; +} diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.ron b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.ron new file mode 100644 index 0000000000..a24f8d0eb8 --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ( + entry_point:"vs_main", + target_profile:"vs_5_1", + ), + ], + fragment:[ + ], + compute:[ + ], +) diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index be8eb6a171..6e02475d68 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -266,7 +266,13 @@ fn check_targets( module: &mut naga::Module, targets: Targets, source_code: Option<&str>, + // For testing hlsl generation when fragment shader doesn't consume all vertex outputs. + frag_ep: Option, ) { + if frag_ep.is_some() && !targets.contains(Targets::HLSL) { + panic!("Providing FragmentEntryPoint only makes sense when testing hlsl-out"); + } + let params = input.read_parameters(); let name = &input.file_name; @@ -416,6 +422,7 @@ fn check_targets( &info, ¶ms.hlsl, ¶ms.pipeline_constants, + frag_ep, ); } } @@ -594,6 +601,7 @@ fn write_output_hlsl( info: &naga::valid::ModuleInfo, options: &naga::back::hlsl::Options, pipeline_constants: &naga::back::PipelineConstants, + frag_ep: Option, ) { use naga::back::hlsl; use std::fmt::Write as _; @@ -606,7 +614,9 @@ fn write_output_hlsl( let mut buffer = String::new(); let mut writer = hlsl::Writer::new(&mut buffer, options); - let reflection_info = writer.write(&module, &info).expect("HLSL write failed"); + let reflection_info = writer + .write(&module, &info, frag_ep.as_ref()) + .expect("HLSL write failed"); input.write_output_file("hlsl", "hlsl", buffer); @@ -910,7 +920,7 @@ fn convert_wgsl() { let input = Input::new(None, name, "wgsl"); let source = input.read_source(); match naga::front::wgsl::parse_str(&source) { - Ok(mut module) => check_targets(&input, &mut module, targets, None), + Ok(mut module) => check_targets(&input, &mut module, targets, None, None), Err(e) => panic!( "{}", e.emit_to_string_with_path(&source, input.input_path()) @@ -932,7 +942,7 @@ fn convert_wgsl() { // crlf will make the large split output different on different platform let source = source.replace('\r', ""); match naga::front::wgsl::parse_str(&source) { - Ok(mut module) => check_targets(&input, &mut module, targets, Some(&source)), + Ok(mut module) => check_targets(&input, &mut module, targets, Some(&source), None), Err(e) => panic!( "{}", e.emit_to_string_with_path(&source, input.input_path()) @@ -942,6 +952,36 @@ fn convert_wgsl() { } } +#[cfg(feature = "wgsl-in")] +#[test] +fn unconsumed_vertex_outputs_hlsl_out() { + let load_and_parse = |name| { + // WGSL shaders lives in root dir as a privileged. + let input = Input::new(None, name, "wgsl"); + let source = input.read_source(); + let module = match naga::front::wgsl::parse_str(&source) { + Ok(module) => module, + Err(e) => panic!( + "{}", + e.emit_to_string_with_path(&source, input.input_path()) + ), + }; + (input, module) + }; + + // Uses separate wgsl files to make sure the tested code doesn't accidentally rely on + // the fragment entry point being from the same parsed content (e.g. accidentally using the + // wrong `Module` when looking up info). We also don't just create a module from the same file + // twice since everything would probably be stored behind the same keys. + let (input, mut module) = load_and_parse("unconsumed_vertex_outputs_vert"); + let (frag_input, mut frag_module) = load_and_parse("unconsumed_vertex_outputs_frag"); + let frag_ep = naga::back::hlsl::FragmentEntryPoint::new(&frag_module, "fs_main") + .expect("fs_main not found"); + + check_targets(&input, &mut module, Targets::HLSL, None, Some(frag_ep)); + check_targets(&frag_input, &mut frag_module, Targets::HLSL, None, None); +} + #[cfg(feature = "spv-in")] fn convert_spv(name: &str, adjust_coordinate_space: bool, targets: Targets) { let _ = env_logger::try_init(); @@ -956,7 +996,7 @@ fn convert_spv(name: &str, adjust_coordinate_space: bool, targets: Targets) { }, ) .unwrap(); - check_targets(&input, &mut module, targets, None); + check_targets(&input, &mut module, targets, None, None); } #[cfg(feature = "spv-in")] @@ -1022,7 +1062,7 @@ fn convert_glsl_variations_check() { &source, ) .unwrap(); - check_targets(&input, &mut module, Targets::GLSL, None); + check_targets(&input, &mut module, Targets::GLSL, None, None); } #[cfg(feature = "glsl-in")] diff --git a/tests/tests/regression/issue_3748.rs b/tests/tests/regression/issue_3748.rs new file mode 100644 index 0000000000..e3a459aa8b --- /dev/null +++ b/tests/tests/regression/issue_3748.rs @@ -0,0 +1,53 @@ +use wgpu_test::{gpu_test, GpuTestConfiguration}; + +use wgpu::*; + +/// Previously, for every user-defined vertex output a fragment shader had to have a corresponding +/// user-defined input. This would generate `StageError::InputNotComsumed`. +/// +/// This requirement was removed from the WebGPU spec. Now, when generating hlsl, wgpu will +/// automatically remove any user-defined outputs from the vertex shader that are not present in +/// the fragment inputs. This is necessary for generating correct hlsl: +/// https://github.com/gfx-rs/naga/issues/1945 +#[gpu_test] +static ALLOW_INPUT_NOT_CONSUMED: GpuTestConfiguration = + GpuTestConfiguration::new().run_async(|ctx| async move { + let module = ctx + .device + .create_shader_module(include_wgsl!("issue_3748.wgsl")); + + let pipeline_layout = ctx + .device + .create_pipeline_layout(&PipelineLayoutDescriptor { + label: Some("Pipeline Layout"), + bind_group_layouts: &[], + push_constant_ranges: &[], + }); + + ctx.device + .create_render_pipeline(&RenderPipelineDescriptor { + label: Some("Pipeline"), + layout: Some(&pipeline_layout), + vertex: VertexState { + module: &module, + entry_point: "vs_main", + compilation_options: Default::default(), + buffers: &[], + }, + primitive: PrimitiveState::default(), + depth_stencil: None, + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + module: &module, + entry_point: "fs_main", + compilation_options: Default::default(), + targets: &[Some(ColorTargetState { + format: TextureFormat::Rgba8Unorm, + blend: None, + write_mask: ColorWrites::all(), + })], + }), + multiview: None, + cache: None, + }); + }); diff --git a/tests/tests/regression/issue_3748.wgsl b/tests/tests/regression/issue_3748.wgsl new file mode 100644 index 0000000000..78ace6d9db --- /dev/null +++ b/tests/tests/regression/issue_3748.wgsl @@ -0,0 +1,23 @@ +struct VertexOut { + @builtin(position) position: vec4, + @location(0) unused_value: f32, + @location(1) value: f32, +} + +struct FragmentIn { + @builtin(position) position: vec4, + // @location(0) unused_value: f32, + @location(1) value: f32, +} + +@vertex +fn vs_main() -> VertexOut { + return VertexOut(vec4(1.0), 1.0, 1.0); +} + +@fragment +fn fs_main(v_out: FragmentIn) -> @location(0) vec4 { + return vec4(v_out.value); +} + + diff --git a/tests/tests/root.rs b/tests/tests/root.rs index 159c22d046..aac65be4b9 100644 --- a/tests/tests/root.rs +++ b/tests/tests/root.rs @@ -1,6 +1,7 @@ mod regression { mod issue_3349; mod issue_3457; + mod issue_3748; mod issue_4024; mod issue_4122; } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index eb43202bca..9a75dc81b1 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -1603,8 +1603,7 @@ impl Device { }) })?; - let interface = - validation::Interface::new(&module, &info, self.limits.clone(), self.features); + let interface = validation::Interface::new(&module, &info, self.limits.clone()); let hal_shader = hal::ShaderInput::Naga(hal::NagaShader { module, info, diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs index 0df843128e..57161d1483 100644 --- a/wgpu-core/src/validation.rs +++ b/wgpu-core/src/validation.rs @@ -128,7 +128,6 @@ struct EntryPoint { #[derive(Debug)] pub struct Interface { limits: wgt::Limits, - features: wgt::Features, resources: naga::Arena, entry_points: FastHashMap<(naga::ShaderStage, String), EntryPoint>, } @@ -232,8 +231,6 @@ pub enum StageError { #[source] error: InputError, }, - #[error("Location[{location}] is provided by the previous stage output but is not consumed as input by this stage.")] - InputNotConsumed { location: wgt::ShaderLocation }, #[error( "Unable to select an entry point: no entry point was found in the provided shader module" )] @@ -838,12 +835,7 @@ impl Interface { list.push(varying); } - pub fn new( - module: &naga::Module, - info: &naga::valid::ModuleInfo, - limits: wgt::Limits, - features: wgt::Features, - ) -> Self { + pub fn new(module: &naga::Module, info: &naga::valid::ModuleInfo, limits: wgt::Limits) -> Self { let mut resources = naga::Arena::new(); let mut resource_mapping = FastHashMap::default(); for (var_handle, var) in module.global_variables.iter() { @@ -921,7 +913,6 @@ impl Interface { Self { limits, - features, resources, entry_points, } @@ -1143,6 +1134,11 @@ impl Interface { )); } ( + // TODO: is a subtype allowed here? This isn't clear + // from the line in the spec: "For each user-defined + // input of descriptor.fragment there must be a + // user-defined output of descriptor.vertex that + // location, type, and interpolation of the input." iv.ty.is_subtype_of(&provided.ty), iv.ty.dim.num_components(), ) @@ -1168,35 +1164,23 @@ impl Interface { } } } + // TODO: front_facing, sample_index, and sample_mask builtin's should all increase + // components count for fragment input. Varying::BuiltIn(_) => {} } } - // Check all vertex outputs and make sure the fragment shader consumes them. - // This requirement is removed if the `SHADER_UNUSED_VERTEX_OUTPUT` feature is enabled. - if shader_stage == naga::ShaderStage::Fragment - && !self - .features - .contains(wgt::Features::SHADER_UNUSED_VERTEX_OUTPUT) - { - for &index in inputs.keys() { - // This is a linear scan, but the count should be low enough - // that this should be fine. - let found = entry_point.inputs.iter().any(|v| match *v { - Varying::Local { location, .. } => location == index, - Varying::BuiltIn(_) => false, - }); - - if !found { - return Err(StageError::InputNotConsumed { location: index }); - } - } - } - if shader_stage == naga::ShaderStage::Vertex { + // TODO: if topology is point we should add 1 to inter_stage_components? for output in entry_point.outputs.iter() { //TODO: count builtins towards the limit? inter_stage_components += match *output { + // TODO: Spec mentions "Each user-defined output of descriptor.vertex consumes + // 4 scalar components". Not that it varies based on the type. So is there an + // inconsistency here? Also are all these "user-defined" or is that unknown at + // this stage? + // https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-inter-stage-interfaces + // (same applies to counting components for fragment inputs) Varying::Local { ref iv, .. } => iv.ty.dim.num_components(), Varying::BuiltIn(_) => 0, }; @@ -1218,6 +1202,9 @@ impl Interface { } } + // TODO: spec also has a max_inter_stage_shader_variables + // https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-inter-stage-interfaces and + // the location of user defined outputs(vertex)/inputs(fragment) must all be less than this if inter_stage_components > self.limits.max_inter_stage_shader_components { return Err(StageError::TooManyVaryings { used: inter_stage_components, diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 6e96a67f9e..ceb430a701 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -206,14 +206,27 @@ impl super::Device { Ok(()) } + /// When generating the vertex shader, the fragment stage must be passed if it exists! + /// Otherwise, the generated HLSL may be incorrect since the fragment shader inputs are + /// allowed to be a subset of the vertex outputs. fn load_shader( &self, stage: &crate::ProgrammableStage, layout: &super::PipelineLayout, naga_stage: naga::ShaderStage, + fragment_stage: Option<&crate::ProgrammableStage>, ) -> Result { use naga::back::hlsl; + let frag_ep = fragment_stage + .map(|fs_stage| { + hlsl::FragmentEntryPoint::new(&fs_stage.module.naga.module, fs_stage.entry_point) + .ok_or(crate::PipelineError::EntryPoint( + naga::ShaderStage::Fragment, + )) + }) + .transpose()?; + let stage_bit = auxil::map_naga_stage(naga_stage); let (module, info) = naga::back::pipeline_constants::process_overrides( @@ -240,7 +253,7 @@ impl super::Device { let reflection_info = { profiling::scope!("naga::back::hlsl::write"); writer - .write(&module, &info) + .write(&module, &info, frag_ep.as_ref()) .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))? }; @@ -1335,12 +1348,16 @@ impl crate::Device for super::Device { let (topology_class, topology) = conv::map_topology(desc.primitive.topology); let mut shader_stages = wgt::ShaderStages::VERTEX; - let blob_vs = - self.load_shader(&desc.vertex_stage, desc.layout, naga::ShaderStage::Vertex)?; + let blob_vs = self.load_shader( + &desc.vertex_stage, + desc.layout, + naga::ShaderStage::Vertex, + desc.fragment_stage.as_ref(), + )?; let blob_fs = match desc.fragment_stage { Some(ref stage) => { shader_stages |= wgt::ShaderStages::FRAGMENT; - Some(self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment)?) + Some(self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment, None)?) } None => None, }; @@ -1523,7 +1540,8 @@ impl crate::Device for super::Device { &self, desc: &crate::ComputePipelineDescriptor, ) -> Result { - let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?; + let blob_cs = + self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute, None)?; let pair = { profiling::scope!("ID3D12Device::CreateComputePipelineState"); diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 3c2a55e6a9..9a41dfe113 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -471,7 +471,6 @@ impl super::Adapter { wgt::Features::SHADER_EARLY_DEPTH_TEST, supported((3, 1), (4, 2)) || extensions.contains("GL_ARB_shader_image_load_store"), ); - features.set(wgt::Features::SHADER_UNUSED_VERTEX_OUTPUT, true); if extensions.contains("GL_ARB_timer_query") { features.set(wgt::Features::TIMESTAMP_QUERY, true); features.set(wgt::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS, true); diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 4a1caf91ac..7f8e789b48 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -914,7 +914,6 @@ impl super::PrivateCapabilities { features.set(F::ADDRESS_MODE_CLAMP_TO_ZERO, true); features.set(F::RG11B10UFLOAT_RENDERABLE, self.format_rg11b10_all); - features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); if self.supports_simd_scoped_operations { features.insert(F::SUBGROUP | F::SUBGROUP_BARRIER); diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 995930a760..efe32929ad 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -735,7 +735,6 @@ impl PhysicalDeviceFeatures { | vk::FormatFeatureFlags::COLOR_ATTACHMENT_BLEND, ); features.set(F::RG11B10UFLOAT_RENDERABLE, rg11b10ufloat_renderable); - features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); features.set( F::BGRA8UNORM_STORAGE, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index a345a57437..04532a4c7c 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -802,14 +802,6 @@ bitflags::bitflags! { /// /// This is a native only feature. const VERTEX_ATTRIBUTE_64BIT = 1 << 45; - /// Allows vertex shaders to have outputs which are not consumed - /// by the fragment shader. - /// - /// Supported platforms: - /// - Vulkan - /// - Metal - /// - OpenGL - const SHADER_UNUSED_VERTEX_OUTPUT = 1 << 46; /// Allows for creation of textures of format [`TextureFormat::NV12`] /// /// Supported platforms: From 2acc943e8b5c97a251e2146d0a94b883db2eb345 Mon Sep 17 00:00:00 2001 From: Imbris Date: Sun, 21 Apr 2024 20:46:38 -0400 Subject: [PATCH 2/2] Address review * Add note that nesting structs for the inter-stage interface can't happen. * Remove new TODO notes (some addressed and some transferred to an issue https://github.com/gfx-rs/wgpu/issues/5577) * Changed issue that regression test refers to 3748 -> 5553 * Add debug_assert that binding.is_some() in hlsl writer * Fix typos caught in CI Also, fix compiling snapshot test when hlsl-out feature is not enabled. --- naga/src/back/hlsl/mod.rs | 2 +- naga/src/back/hlsl/writer.rs | 32 +++++++++---------- naga/tests/snapshots.rs | 9 ++++-- .../{issue_3748.rs => issue_5553.rs} | 6 ++-- .../{issue_3748.wgsl => issue_5553.wgsl} | 0 tests/tests/root.rs | 2 +- wgpu-core/src/validation.rs | 17 ---------- 7 files changed, 28 insertions(+), 40 deletions(-) rename tests/tests/regression/{issue_3748.rs => issue_5553.rs} (94%) rename tests/tests/regression/{issue_3748.wgsl => issue_5553.wgsl} (100%) diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs index 5cbfffc474..49ff07ebf2 100644 --- a/naga/src/back/hlsl/mod.rs +++ b/naga/src/back/hlsl/mod.rs @@ -290,7 +290,7 @@ impl Wrapped { /// A fragment entry point to be considered when generating HLSL for the output interface of vertex /// entry points. /// -/// This is provided as an optional paramter to [`Writer::write`]. +/// This is provided as an optional parameter to [`Writer::write`]. /// /// If this is provided, vertex outputs will be removed if they are not inputs of this fragment /// entry point. This is necessary for generating correct HLSL when some of the vertex shader diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index 1ecbb1070e..d40b9b24c2 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -28,8 +28,8 @@ pub(crate) const INSERT_BITS_FUNCTION: &str = "naga_insertBits"; struct EpStructMember { name: String, ty: Handle, - // TODO: log error if binding is none? // technically, this should always be `Some` + // (we `debug_assert!` this in `write_interface_struct`) binding: Option, index: u32, } @@ -489,6 +489,10 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { write!(self.out, "struct {struct_name}")?; writeln!(self.out, " {{")?; for m in members.iter() { + // Sanity check that each IO member is a built-in or is assigned a + // location. Also see note about nesting in `write_ep_input_struct`. + debug_assert!(m.binding.is_some()); + if is_subgroup_builtin_binding(&m.binding) { continue; } @@ -548,10 +552,12 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { let mut fake_members = Vec::new(); for arg in func.arguments.iter() { + // NOTE: We don't need to handle nesting structs. All members must + // be either built-ins or assigned a location. I.E. `binding` is + // `Some`. This is checked in `VaryingContext::validate`. See: + // https://gpuweb.github.io/gpuweb/wgsl/#input-output-locations match module.types[arg.ty].inner { TypeInner::Struct { ref members, .. } => { - // TODO: what about nested structs? Is that possible? Maybe try an unwrap on - // the binding? for member in members.iter() { let name = self.namer.call_or(&member.name, "member"); let index = fake_members.len() as u32; @@ -608,19 +614,15 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { let fs_input_locs = if let (Some(frag_ep), ShaderStage::Vertex) = (frag_ep, stage) { let mut fs_input_locs = Vec::new(); for arg in frag_ep.func.arguments.iter() { - let mut push_if_location = |binding: &Option| { - match *binding { - Some(crate::Binding::Location { location, .. }) => { - fs_input_locs.push(location) - } - Some(crate::Binding::BuiltIn(_)) => {} - // Log error? - None => {} - } + let mut push_if_location = |binding: &Option| match *binding { + Some(crate::Binding::Location { location, .. }) => fs_input_locs.push(location), + Some(crate::Binding::BuiltIn(_)) | None => {} }; + + // NOTE: We don't need to handle struct nesting. See note in + // `write_ep_input_struct`. match frag_ep.module.types[arg.ty].inner { TypeInner::Struct { ref members, .. } => { - // TODO: nesting? for member in members.iter() { push_if_location(&member.binding); } @@ -643,9 +645,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { continue; } } - Some(crate::Binding::BuiltIn(_)) => {} - // Log error? - None => {} + Some(crate::Binding::BuiltIn(_)) | None => {} } } diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index 6e02475d68..d18ea5b62a 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -260,6 +260,11 @@ impl Input { } } +#[cfg(feature = "hlsl-out")] +type FragmentEntryPoint<'a> = naga::back::hlsl::FragmentEntryPoint<'a>; +#[cfg(not(feature = "hlsl-out"))] +type FragmentEntryPoint<'a> = (); + #[allow(unused_variables)] fn check_targets( input: &Input, @@ -267,7 +272,7 @@ fn check_targets( targets: Targets, source_code: Option<&str>, // For testing hlsl generation when fragment shader doesn't consume all vertex outputs. - frag_ep: Option, + frag_ep: Option, ) { if frag_ep.is_some() && !targets.contains(Targets::HLSL) { panic!("Providing FragmentEntryPoint only makes sense when testing hlsl-out"); @@ -952,7 +957,7 @@ fn convert_wgsl() { } } -#[cfg(feature = "wgsl-in")] +#[cfg(all(feature = "wgsl-in", feature = "hlsl-out"))] #[test] fn unconsumed_vertex_outputs_hlsl_out() { let load_and_parse = |name| { diff --git a/tests/tests/regression/issue_3748.rs b/tests/tests/regression/issue_5553.rs similarity index 94% rename from tests/tests/regression/issue_3748.rs rename to tests/tests/regression/issue_5553.rs index e3a459aa8b..19247eec1c 100644 --- a/tests/tests/regression/issue_3748.rs +++ b/tests/tests/regression/issue_5553.rs @@ -3,18 +3,18 @@ use wgpu_test::{gpu_test, GpuTestConfiguration}; use wgpu::*; /// Previously, for every user-defined vertex output a fragment shader had to have a corresponding -/// user-defined input. This would generate `StageError::InputNotComsumed`. +/// user-defined input. This would generate `StageError::InputNotConsumed`. /// /// This requirement was removed from the WebGPU spec. Now, when generating hlsl, wgpu will /// automatically remove any user-defined outputs from the vertex shader that are not present in /// the fragment inputs. This is necessary for generating correct hlsl: -/// https://github.com/gfx-rs/naga/issues/1945 +/// https://github.com/gfx-rs/wgpu/issues/5553 #[gpu_test] static ALLOW_INPUT_NOT_CONSUMED: GpuTestConfiguration = GpuTestConfiguration::new().run_async(|ctx| async move { let module = ctx .device - .create_shader_module(include_wgsl!("issue_3748.wgsl")); + .create_shader_module(include_wgsl!("issue_5553.wgsl")); let pipeline_layout = ctx .device diff --git a/tests/tests/regression/issue_3748.wgsl b/tests/tests/regression/issue_5553.wgsl similarity index 100% rename from tests/tests/regression/issue_3748.wgsl rename to tests/tests/regression/issue_5553.wgsl diff --git a/tests/tests/root.rs b/tests/tests/root.rs index aac65be4b9..088d663a12 100644 --- a/tests/tests/root.rs +++ b/tests/tests/root.rs @@ -1,9 +1,9 @@ mod regression { mod issue_3349; mod issue_3457; - mod issue_3748; mod issue_4024; mod issue_4122; + mod issue_5553; } mod bgra8unorm_storage; diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs index 57161d1483..8fc6340eb1 100644 --- a/wgpu-core/src/validation.rs +++ b/wgpu-core/src/validation.rs @@ -1134,11 +1134,6 @@ impl Interface { )); } ( - // TODO: is a subtype allowed here? This isn't clear - // from the line in the spec: "For each user-defined - // input of descriptor.fragment there must be a - // user-defined output of descriptor.vertex that - // location, type, and interpolation of the input." iv.ty.is_subtype_of(&provided.ty), iv.ty.dim.num_components(), ) @@ -1164,23 +1159,14 @@ impl Interface { } } } - // TODO: front_facing, sample_index, and sample_mask builtin's should all increase - // components count for fragment input. Varying::BuiltIn(_) => {} } } if shader_stage == naga::ShaderStage::Vertex { - // TODO: if topology is point we should add 1 to inter_stage_components? for output in entry_point.outputs.iter() { //TODO: count builtins towards the limit? inter_stage_components += match *output { - // TODO: Spec mentions "Each user-defined output of descriptor.vertex consumes - // 4 scalar components". Not that it varies based on the type. So is there an - // inconsistency here? Also are all these "user-defined" or is that unknown at - // this stage? - // https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-inter-stage-interfaces - // (same applies to counting components for fragment inputs) Varying::Local { ref iv, .. } => iv.ty.dim.num_components(), Varying::BuiltIn(_) => 0, }; @@ -1202,9 +1188,6 @@ impl Interface { } } - // TODO: spec also has a max_inter_stage_shader_variables - // https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-inter-stage-interfaces and - // the location of user defined outputs(vertex)/inputs(fragment) must all be less than this if inter_stage_components > self.limits.max_inter_stage_shader_components { return Err(StageError::TooManyVaryings { used: inter_stage_components,