diff --git a/CHANGELOG.md b/CHANGELOG.md index 538546e4c17..67d91eef769 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,8 +68,18 @@ By @stefnotch in [#5410](https://github.com/gfx-rs/wgpu/pull/5410) #### General +- Unconsumed vertex outputs are now always allowed. Removed `StageError::InputNotConsumed`, `Features::SHADER_UNUSED_VERTEX_OUTPUT`, and associated validation. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531) + #### Naga +- In hlsl-out, allow passing information about the fragment entry point to omit vertex outputs that are not in the fragment inputs. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531) + + ```diff + let writer: naga::back::hlsl::Writer = /* ... */; + -writer.write(&module, &module_info); + +writer.write(&module, &module_info, None); + ``` + ### Bug Fixes ## v0.20.0 (2024-04-28) diff --git a/deno_webgpu/lib.rs b/deno_webgpu/lib.rs index 453d4ea7e3d..18c6e932ad1 100644 --- a/deno_webgpu/lib.rs +++ b/deno_webgpu/lib.rs @@ -360,9 +360,6 @@ fn deserialize_features(features: &wgpu_types::Features) -> Vec<&'static str> { if features.contains(wgpu_types::Features::SHADER_EARLY_DEPTH_TEST) { return_features.push("shader-early-depth-test"); } - if features.contains(wgpu_types::Features::SHADER_UNUSED_VERTEX_OUTPUT) { - return_features.push("shader-unused-vertex-output"); - } return_features } @@ -648,10 +645,6 @@ impl From for wgpu_types::Features { wgpu_types::Features::SHADER_EARLY_DEPTH_TEST, required_features.0.contains("shader-early-depth-test"), ); - features.set( - wgpu_types::Features::SHADER_UNUSED_VERTEX_OUTPUT, - required_features.0.contains("shader-unused-vertex-output"), - ); features } diff --git a/naga-cli/src/bin/naga.rs b/naga-cli/src/bin/naga.rs index 7ff086d3f7a..593630e6389 100644 --- a/naga-cli/src/bin/naga.rs +++ b/naga-cli/src/bin/naga.rs @@ -777,7 +777,7 @@ fn write_output( let mut buffer = String::new(); let mut writer = hlsl::Writer::new(&mut buffer, ¶ms.hlsl); - writer.write(&module, &info).unwrap_pretty(); + writer.write(&module, &info, None).unwrap_pretty(); fs::write(output_path, buffer)?; } "wgsl" => { diff --git a/naga/benches/criterion.rs b/naga/benches/criterion.rs index e57c58a8476..b8d794563ec 100644 --- a/naga/benches/criterion.rs +++ b/naga/benches/criterion.rs @@ -226,7 +226,7 @@ fn backends(c: &mut Criterion) { let mut string = String::new(); for &(ref module, ref info) in inputs.iter() { let mut writer = naga::back::hlsl::Writer::new(&mut string, &options); - let _ = writer.write(module, info); // may fail on unimplemented things + let _ = writer.write(module, info, None); // may fail on unimplemented things string.clear(); } }); diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs index 28edbf70e1d..5cbfffc4745 100644 --- a/naga/src/back/hlsl/mod.rs +++ b/naga/src/back/hlsl/mod.rs @@ -287,6 +287,35 @@ impl Wrapped { } } +/// A fragment entry point to be considered when generating HLSL for the output interface of vertex +/// entry points. +/// +/// This is provided as an optional paramter to [`Writer::write`]. +/// +/// If this is provided, vertex outputs will be removed if they are not inputs of this fragment +/// entry point. This is necessary for generating correct HLSL when some of the vertex shader +/// outputs are not consumed by the fragment shader. +pub struct FragmentEntryPoint<'a> { + module: &'a crate::Module, + func: &'a crate::Function, +} + +impl<'a> FragmentEntryPoint<'a> { + /// Returns `None` if the entry point with the provided name can't be found or isn't a fragment + /// entry point. + pub fn new(module: &'a crate::Module, ep_name: &'a str) -> Option { + module + .entry_points + .iter() + .find(|ep| ep.name == ep_name) + .filter(|ep| ep.stage == crate::ShaderStage::Fragment) + .map(|ep| Self { + module, + func: &ep.function, + }) + } +} + pub struct Writer<'a, W> { out: W, names: crate::FastHashMap, diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index 86d8f890357..5289002a15c 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -4,7 +4,7 @@ use super::{ WrappedZeroValue, }, storage::StoreValue, - BackendResult, Error, Options, + BackendResult, Error, FragmentEntryPoint, Options, }; use crate::{ back, @@ -28,6 +28,7 @@ pub(crate) const INSERT_BITS_FUNCTION: &str = "naga_insertBits"; struct EpStructMember { name: String, ty: Handle, + // TODO: log error if binding is none? // technically, this should always be `Some` binding: Option, index: u32, @@ -196,6 +197,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { &mut self, module: &Module, module_info: &valid::ModuleInfo, + fragment_entry_point: Option<&FragmentEntryPoint<'_>>, ) -> Result { if !module.overrides.is_empty() { return Err(Error::Override); @@ -296,7 +298,13 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // Write all entry points wrapped structs for (index, ep) in module.entry_points.iter().enumerate() { let ep_name = self.names[&NameKey::EntryPoint(index as u16)].clone(); - let ep_io = self.write_ep_interface(module, &ep.function, ep.stage, &ep_name)?; + let ep_io = self.write_ep_interface( + module, + &ep.function, + ep.stage, + &ep_name, + fragment_entry_point, + )?; self.entry_point_io.push(ep_io); } @@ -504,6 +512,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { writeln!(self.out, "}};")?; writeln!(self.out)?; + // See ordering notes on EntryPointInterface fields match shader_stage.1 { Io::Input => { // bring back the original order @@ -537,6 +546,8 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { for arg in func.arguments.iter() { match module.types[arg.ty].inner { TypeInner::Struct { ref members, .. } => { + // TODO: what about nested structs? Is that possible? Maybe try an unwrap on + // the binding? for member in members.iter() { let name = self.namer.call_or(&member.name, "member"); let index = fake_members.len() as u32; @@ -573,10 +584,10 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { result: &crate::FunctionResult, stage: ShaderStage, entry_point_name: &str, + frag_ep: Option<&FragmentEntryPoint<'_>>, ) -> Result { let struct_name = format!("{stage:?}Output_{entry_point_name}"); - let mut fake_members = Vec::new(); let empty = []; let members = match module.types[result.ty].inner { TypeInner::Struct { ref members, .. } => members, @@ -586,14 +597,60 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } }; - for member in members.iter() { + // Gather list of fragment input locations. We use this below to remove user-defined + // varyings from VS outputs that aren't in the FS inputs. This makes the VS interface match + // as long as the FS inputs are a subset of the VS outputs. This is only applied if the + // writer is supplied with information about the fragment entry point. + let fs_input_locs = if let (Some(frag_ep), ShaderStage::Vertex) = (frag_ep, stage) { + let mut fs_input_locs = Vec::new(); + for arg in frag_ep.func.arguments.iter() { + let mut push_if_location = |binding: &Option| { + match *binding { + Some(crate::Binding::Location { location, .. }) => { + fs_input_locs.push(location) + } + Some(crate::Binding::BuiltIn(_)) => {} + // Log error? + None => {} + } + }; + match frag_ep.module.types[arg.ty].inner { + TypeInner::Struct { ref members, .. } => { + // TODO: nesting? + for member in members.iter() { + push_if_location(&member.binding); + } + } + _ => push_if_location(&arg.binding), + } + } + fs_input_locs.sort(); + Some(fs_input_locs) + } else { + None + }; + + let mut fake_members = Vec::new(); + for (index, member) in members.iter().enumerate() { + if let Some(ref fs_input_locs) = fs_input_locs { + match member.binding { + Some(crate::Binding::Location { location, .. }) => { + if fs_input_locs.binary_search(&location).is_err() { + continue; + } + } + Some(crate::Binding::BuiltIn(_)) => {} + // Log error? + None => {} + } + } + let member_name = self.namer.call_or(&member.name, "member"); - let index = fake_members.len() as u32; fake_members.push(EpStructMember { name: member_name, ty: member.ty, binding: member.binding.clone(), - index, + index: index as u32, }); } @@ -609,6 +666,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { func: &crate::Function, stage: ShaderStage, ep_name: &str, + frag_ep: Option<&FragmentEntryPoint<'_>>, ) -> Result { Ok(EntryPointInterface { input: if !func.arguments.is_empty() @@ -624,7 +682,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { }, output: match func.result { Some(ref fr) if fr.binding.is_none() && stage == ShaderStage::Vertex => { - Some(self.write_ep_output_struct(module, fr, stage, ep_name)?) + Some(self.write_ep_output_struct(module, fr, stage, ep_name, frag_ep)?) } _ => None, }, diff --git a/naga/tests/in/unconsumed_vertex_outputs_frag.param.ron b/naga/tests/in/unconsumed_vertex_outputs_frag.param.ron new file mode 100644 index 00000000000..72873dd6677 --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_frag.param.ron @@ -0,0 +1,2 @@ +( +) diff --git a/naga/tests/in/unconsumed_vertex_outputs_frag.wgsl b/naga/tests/in/unconsumed_vertex_outputs_frag.wgsl new file mode 100644 index 00000000000..3a656c9696b --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_frag.wgsl @@ -0,0 +1,13 @@ +// Out of order to test sorting. +struct FragmentIn { + @location(1) value: f32, + @location(3) value2: f32, + @builtin(position) position: vec4, + // @location(0) unused_value: f32, + // @location(2) unused_value2: vec4, +} + +@fragment +fn fs_main(v_out: FragmentIn) -> @location(0) vec4 { + return vec4(v_out.value, v_out.value, v_out.value2, v_out.value2); +} diff --git a/naga/tests/in/unconsumed_vertex_outputs_vert.param.ron b/naga/tests/in/unconsumed_vertex_outputs_vert.param.ron new file mode 100644 index 00000000000..72873dd6677 --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_vert.param.ron @@ -0,0 +1,2 @@ +( +) diff --git a/naga/tests/in/unconsumed_vertex_outputs_vert.wgsl b/naga/tests/in/unconsumed_vertex_outputs_vert.wgsl new file mode 100644 index 00000000000..46c39ea9300 --- /dev/null +++ b/naga/tests/in/unconsumed_vertex_outputs_vert.wgsl @@ -0,0 +1,13 @@ +// Out of order to test sorting. +struct VertexOut { + @builtin(position) position: vec4, + @location(1) value: f32, + @location(2) unused_value2: vec4, + @location(0) unused_value: f32, + @location(3) value2: f32, +} + +@vertex +fn vs_main() -> VertexOut { + return VertexOut(vec4(1.0), 1.0, vec4(2.0), 1.0, 0.5); +} diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.hlsl b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.hlsl new file mode 100644 index 00000000000..4005e435380 --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.hlsl @@ -0,0 +1,17 @@ +struct FragmentIn { + float value : LOC1; + float value2_ : LOC3; + float4 position : SV_Position; +}; + +struct FragmentInput_fs_main { + float value : LOC1; + float value2_ : LOC3; + float4 position : SV_Position; +}; + +float4 fs_main(FragmentInput_fs_main fragmentinput_fs_main) : SV_Target0 +{ + FragmentIn v_out = { fragmentinput_fs_main.value, fragmentinput_fs_main.value2_, fragmentinput_fs_main.position }; + return float4(v_out.value, v_out.value, v_out.value2_, v_out.value2_); +} diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.ron b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.ron new file mode 100644 index 00000000000..eac1b945d2b --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_frag.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ( + entry_point:"fs_main", + target_profile:"ps_5_1", + ), + ], + compute:[ + ], +) diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.hlsl b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.hlsl new file mode 100644 index 00000000000..ea75d638773 --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.hlsl @@ -0,0 +1,30 @@ +struct VertexOut { + float4 position : SV_Position; + float value : LOC1; + float4 unused_value2_ : LOC2; + float unused_value : LOC0; + float value2_ : LOC3; +}; + +struct VertexOutput_vs_main { + float value : LOC1; + float value2_ : LOC3; + float4 position : SV_Position; +}; + +VertexOut ConstructVertexOut(float4 arg0, float arg1, float4 arg2, float arg3, float arg4) { + VertexOut ret = (VertexOut)0; + ret.position = arg0; + ret.value = arg1; + ret.unused_value2_ = arg2; + ret.unused_value = arg3; + ret.value2_ = arg4; + return ret; +} + +VertexOutput_vs_main vs_main() +{ + const VertexOut vertexout = ConstructVertexOut((1.0).xxxx, 1.0, (2.0).xxxx, 1.0, 0.5); + const VertexOutput_vs_main vertexout_1 = { vertexout.value, vertexout.value2_, vertexout.position }; + return vertexout_1; +} diff --git a/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.ron b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.ron new file mode 100644 index 00000000000..a24f8d0eb8b --- /dev/null +++ b/naga/tests/out/hlsl/unconsumed_vertex_outputs_vert.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ( + entry_point:"vs_main", + target_profile:"vs_5_1", + ), + ], + fragment:[ + ], + compute:[ + ], +) diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index ee775a3e63b..e98d368c030 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -265,7 +265,13 @@ fn check_targets( module: &mut naga::Module, targets: Targets, source_code: Option<&str>, + // For testing hlsl generation when fragment shader doesn't consume all vertex outputs. + frag_ep: Option, ) { + if frag_ep.is_some() && !targets.contains(Targets::HLSL) { + panic!("Providing FragmentEntryPoint only makes sense when testing hlsl-out"); + } + let params = input.read_parameters(); let name = &input.file_name; @@ -409,6 +415,7 @@ fn check_targets( &info, ¶ms.hlsl, ¶ms.pipeline_constants, + frag_ep, ); } } @@ -587,6 +594,7 @@ fn write_output_hlsl( info: &naga::valid::ModuleInfo, options: &naga::back::hlsl::Options, pipeline_constants: &naga::back::PipelineConstants, + frag_ep: Option, ) { use naga::back::hlsl; use std::fmt::Write as _; @@ -599,7 +607,9 @@ fn write_output_hlsl( let mut buffer = String::new(); let mut writer = hlsl::Writer::new(&mut buffer, options); - let reflection_info = writer.write(&module, &info).expect("HLSL write failed"); + let reflection_info = writer + .write(&module, &info, frag_ep.as_ref()) + .expect("HLSL write failed"); input.write_output_file("hlsl", "hlsl", buffer); @@ -890,7 +900,7 @@ fn convert_wgsl() { let input = Input::new(None, name, "wgsl"); let source = input.read_source(); match naga::front::wgsl::parse_str(&source) { - Ok(mut module) => check_targets(&input, &mut module, targets, None), + Ok(mut module) => check_targets(&input, &mut module, targets, None, None), Err(e) => panic!( "{}", e.emit_to_string_with_path(&source, input.input_path()) @@ -912,7 +922,7 @@ fn convert_wgsl() { // crlf will make the large split output different on different platform let source = source.replace('\r', ""); match naga::front::wgsl::parse_str(&source) { - Ok(mut module) => check_targets(&input, &mut module, targets, Some(&source)), + Ok(mut module) => check_targets(&input, &mut module, targets, Some(&source), None), Err(e) => panic!( "{}", e.emit_to_string_with_path(&source, input.input_path()) @@ -922,6 +932,36 @@ fn convert_wgsl() { } } +#[cfg(feature = "wgsl-in")] +#[test] +fn unconsumed_vertex_outputs_hlsl_out() { + let load_and_parse = |name| { + // WGSL shaders lives in root dir as a privileged. + let input = Input::new(None, name, "wgsl"); + let source = input.read_source(); + let module = match naga::front::wgsl::parse_str(&source) { + Ok(module) => module, + Err(e) => panic!( + "{}", + e.emit_to_string_with_path(&source, input.input_path()) + ), + }; + (input, module) + }; + + // Uses separate wgsl files to make sure the tested code doesn't accidentally rely on + // the fragment entry point being from the same parsed content (e.g. accidentally using the + // wrong `Module` when looking up info). We also don't just create a module from the same file + // twice since everything would probably be stored behind the same keys. + let (input, mut module) = load_and_parse("unconsumed_vertex_outputs_vert"); + let (frag_input, mut frag_module) = load_and_parse("unconsumed_vertex_outputs_frag"); + let frag_ep = naga::back::hlsl::FragmentEntryPoint::new(&frag_module, "fs_main") + .expect("fs_main not found"); + + check_targets(&input, &mut module, Targets::HLSL, None, Some(frag_ep)); + check_targets(&frag_input, &mut frag_module, Targets::HLSL, None, None); +} + #[cfg(feature = "spv-in")] fn convert_spv(name: &str, adjust_coordinate_space: bool, targets: Targets) { let _ = env_logger::try_init(); @@ -936,7 +976,7 @@ fn convert_spv(name: &str, adjust_coordinate_space: bool, targets: Targets) { }, ) .unwrap(); - check_targets(&input, &mut module, targets, None); + check_targets(&input, &mut module, targets, None, None); } #[cfg(feature = "spv-in")] @@ -996,7 +1036,7 @@ fn convert_glsl_variations_check() { &source, ) .unwrap(); - check_targets(&input, &mut module, Targets::GLSL, None); + check_targets(&input, &mut module, Targets::GLSL, None, None); } #[cfg(feature = "glsl-in")] diff --git a/tests/tests/regression/issue_3748.rs b/tests/tests/regression/issue_3748.rs new file mode 100644 index 00000000000..c38235021ed --- /dev/null +++ b/tests/tests/regression/issue_3748.rs @@ -0,0 +1,52 @@ +use wgpu_test::{gpu_test, GpuTestConfiguration}; + +use wgpu::*; + +/// Previously, for every user-defined vertex output a fragment shader had to have a corresponding +/// user-defined input. This would generate `StageError::InputNotComsumed`. +/// +/// This requirement was removed from the WebGPU spec. Now, when generating hlsl, wgpu will +/// automatically remove any user-defined outputs from the vertex shader that are not present in +/// the fragment inputs. This is necessary for generating correct hlsl: +/// https://github.com/gfx-rs/naga/issues/1945 +#[gpu_test] +static ALLOW_INPUT_NOT_CONSUMED: GpuTestConfiguration = + GpuTestConfiguration::new().run_async(|ctx| async move { + let module = ctx + .device + .create_shader_module(include_wgsl!("issue_3748.wgsl")); + + let pipeline_layout = ctx + .device + .create_pipeline_layout(&PipelineLayoutDescriptor { + label: Some("Pipeline Layout"), + bind_group_layouts: &[], + push_constant_ranges: &[], + }); + + ctx.device + .create_render_pipeline(&RenderPipelineDescriptor { + label: Some("Pipeline"), + layout: Some(&pipeline_layout), + vertex: VertexState { + module: &module, + entry_point: "vs_main", + compilation_options: Default::default(), + buffers: &[], + }, + primitive: PrimitiveState::default(), + depth_stencil: None, + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + module: &module, + entry_point: "fs_main", + compilation_options: Default::default(), + targets: &[Some(ColorTargetState { + format: TextureFormat::Rgba8Unorm, + blend: None, + write_mask: ColorWrites::all(), + })], + }), + multiview: None, + }); + }); diff --git a/tests/tests/regression/issue_3748.wgsl b/tests/tests/regression/issue_3748.wgsl new file mode 100644 index 00000000000..78ace6d9dba --- /dev/null +++ b/tests/tests/regression/issue_3748.wgsl @@ -0,0 +1,23 @@ +struct VertexOut { + @builtin(position) position: vec4, + @location(0) unused_value: f32, + @location(1) value: f32, +} + +struct FragmentIn { + @builtin(position) position: vec4, + // @location(0) unused_value: f32, + @location(1) value: f32, +} + +@vertex +fn vs_main() -> VertexOut { + return VertexOut(vec4(1.0), 1.0, 1.0); +} + +@fragment +fn fs_main(v_out: FragmentIn) -> @location(0) vec4 { + return vec4(v_out.value); +} + + diff --git a/tests/tests/root.rs b/tests/tests/root.rs index 6dc7af56ec2..91a3542c1b9 100644 --- a/tests/tests/root.rs +++ b/tests/tests/root.rs @@ -1,6 +1,7 @@ mod regression { mod issue_3349; mod issue_3457; + mod issue_3748; mod issue_4024; mod issue_4122; } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index b1aa17ba782..2a2af1ddb59 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -1506,8 +1506,7 @@ impl Device { }) })?; - let interface = - validation::Interface::new(&module, &info, self.limits.clone(), self.features); + let interface = validation::Interface::new(&module, &info, self.limits.clone()); let hal_shader = hal::ShaderInput::Naga(hal::NagaShader { module, info, diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs index d360ee96219..79e023283cd 100644 --- a/wgpu-core/src/validation.rs +++ b/wgpu-core/src/validation.rs @@ -132,7 +132,6 @@ struct EntryPoint { #[derive(Debug)] pub struct Interface { limits: wgt::Limits, - features: wgt::Features, resources: naga::Arena, entry_points: FastHashMap<(naga::ShaderStage, String), EntryPoint>, } @@ -281,8 +280,6 @@ pub enum StageError { #[source] error: InputError, }, - #[error("Location[{location}] is provided by the previous stage output but is not consumed as input by this stage.")] - InputNotConsumed { location: wgt::ShaderLocation }, #[error( "Unable to select an entry point: no entry point was found in the provided shader module" )] @@ -884,12 +881,7 @@ impl Interface { list.push(varying); } - pub fn new( - module: &naga::Module, - info: &naga::valid::ModuleInfo, - limits: wgt::Limits, - features: wgt::Features, - ) -> Self { + pub fn new(module: &naga::Module, info: &naga::valid::ModuleInfo, limits: wgt::Limits) -> Self { let mut resources = naga::Arena::new(); let mut resource_mapping = FastHashMap::default(); for (var_handle, var) in module.global_variables.iter() { @@ -976,7 +968,6 @@ impl Interface { Self { limits, - features, resources, entry_points, } @@ -1198,6 +1189,11 @@ impl Interface { )); } ( + // TODO: is a subtype allowed here? This isn't clear + // from the line in the spec: "For each user-defined + // input of descriptor.fragment there must be a + // user-defined output of descriptor.vertex that + // location, type, and interpolation of the input." iv.ty.is_subtype_of(&provided.ty), iv.ty.dim.num_components(), ) @@ -1223,35 +1219,23 @@ impl Interface { } } } + // TODO: front_facing, sample_index, and sample_mask builtin's should all increase + // components count for fragment input. Varying::BuiltIn(_) => {} } } - // Check all vertex outputs and make sure the fragment shader consumes them. - // This requirement is removed if the `SHADER_UNUSED_VERTEX_OUTPUT` feature is enabled. - if shader_stage == naga::ShaderStage::Fragment - && !self - .features - .contains(wgt::Features::SHADER_UNUSED_VERTEX_OUTPUT) - { - for &index in inputs.keys() { - // This is a linear scan, but the count should be low enough - // that this should be fine. - let found = entry_point.inputs.iter().any(|v| match *v { - Varying::Local { location, .. } => location == index, - Varying::BuiltIn(_) => false, - }); - - if !found { - return Err(StageError::InputNotConsumed { location: index }); - } - } - } - if shader_stage == naga::ShaderStage::Vertex { + // TODO: if topology is point we should add 1 to inter_stage_components? for output in entry_point.outputs.iter() { //TODO: count builtins towards the limit? inter_stage_components += match *output { + // TODO: Spec mentions "Each user-defined output of descriptor.vertex consumes + // 4 scalar components". Not that it varies based on the type. So is there an + // inconsistency here? Also are all these "user-defined" or is that unknown at + // this stage? + // https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-inter-stage-interfaces + // (same applies to counting components for fragment inputs) Varying::Local { ref iv, .. } => iv.ty.dim.num_components(), Varying::BuiltIn(_) => 0, }; @@ -1273,6 +1257,9 @@ impl Interface { } } + // TODO: spec also has a max_inter_stage_shader_variables + // https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-inter-stage-interfaces and + // the location of user defined outputs(vertex)/inputs(fragment) must all be less than this if inter_stage_components > self.limits.max_inter_stage_shader_components { return Err(StageError::TooManyVaryings { used: inter_stage_components, diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 82075294ee5..c37289f4748 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -209,14 +209,26 @@ impl super::Device { Ok(()) } + /// When generating the vertex shader, the fragment stage must be passed if it exists! + /// Otherwise, the generated HLSL may be incorrect since the fragment shader inputs are + /// allowed to be a subset of the vertex outputs. fn load_shader( &self, stage: &crate::ProgrammableStage, layout: &super::PipelineLayout, naga_stage: naga::ShaderStage, + fragment_stage: Option<&crate::ProgrammableStage>, ) -> Result { use naga::back::hlsl; + let frag_ep = fragment_stage + .map(|fs_stage| { + hlsl::FragmentEntryPoint::new(&fs_stage.module.naga.module, fs_stage.entry_point).ok_or( + crate::PipelineError::EntryPoint(naga::ShaderStage::Fragment), + ) + }) + .transpose()?; + let stage_bit = crate::auxil::map_naga_stage(naga_stage); let (module, info) = naga::back::pipeline_constants::process_overrides( @@ -243,7 +255,7 @@ impl super::Device { let reflection_info = { profiling::scope!("naga::back::hlsl::write"); writer - .write(&module, &info) + .write(&module, &info, frag_ep.as_ref()) .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {e:?}")))? }; @@ -1300,12 +1312,16 @@ impl crate::Device for super::Device { let (topology_class, topology) = conv::map_topology(desc.primitive.topology); let mut shader_stages = wgt::ShaderStages::VERTEX; - let blob_vs = - self.load_shader(&desc.vertex_stage, desc.layout, naga::ShaderStage::Vertex)?; + let blob_vs = self.load_shader( + &desc.vertex_stage, + desc.layout, + naga::ShaderStage::Vertex, + desc.fragment_stage.as_ref(), + )?; let blob_fs = match desc.fragment_stage { Some(ref stage) => { shader_stages |= wgt::ShaderStages::FRAGMENT; - Some(self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment)?) + Some(self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment, None)?) } None => None, }; @@ -1484,7 +1500,7 @@ impl crate::Device for super::Device { &self, desc: &crate::ComputePipelineDescriptor, ) -> Result { - let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?; + let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute, None)?; let pair = { profiling::scope!("ID3D12Device::CreateComputePipelineState"); diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 052c77006bd..0fff90c5b7e 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -491,7 +491,6 @@ impl super::Adapter { wgt::Features::SHADER_EARLY_DEPTH_TEST, supported((3, 1), (4, 2)) || extensions.contains("GL_ARB_shader_image_load_store"), ); - features.set(wgt::Features::SHADER_UNUSED_VERTEX_OUTPUT, true); if extensions.contains("GL_ARB_timer_query") { features.set(wgt::Features::TIMESTAMP_QUERY, true); features.set(wgt::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS, true); diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index cddba472bd1..0892f70b536 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -904,7 +904,6 @@ impl super::PrivateCapabilities { features.set(F::ADDRESS_MODE_CLAMP_TO_ZERO, true); features.set(F::RG11B10UFLOAT_RENDERABLE, self.format_rg11b10_all); - features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); if self.supports_simd_scoped_operations { features.insert(F::SUBGROUP | F::SUBGROUP_BARRIER); diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 21219361f4c..90970b3f189 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -740,7 +740,6 @@ impl PhysicalDeviceFeatures { | vk::FormatFeatureFlags::COLOR_ATTACHMENT_BLEND, ); features.set(F::RG11B10UFLOAT_RENDERABLE, rg11b10ufloat_renderable); - features.set(F::SHADER_UNUSED_VERTEX_OUTPUT, true); features.set( F::BGRA8UNORM_STORAGE, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index cb3f1add0ee..ec0111c2975 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -799,14 +799,6 @@ bitflags::bitflags! { /// /// This is a native only feature. const VERTEX_ATTRIBUTE_64BIT = 1 << 45; - /// Allows vertex shaders to have outputs which are not consumed - /// by the fragment shader. - /// - /// Supported platforms: - /// - Vulkan - /// - Metal - /// - OpenGL - const SHADER_UNUSED_VERTEX_OUTPUT = 1 << 46; /// Allows for creation of textures of format [`TextureFormat::NV12`] /// /// Supported platforms: