From d3edbc57a9df91816dfe4915c380a311c19c2106 Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sun, 14 Jul 2024 22:13:50 +0200 Subject: [PATCH] Compute pass benchmark (#5767) Adds a benchmark for compute pass recording, very similar to what we have for render passes. --- CHANGELOG.md | 1 + benches/Cargo.toml | 2 +- benches/README.md | 15 + benches/benches/computepass-bindless.wgsl | 26 + benches/benches/computepass.rs | 574 ++++++++++++++++++++++ benches/benches/computepass.wgsl | 26 + benches/benches/renderpass.rs | 5 + benches/benches/root.rs | 4 +- wgpu-core/src/binding_model.rs | 2 +- 9 files changed, 652 insertions(+), 3 deletions(-) create mode 100644 benches/benches/computepass-bindless.wgsl create mode 100644 benches/benches/computepass.rs create mode 100644 benches/benches/computepass.wgsl diff --git a/CHANGELOG.md b/CHANGELOG.md index 0bfe4577a3..c52dbac34c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -183,6 +183,7 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901) - Unconsumed vertex outputs are now always allowed. Removed `StageError::InputNotConsumed`, `Features::SHADER_UNUSED_VERTEX_OUTPUT`, and associated validation. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531) - Avoid introducing spurious features for optional dependencies. By @bjorn3 in [#5691](https://github.com/gfx-rs/wgpu/pull/5691) - `wgpu::Error` is now `Sync`, making it possible to be wrapped in `anyhow::Error` or `eyre::Report`. By @nolanderc in [#5820](https://github.com/gfx-rs/wgpu/pull/5820) +- Added benchmark suite. By @cwfitzgerald in [#5694](https://github.com/gfx-rs/wgpu/pull/5694), compute passes by @wumpf in [#5767](https://github.com/gfx-rs/wgpu/pull/5767) #### Metal - Removed the `link` Cargo feature. diff --git a/benches/Cargo.toml b/benches/Cargo.toml index 65ac0eefdb..1dba81434b 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -43,4 +43,4 @@ pollster.workspace = true profiling.workspace = true rayon.workspace = true tracy-client = { workspace = true, optional = true } -wgpu.workspace = true +wgpu = { workspace = true, features = ["wgsl"] } diff --git a/benches/README.md b/benches/README.md index 3f20cbba7d..55af5fe18e 100644 --- a/benches/README.md +++ b/benches/README.md @@ -24,6 +24,21 @@ By default it measures 10k draw calls, with 90k total resources. Within this benchmark, both single threaded and multi-threaded recording are tested, as well as splitting the render pass into multiple passes over multiple command buffers. +If available, it also tests a bindless approach, binding all textures at once instead of switching +the bind group for every draw call. + +#### `Computepass` + +This benchmark measures the performance of recording and submitting a compute pass with a large +number of dispatches and resources. +By default it measures 10k dispatch calls, with 60k total resources, emulating an unusually complex and sequential compute workload. + +Within this benchmark, both single threaded and multi-threaded recording are tested, as well as splitting +the compute pass into multiple passes over multiple command buffers. +If available, it also tests a bindless approach, binding all resources at once instead of switching +the bind group for every draw call. +TODO(https://github.com/gfx-rs/wgpu/issues/5766): The bindless version uses only 1k dispatches with 6k resources since it would be too slow for a reasonable benchmarking time otherwise. + #### `Resource Creation` diff --git a/benches/benches/computepass-bindless.wgsl b/benches/benches/computepass-bindless.wgsl new file mode 100644 index 0000000000..402ff94489 --- /dev/null +++ b/benches/benches/computepass-bindless.wgsl @@ -0,0 +1,26 @@ +@group(0) @binding(0) +var tex: binding_array>; + +@group(0) @binding(1) +// TODO(https://github.com/gfx-rs/wgpu/issues/5765): The extra whitespace between the angle brackets is needed to workaround a parsing bug. +var images: binding_array >; +struct BufferElement { + element: vec4f, +} + +@group(0) @binding(2) +var buffers: binding_array; + +@compute +@workgroup_size(16) +fn cs_main(@builtin(global_invocation_id) global_invocation_id: vec3) { + let offset = global_invocation_id.x; // Would be nice to offset this dynamically (it's just 0 always in the current setup) + + let idx0 = offset * 2 + 0; + let idx1 = offset * 2 + 1; + + let tex = textureLoad(tex[idx0], vec2u(0), 0) + textureLoad(tex[idx0], vec2u(0), 0); + let image = textureLoad(images[idx0], vec2u(0)) + textureLoad(images[idx1], vec2u(0)); + buffers[idx0].element = tex.rrrr; + buffers[idx1].element = image.rrrr; +} \ No newline at end of file diff --git a/benches/benches/computepass.rs b/benches/benches/computepass.rs new file mode 100644 index 0000000000..6ddbf55620 --- /dev/null +++ b/benches/benches/computepass.rs @@ -0,0 +1,574 @@ +use std::{ + num::{NonZeroU32, NonZeroU64}, + time::{Duration, Instant}, +}; + +use criterion::{criterion_group, Criterion, Throughput}; +use nanorand::{Rng, WyRand}; +use once_cell::sync::Lazy; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; + +use crate::DeviceState; + +#[cfg(not(test))] +const DISPATCH_COUNT: usize = 10_000; +#[cfg(test)] +const DISPATCH_COUNT: usize = 8; // Running with up to 8 threads. + +// Currently bindless is _much_ slower than with regularly resources, +// since wgpu needs to issues barriers for all resources between each dispatch for all read/write textures & buffers. +// This is in fact so slow that it makes the benchmark unusable when we use the same amount of +// resources as the regular benchmark. +// For details see https://github.com/gfx-rs/wgpu/issues/5766 +#[cfg(not(test))] +const DISPATCH_COUNT_BINDLESS: usize = 1_000; +#[cfg(test)] +const DISPATCH_COUNT_BINDLESS: usize = 8; // Running with up to 8 threads. + +// Must match the number of textures in the computepass.wgsl shader +const TEXTURES_PER_DISPATCH: usize = 2; +const STORAGE_TEXTURES_PER_DISPATCH: usize = 2; +const STORAGE_BUFFERS_PER_DISPATCH: usize = 2; + +const TEXTURE_COUNT: usize = DISPATCH_COUNT * TEXTURES_PER_DISPATCH; +const STORAGE_TEXTURE_COUNT: usize = DISPATCH_COUNT * STORAGE_TEXTURES_PER_DISPATCH; +const STORAGE_BUFFER_COUNT: usize = DISPATCH_COUNT * STORAGE_BUFFERS_PER_DISPATCH; + +const BUFFER_SIZE: u64 = 16; + +struct ComputepassState { + device_state: DeviceState, + pipeline: wgpu::ComputePipeline, + bind_groups: Vec, + + // Bindless resources + bindless_bind_group: Option, + bindless_pipeline: Option, +} + +impl ComputepassState { + /// Create and prepare all the resources needed for the computepass benchmark. + fn new() -> Self { + let device_state = DeviceState::new(); + + let supports_bindless = device_state.device.features().contains( + wgpu::Features::BUFFER_BINDING_ARRAY + | wgpu::Features::TEXTURE_BINDING_ARRAY + | wgpu::Features::STORAGE_RESOURCE_BINDING_ARRAY + | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + ) + // TODO: as of writing llvmpipe segfaults the bindless benchmark on ci + && device_state.adapter_info.driver != "llvmpipe"; + + // Performance gets considerably worse if the resources are shuffled. + // + // This more closely matches the real-world use case where resources have no + // well defined usage order. + let mut random = WyRand::new_seed(0x8BADF00D); + + let mut bind_group_layout_entries = Vec::with_capacity(TEXTURES_PER_DISPATCH); + for i in 0..TEXTURES_PER_DISPATCH { + bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry { + binding: i as u32, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }); + } + for i in 0..STORAGE_TEXTURES_PER_DISPATCH { + bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry { + binding: (TEXTURES_PER_DISPATCH + i) as u32, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::ReadWrite, + format: wgpu::TextureFormat::R32Float, + view_dimension: wgpu::TextureViewDimension::D2, + }, + count: None, + }); + } + for i in 0..STORAGE_BUFFERS_PER_DISPATCH { + bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry { + binding: (TEXTURES_PER_DISPATCH + STORAGE_BUFFERS_PER_DISPATCH + i) as u32, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: NonZeroU64::new(BUFFER_SIZE), + }, + count: None, + }); + } + + let bind_group_layout = + device_state + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &bind_group_layout_entries, + }); + + let mut texture_views = Vec::with_capacity(TEXTURE_COUNT); + for i in 0..TEXTURE_COUNT { + let texture = device_state + .device + .create_texture(&wgpu::TextureDescriptor { + label: Some(&format!("Texture {i}")), + size: wgpu::Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8UnormSrgb, + usage: wgpu::TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + texture_views.push(texture.create_view(&wgpu::TextureViewDescriptor { + label: Some(&format!("Texture View {i}")), + ..Default::default() + })); + } + random.shuffle(&mut texture_views); + let texture_view_refs: Vec<_> = texture_views.iter().collect(); + + let mut storage_texture_views = Vec::with_capacity(STORAGE_TEXTURE_COUNT); + for i in 0..TEXTURE_COUNT { + let texture = device_state + .device + .create_texture(&wgpu::TextureDescriptor { + label: Some(&format!("StorageTexture {i}")), + size: wgpu::Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::R32Float, + usage: wgpu::TextureUsages::STORAGE_BINDING, + view_formats: &[], + }); + storage_texture_views.push(texture.create_view(&wgpu::TextureViewDescriptor { + label: Some(&format!("StorageTexture View {i}")), + ..Default::default() + })); + } + random.shuffle(&mut storage_texture_views); + let storage_texture_view_refs: Vec<_> = storage_texture_views.iter().collect(); + + let mut storage_buffers = Vec::with_capacity(STORAGE_BUFFER_COUNT); + for i in 0..STORAGE_BUFFER_COUNT { + storage_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor { + label: Some(&format!("Buffer {i}")), + size: BUFFER_SIZE, + usage: wgpu::BufferUsages::STORAGE, + mapped_at_creation: false, + })); + } + random.shuffle(&mut storage_buffers); + let storage_buffer_bindings: Vec<_> = storage_buffers + .iter() + .map(|b| b.as_entire_buffer_binding()) + .collect(); + + let mut bind_groups = Vec::with_capacity(DISPATCH_COUNT); + for dispatch_idx in 0..DISPATCH_COUNT { + let mut entries = Vec::with_capacity(TEXTURES_PER_DISPATCH); + for tex_idx in 0..TEXTURES_PER_DISPATCH { + entries.push(wgpu::BindGroupEntry { + binding: tex_idx as u32, + resource: wgpu::BindingResource::TextureView( + &texture_views[dispatch_idx * TEXTURES_PER_DISPATCH + tex_idx], + ), + }); + } + for tex_idx in 0..STORAGE_TEXTURES_PER_DISPATCH { + entries.push(wgpu::BindGroupEntry { + binding: (TEXTURES_PER_DISPATCH + tex_idx) as u32, + resource: wgpu::BindingResource::TextureView( + &storage_texture_views + [dispatch_idx * STORAGE_TEXTURES_PER_DISPATCH + tex_idx], + ), + }); + } + for buffer_idx in 0..STORAGE_BUFFERS_PER_DISPATCH { + entries.push(wgpu::BindGroupEntry { + binding: (TEXTURES_PER_DISPATCH + STORAGE_BUFFERS_PER_DISPATCH + buffer_idx) + as u32, + resource: wgpu::BindingResource::Buffer( + storage_buffers[dispatch_idx * STORAGE_BUFFERS_PER_DISPATCH + buffer_idx] + .as_entire_buffer_binding(), + ), + }); + } + + bind_groups.push( + device_state + .device + .create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &entries, + }), + ); + } + random.shuffle(&mut bind_groups); + + let sm = device_state + .device + .create_shader_module(wgpu::include_wgsl!("computepass.wgsl")); + + let pipeline_layout = + device_state + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + + let pipeline = + device_state + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("Compute Pipeline"), + layout: Some(&pipeline_layout), + module: &sm, + entry_point: "cs_main", + compilation_options: wgpu::PipelineCompilationOptions::default(), + cache: None, + }); + + let (bindless_bind_group, bindless_pipeline) = if supports_bindless { + let bindless_bind_group_layout = + device_state + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[ + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { + filterable: true, + }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: Some(NonZeroU32::new(TEXTURE_COUNT as u32).unwrap()), + }, + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::ReadWrite, + format: wgpu::TextureFormat::R32Float, + view_dimension: wgpu::TextureViewDimension::D2, + }, + count: Some(NonZeroU32::new(STORAGE_TEXTURE_COUNT as u32).unwrap()), + }, + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: std::num::NonZeroU64::new(BUFFER_SIZE), + }, + count: Some(NonZeroU32::new(STORAGE_BUFFER_COUNT as u32).unwrap()), + }, + ], + }); + + let bindless_bind_group = + device_state + .device + .create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bindless_bind_group_layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureViewArray( + &texture_view_refs[..DISPATCH_COUNT_BINDLESS], + ), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: wgpu::BindingResource::TextureViewArray( + &storage_texture_view_refs[..DISPATCH_COUNT_BINDLESS], + ), + }, + wgpu::BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::BufferArray( + &storage_buffer_bindings[..DISPATCH_COUNT_BINDLESS], + ), + }, + ], + }); + + let bindless_sm = device_state + .device + .create_shader_module(wgpu::include_wgsl!("computepass-bindless.wgsl")); + + let bindless_pipeline_layout = + device_state + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bindless_bind_group_layout], + push_constant_ranges: &[], + }); + + let bindless_pipeline = + device_state + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("Compute Pipeline bindless"), + layout: Some(&bindless_pipeline_layout), + module: &bindless_sm, + entry_point: "cs_main", + compilation_options: wgpu::PipelineCompilationOptions::default(), + cache: None, + }); + + (Some(bindless_bind_group), Some(bindless_pipeline)) + } else { + (None, None) + }; + + Self { + device_state, + pipeline, + bind_groups, + + bindless_bind_group, + bindless_pipeline, + } + } + + fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer { + profiling::scope!("Computepass", &format!("Pass {pass_number}/{total_passes}")); + + let dispatch_per_pass = DISPATCH_COUNT / total_passes; + + let mut encoder = self + .device_state + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + + let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + + let start_idx = pass_number * dispatch_per_pass; + let end_idx = start_idx + dispatch_per_pass; + for dispatch_idx in start_idx..end_idx { + compute_pass.set_pipeline(&self.pipeline); + compute_pass.set_bind_group(0, &self.bind_groups[dispatch_idx], &[]); + compute_pass.dispatch_workgroups(1, 1, 1); + } + + drop(compute_pass); + + encoder.finish() + } + + fn run_bindless_pass(&self) -> wgpu::CommandBuffer { + profiling::scope!("Bindless Computepass"); + + let mut encoder = self + .device_state + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + + let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + + compute_pass.set_pipeline(self.bindless_pipeline.as_ref().unwrap()); + compute_pass.set_bind_group(0, self.bindless_bind_group.as_ref().unwrap(), &[]); + for _ in 0..DISPATCH_COUNT_BINDLESS { + compute_pass.dispatch_workgroups(1, 1, 1); + } + + drop(compute_pass); + + encoder.finish() + } +} + +fn run_bench(ctx: &mut Criterion) { + let state = Lazy::new(ComputepassState::new); + + // Test 10k dispatch calls split up into 1, 2, 4, and 8 computepasses + let mut group = ctx.benchmark_group("Computepass: Single Threaded"); + group.throughput(Throughput::Elements(DISPATCH_COUNT as _)); + + for time_submit in [false, true] { + for cpasses in [1, 2, 4, 8] { + let dispatch_per_pass = DISPATCH_COUNT / cpasses; + + let label = if time_submit { + "Submit Time" + } else { + "Computepass Time" + }; + + group.bench_function( + &format!("{cpasses} computepasses x {dispatch_per_pass} dispatches ({label})"), + |b| { + Lazy::force(&state); + + b.iter_custom(|iters| { + profiling::scope!("benchmark invocation"); + + let mut duration = Duration::ZERO; + + for _ in 0..iters { + profiling::scope!("benchmark iteration"); + + let mut start = Instant::now(); + + let mut buffers: Vec = Vec::with_capacity(cpasses); + for i in 0..cpasses { + buffers.push(state.run_subpass(i, cpasses)); + } + + if time_submit { + start = Instant::now(); + } else { + duration += start.elapsed(); + } + + state.device_state.queue.submit(buffers); + + if time_submit { + duration += start.elapsed(); + } + + state.device_state.device.poll(wgpu::Maintain::Wait); + } + + duration + }) + }, + ); + } + } + group.finish(); + + // Test 10k dispatch calls split up over 2, 4, and 8 threads. + let mut group = ctx.benchmark_group("Computepass: Multi Threaded"); + group.throughput(Throughput::Elements(DISPATCH_COUNT as _)); + + for threads in [2, 4, 8] { + let dispatch_per_pass = DISPATCH_COUNT / threads; + group.bench_function( + &format!("{threads} threads x {dispatch_per_pass} dispatch"), + |b| { + Lazy::force(&state); + + b.iter_custom(|iters| { + profiling::scope!("benchmark invocation"); + + // This benchmark hangs on Apple Paravirtualized GPUs. No idea why. + if state.device_state.adapter_info.name.contains("Paravirtual") { + return Duration::from_secs_f32(1.0); + } + + let mut duration = Duration::ZERO; + + for _ in 0..iters { + profiling::scope!("benchmark iteration"); + + let start = Instant::now(); + + let buffers = (0..threads) + .into_par_iter() + .map(|i| state.run_subpass(i, threads)) + .collect::>(); + + duration += start.elapsed(); + + state.device_state.queue.submit(buffers); + state.device_state.device.poll(wgpu::Maintain::Wait); + } + + duration + }) + }, + ); + } + group.finish(); + + // Test 10k dispatch calls split up over 1, 2, 4, and 8 threads. + let mut group = ctx.benchmark_group("Computepass: Bindless"); + group.throughput(Throughput::Elements(DISPATCH_COUNT_BINDLESS as _)); + + group.bench_function(&format!("{DISPATCH_COUNT_BINDLESS} dispatch"), |b| { + Lazy::force(&state); + + b.iter_custom(|iters| { + profiling::scope!("benchmark invocation"); + + // This benchmark hangs on Apple Paravirtualized GPUs. No idea why. + if state.device_state.adapter_info.name.contains("Paravirtual") { + return Duration::from_secs_f32(1.0); + } + + // Need bindless to run this benchmark + if state.bindless_bind_group.is_none() { + return Duration::from_secs_f32(1.0); + } + + let mut duration = Duration::ZERO; + + for _ in 0..iters { + profiling::scope!("benchmark iteration"); + + let start = Instant::now(); + + let buffer = state.run_bindless_pass(); + + duration += start.elapsed(); + + state.device_state.queue.submit([buffer]); + state.device_state.device.poll(wgpu::Maintain::Wait); + } + + duration + }) + }); + group.finish(); + + ctx.bench_function( + &format!( + "Computepass: Empty Submit with {} Resources", + TEXTURE_COUNT + STORAGE_TEXTURE_COUNT + STORAGE_BUFFER_COUNT + ), + |b| { + Lazy::force(&state); + + b.iter(|| state.device_state.queue.submit([])); + }, + ); +} + +criterion_group! { + name = computepass; + config = Criterion::default().measurement_time(Duration::from_secs(10)); + targets = run_bench, +} diff --git a/benches/benches/computepass.wgsl b/benches/benches/computepass.wgsl new file mode 100644 index 0000000000..83d7d49785 --- /dev/null +++ b/benches/benches/computepass.wgsl @@ -0,0 +1,26 @@ +@group(0) @binding(0) +var tex_0: texture_2d; + +@group(0) @binding(1) +var tex_1: texture_2d; + +@group(0) @binding(2) +var image_0: texture_storage_2d; + +@group(0) @binding(3) +var image_1: texture_storage_2d; + +@group(0) @binding(4) +var buffer0 : array; + +@group(0) @binding(5) +var buffer1 : array; + +@compute +@workgroup_size(16) +fn cs_main(@builtin(global_invocation_id) global_invocation_id: vec3) { + let tex = textureLoad(tex_0, vec2u(0), 0) + textureLoad(tex_1, vec2u(0), 0); + let image = textureLoad(image_0, vec2u(0)) + textureLoad(image_1, vec2u(0)); + buffer0[0] = tex.rrrr; + buffer1[0] = image.rrrr; +} diff --git a/benches/benches/renderpass.rs b/benches/benches/renderpass.rs index fcb35c3864..9a204c0f79 100644 --- a/benches/benches/renderpass.rs +++ b/benches/benches/renderpass.rs @@ -10,7 +10,12 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use crate::DeviceState; +#[cfg(test)] +const DRAW_COUNT: usize = 8; // Running with up to 8 threads. + +#[cfg(not(test))] const DRAW_COUNT: usize = 10_000; + // Must match the number of textures in the renderpass.wgsl shader const TEXTURES_PER_DRAW: usize = 7; const VERTEX_BUFFERS_PER_DRAW: usize = 2; diff --git a/benches/benches/root.rs b/benches/benches/root.rs index 6ef2efabc2..064617783d 100644 --- a/benches/benches/root.rs +++ b/benches/benches/root.rs @@ -1,6 +1,7 @@ use criterion::criterion_main; use pollster::block_on; +mod computepass; mod renderpass; mod resource_creation; mod shader; @@ -45,7 +46,7 @@ impl DeviceState { required_features: adapter.features(), required_limits: adapter.limits(), memory_hints: wgpu::MemoryHints::Performance, - label: Some("RenderPass Device"), + label: Some("Compute/RenderPass Device"), }, None, )) @@ -61,6 +62,7 @@ impl DeviceState { criterion_main!( renderpass::renderpass, + computepass::computepass, resource_creation::resource_creation, shader::shader ); diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs index 729618995d..91952a8f8a 100644 --- a/wgpu-core/src/binding_model.rs +++ b/wgpu-core/src/binding_model.rs @@ -66,7 +66,7 @@ pub enum CreateBindGroupLayoutError { }, #[error(transparent)] TooManyBindings(BindingTypeMaxCountError), - #[error("Binding index {binding} is greater than the maximum index {maximum}")] + #[error("Binding index {binding} is greater than the maximum number {maximum}")] InvalidBindingIndex { binding: u32, maximum: u32 }, #[error("Invalid visibility {0:?}")] InvalidVisibility(wgt::ShaderStages),