From 2f9008f182cb7cb8ef27577feefcc5c722ff6878 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexander=20Mei=C3=9Fner?=
Date: Thu, 12 Oct 2023 01:02:08 +0200
Subject: [PATCH] Adds subgroup_operations tests.

---
 tests/tests/gpu.rs                          |   1 +
 tests/tests/subgroup_operations/mod.rs      | 109 +++++++++++++++++++
 tests/tests/subgroup_operations/shader.wgsl | 113 ++++++++++++++++++++
 3 files changed, 223 insertions(+)
 create mode 100644 tests/tests/subgroup_operations/mod.rs
 create mode 100644 tests/tests/subgroup_operations/shader.wgsl

diff --git a/tests/tests/gpu.rs b/tests/tests/gpu.rs
index a5fbcde9da9..1494d0f128a 100644
--- a/tests/tests/gpu.rs
+++ b/tests/tests/gpu.rs
@@ -27,6 +27,7 @@ mod scissor_tests;
 mod shader;
 mod shader_primitive_index;
 mod shader_view_format;
+mod subgroup_operations;
 mod texture_bounds;
 mod transfer;
 mod vertex_indices;
diff --git a/tests/tests/subgroup_operations/mod.rs b/tests/tests/subgroup_operations/mod.rs
new file mode 100644
index 00000000000..6e2ee72304c
--- /dev/null
+++ b/tests/tests/subgroup_operations/mod.rs
@@ -0,0 +1,109 @@
+//! Runs `shader.wgsl` in a compute pass and checks that every subgroup check passes.
+
+use std::{borrow::Cow, num::NonZeroU64};
+
+use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters};
+
+/// Number of `u32` result slots; equal to `@workgroup_size` in `shader.wgsl`.
+const THREAD_COUNT: u64 = 128;
+
+#[gpu_test]
+static SUBGROUP_OPERATIONS: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .features(wgpu::Features::SUBGROUP_OPERATIONS)
+            .limits(wgpu::Limits::downlevel_defaults()),
+    )
+    .run_sync(|ctx| {
+        let device = &ctx.device;
+
+        let storage_buffer = device.create_buffer(&wgpu::BufferDescriptor {
+            label: None,
+            size: THREAD_COUNT * std::mem::size_of::<u32>() as u64,
+            usage: wgpu::BufferUsages::STORAGE
+                | wgpu::BufferUsages::COPY_DST
+                | wgpu::BufferUsages::COPY_SRC,
+            mapped_at_creation: false,
+        });
+
+        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+            label: Some("bind group layout"),
+            entries: &[wgpu::BindGroupLayoutEntry {
+                binding: 0,
+                visibility: wgpu::ShaderStages::COMPUTE,
+                ty: wgpu::BindingType::Buffer {
+                    ty: wgpu::BufferBindingType::Storage { read_only: false },
+                    has_dynamic_offset: false,
+                    min_binding_size: NonZeroU64::new(
+                        THREAD_COUNT * std::mem::size_of::<u32>() as u64,
+                    ),
+                },
+                count: None,
+            }],
+        });
+
+        let cs_module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
+            label: None,
+            source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(include_str!("shader.wgsl"))),
+        });
+
+        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+            label: Some("main"),
+            bind_group_layouts: &[&bind_group_layout],
+            push_constant_ranges: &[],
+        });
+
+        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+            label: None,
+            layout: Some(&pipeline_layout),
+            module: &cs_module,
+            entry_point: "main",
+        });
+
+        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
+            entries: &[wgpu::BindGroupEntry {
+                binding: 0,
+                resource: storage_buffer.as_entire_binding(),
+            }],
+            layout: &bind_group_layout,
+            label: Some("bind group"),
+        });
+
+        let mut encoder =
+            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
+        {
+            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                label: None,
+                timestamp_writes: None,
+            });
+            cpass.set_pipeline(&compute_pipeline);
+            cpass.set_bind_group(0, &bind_group, &[]);
+            // The shader's workgroup size equals THREAD_COUNT, so one workgroup supplies one
+            // invocation per result slot; dispatching more would write out of bounds.
+            cpass.dispatch_workgroups(1, 1, 1);
+        }
+
+        let mapping_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("Mapping buffer"),
+            size: THREAD_COUNT * std::mem::size_of::<u32>() as u64,
+            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
+            mapped_at_creation: false,
+        });
+        encoder.copy_buffer_to_buffer(
+            &storage_buffer,
+            0,
+            &mapping_buffer,
+            0,
+            THREAD_COUNT * std::mem::size_of::<u32>() as u64,
+        );
+        ctx.queue.submit(Some(encoder.finish()));
+
+        mapping_buffer
+            .slice(..)
+            .map_async(wgpu::MapMode::Read, |_| ());
+        ctx.device.poll(wgpu::Maintain::Wait);
+        let mapping_buffer_view = mapping_buffer.slice(..).get_mapped_range();
+        let result: &[u32; THREAD_COUNT as usize] = bytemuck::from_bytes(&mapping_buffer_view);
+        // The shader contains 27 checks; each one that passes adds 1 to the slot.
+        assert_eq!(result, &[27; THREAD_COUNT as usize]);
+    });
diff --git a/tests/tests/subgroup_operations/shader.wgsl b/tests/tests/subgroup_operations/shader.wgsl
new file mode 100644
index 00000000000..70d98dacd49
--- /dev/null
+++ b/tests/tests/subgroup_operations/shader.wgsl
@@ -0,0 +1,113 @@
+// One result slot per invocation: the number of checks that invocation passed.
+@group(0)
+@binding(0)
+var<storage, read_write> storage_buffer: array<u32>;
+
+@compute
+@workgroup_size(128)
+fn main(
+    @builtin(global_invocation_id) global_id: vec3<u32>,
+    @builtin(num_subgroups) num_subgroups: u32,
+    @builtin(subgroup_id) subgroup_id: u32,
+    @builtin(subgroup_size) subgroup_size: u32,
+    @builtin(subgroup_invocation_id) subgroup_invocation_id: u32,
+) {
+    var passed = 0u;
+    var expected: u32;
+
+    passed += u32(num_subgroups == 128u / subgroup_size);
+    passed += u32(subgroup_id == global_id.x / subgroup_size);
+    passed += u32(subgroup_invocation_id == global_id.x % subgroup_size);
+
+    // Expected ballot: bit i holds the low bit of (global_id.x - subgroup_invocation_id + i).
+    var expected_ballot = vec4<u32>(0u);
+    for(var i = 0u; i < subgroup_size; i += 1u) {
+        expected_ballot[i / 32u] |= ((global_id.x - subgroup_invocation_id + i) & 1u) << (i % 32u);
+    }
+    passed += u32(dot(vec4<u32>(1u), vec4<u32>(subgroupBallot((subgroup_invocation_id & 1u) == 1u) == expected_ballot)) == 4u);
+
+    passed += u32(subgroupAll(true));
+    passed += u32(!subgroupAll(subgroup_invocation_id != 0u));
+
+    passed += u32(subgroupAny(subgroup_invocation_id == 0u));
+    passed += u32(!subgroupAny(false));
+
+    // Each reduction of `global_id.x + 1u` is compared against a loop over subgroup_size values.
+    expected = 0u;
+    for(var i = 0u; i < subgroup_size; i += 1u) {
+        expected += global_id.x - subgroup_invocation_id + i + 1u;
+    }
+    passed += u32(subgroupAdd(global_id.x + 1u) == expected);
+
+    expected = 1u;
+    for(var i = 0u; i < subgroup_size; i += 1u) {
+        expected *= global_id.x - subgroup_invocation_id + i + 1u;
+    }
+    passed += u32(subgroupMul(global_id.x + 1u) == expected);
+
+    expected = 0u;
+    for(var i = 0u; i < subgroup_size; i += 1u) {
+        expected = max(expected, global_id.x - subgroup_invocation_id + i + 1u);
+    }
+    passed += u32(subgroupMax(global_id.x + 1u) == expected);
+
+    expected = 0xFFFFFFFFu;
+    for(var i = 0u; i < subgroup_size; i += 1u) {
+        expected = min(expected, global_id.x - subgroup_invocation_id + i + 1u);
+    }
+    passed += u32(subgroupMin(global_id.x + 1u) == expected);
+
+    expected = 0xFFFFFFFFu;
+    for(var i = 0u; i < subgroup_size; i += 1u) {
+        expected &= global_id.x - subgroup_invocation_id + i + 1u;
+    }
+    passed += u32(subgroupAnd(global_id.x + 1u) == expected);
+
+    expected = 0u;
+    for(var i = 0u; i < subgroup_size; i += 1u) {
+        expected |= global_id.x - subgroup_invocation_id + i + 1u;
+    }
+    passed += u32(subgroupOr(global_id.x + 1u) == expected);
+
+    expected = 0u;
+    for(var i = 0u; i < subgroup_size; i += 1u) {
+        expected ^= global_id.x - subgroup_invocation_id + i + 1u;
+    }
+    passed += u32(subgroupXor(global_id.x + 1u) == expected);
+
+    // Exclusive loops stop before subgroup_invocation_id (`<`); inclusive loops include it (`<=`).
+    expected = 0u;
+    for(var i = 0u; i < subgroup_invocation_id; i += 1u) {
+        expected += global_id.x - subgroup_invocation_id + i + 1u;
+    }
+    passed += u32(subgroupPrefixExclusiveAdd(global_id.x + 1u) == expected);
+
+    expected = 1u;
+    for(var i = 0u; i < subgroup_invocation_id; i += 1u) {
+        expected *= global_id.x - subgroup_invocation_id + i + 1u;
+    }
+    passed += u32(subgroupPrefixExclusiveMul(global_id.x + 1u) == expected);
+
+    expected = 0u;
+    for(var i = 0u; i <= subgroup_invocation_id; i += 1u) {
+        expected += global_id.x - subgroup_invocation_id + i + 1u;
+    }
+    passed += u32(subgroupPrefixInclusiveAdd(global_id.x + 1u) == expected);
+
+    expected = 1u;
+    for(var i = 0u; i <= subgroup_invocation_id; i += 1u) {
+        expected *= global_id.x - subgroup_invocation_id + i + 1u;
+    }
+    passed += u32(subgroupPrefixInclusiveMul(global_id.x + 1u) == expected);
+
+    passed += u32(subgroupBroadcastFirst(u32(subgroup_invocation_id != 0u)) == 0u);
+    passed += u32(subgroupBroadcastFirst(u32(subgroup_invocation_id == 0u)) == 1u);
+    passed += u32(subgroupBroadcast(subgroup_invocation_id, 4u) == 4u);
+    passed += u32(subgroupShuffle(subgroup_invocation_id, subgroup_invocation_id) == subgroup_invocation_id);
+    passed += u32(subgroupShuffle(subgroup_invocation_id, subgroup_size - 1u - subgroup_invocation_id) == subgroup_size - 1u - subgroup_invocation_id);
+    passed += u32(subgroup_invocation_id == subgroup_size - 1u || subgroupShuffleDown(subgroup_invocation_id, 1u) == subgroup_invocation_id + 1u);
+    passed += u32(subgroup_invocation_id == 0u || subgroupShuffleUp(subgroup_invocation_id, 1u) == subgroup_invocation_id - 1u);
+    passed += u32(subgroupShuffleXor(subgroup_invocation_id, subgroup_size - 1u) == (subgroup_invocation_id ^ (subgroup_size - 1u)));
+
+    storage_buffer[global_id.x] = passed;
+}