Turn on GPU culling automatically if the target platform supports it.

This commit removes the undocumented `GpuCulling` component in favor of automatically turning GPU culling on when the platform supports it. The main reason to make GPU culling automatic is that GPU culling enables indirect mode. Indirect mode is needed for multidraw (#16427), because non-indirect multidraw doesn't exist in `wgpu`. Since multidraw is such a win for performance, when that feature is supported, the small performance tax that indirect mode incurs is virtually always worth paying. CPU culling is always used in addition to GPU culling unless the `NoCpuCulling` component is placed on the camera. This results in some amount of redundant computation on the GPU, but the overhead is negligible. I figured that the GPU time savings gained from skipping this computation when not needed didn't outweigh the costs of the added complexity that would be necessary to implement that optimization, especially with GPU two-phase occlusion culling on the horizon.
bevyengine · Dec 5, 2024 · 481a752 · 481a752
1 parent 0070514
commit 481a752
Show file tree

Hide file tree

Showing 6 changed files with 129 additions and 147 deletions.
diff --git a/crates/bevy_pbr/src/render/gpu_preprocess.rs b/crates/bevy_pbr/src/render/gpu_preprocess.rs
@@ -13,7 +13,7 @@ use bevy_asset::{load_internal_asset, Handle};
 use bevy_ecs::{
     component::Component,
     entity::Entity,
-    query::{Has, QueryState, Without},
+    query::{QueryState, Without},
     schedule::{common_conditions::resource_exists, IntoSystemConfigs as _},
     system::{lifetimeless::Read, Commands, Res, ResMut, Resource},
     world::{FromWorld, World},
@@ -33,7 +33,7 @@ use bevy_render::{
         SpecializedComputePipeline, SpecializedComputePipelines,
     },
     renderer::{RenderContext, RenderDevice, RenderQueue},
-    view::{GpuCulling, ViewUniform, ViewUniformOffset, ViewUniforms},
+    view::{ViewUniform, ViewUniformOffset, ViewUniforms},
     Render, RenderApp, RenderSet,
 };
 use bevy_utils::tracing::warn;
@@ -66,12 +66,7 @@ pub struct GpuMeshPreprocessPlugin {
 /// The render node for the mesh uniform building pass.
 pub struct GpuPreprocessNode {
     view_query: QueryState<
-        (
-            Entity,
-            Read<PreprocessBindGroup>,
-            Read<ViewUniformOffset>,
-            Has<GpuCulling>,
-        ),
+        (Entity, Read<PreprocessBindGroup>, Read<ViewUniformOffset>),
         Without<SkipGpuPreprocess>,
     >,
 }
@@ -192,6 +187,7 @@ impl Node for GpuPreprocessNode {
             ..
         } = world.resource::<BatchedInstanceBuffers<MeshUniform, MeshInputUniform>>();
 
+        let gpu_preprocessing_support = world.resource::<GpuPreprocessingSupport>();
         let pipeline_cache = world.resource::<PipelineCache>();
         let preprocess_pipelines = world.resource::<PreprocessPipelines>();
 
@@ -204,9 +200,7 @@ impl Node for GpuPreprocessNode {
                 });
 
         // Run the compute passes.
-        for (view, bind_group, view_uniform_offset, gpu_culling) in
-            self.view_query.iter_manual(world)
-        {
+        for (view, bind_group, view_uniform_offset) in self.view_query.iter_manual(world) {
             // Grab the index buffer for this view.
             let Some(index_buffer) = index_buffers.get(&view) else {
                 warn!("The preprocessing index buffer wasn't present");
@@ -215,10 +209,11 @@ impl Node for GpuPreprocessNode {
 
             // Select the right pipeline, depending on whether GPU culling is in
             // use.
-            let maybe_pipeline_id = if gpu_culling {
-                preprocess_pipelines.gpu_culling.pipeline_id
-            } else {
-                preprocess_pipelines.direct.pipeline_id
+            let maybe_pipeline_id = match *gpu_preprocessing_support {
+                GpuPreprocessingSupport::Culling => preprocess_pipelines.gpu_culling.pipeline_id,
+                GpuPreprocessingSupport::None | GpuPreprocessingSupport::PreprocessingOnly => {
+                    preprocess_pipelines.direct.pipeline_id
+                }
             };
 
             // Fetch the pipeline.
@@ -237,7 +232,7 @@ impl Node for GpuPreprocessNode {
             compute_pass.set_pipeline(preprocess_pipeline);
 
             let mut dynamic_offsets: SmallVec<[u32; 1]> = smallvec![];
-            if gpu_culling {
+            if matches!(*gpu_preprocessing_support, GpuPreprocessingSupport::Culling) {
                 dynamic_offsets.push(view_uniform_offset.offset);
             }
             compute_pass.set_bind_group(0, &bind_group.0, &dynamic_offsets);
@@ -386,6 +381,7 @@ impl PreprocessPipeline {
 
 /// A system that attaches the mesh uniform buffers to the bind groups for the
 /// variants of the mesh preprocessing compute shader.
+#[allow(clippy::too_many_arguments)]
 pub fn prepare_preprocess_bind_groups(
     mut commands: Commands,
     render_device: Res<RenderDevice>,
@@ -394,6 +390,7 @@ pub fn prepare_preprocess_bind_groups(
     mesh_culling_data_buffer: Res<MeshCullingDataBuffer>,
     view_uniforms: Res<ViewUniforms>,
     pipelines: Res<PreprocessPipelines>,
+    gpu_preprocessing_support: Res<GpuPreprocessingSupport>,
 ) {
     // Grab the `BatchedInstanceBuffers`.
     let BatchedInstanceBuffers {
@@ -424,52 +421,56 @@ pub fn prepare_preprocess_bind_groups(
         )
         .ok();
 
-        let bind_group = if index_buffer_vec.gpu_culling {
-            let (
-                Some(indirect_parameters_buffer),
-                Some(mesh_culling_data_buffer),
-                Some(view_uniforms_binding),
-            ) = (
-                indirect_parameters_buffer.buffer(),
-                mesh_culling_data_buffer.buffer(),
-                view_uniforms.uniforms.binding(),
-            )
-            else {
-                continue;
-            };
+        let bind_group = match *gpu_preprocessing_support {
+            GpuPreprocessingSupport::Culling => {
+                let (
+                    Some(indirect_parameters_buffer),
+                    Some(mesh_culling_data_buffer),
+                    Some(view_uniforms_binding),
+                ) = (
+                    indirect_parameters_buffer.buffer(),
+                    mesh_culling_data_buffer.buffer(),
+                    view_uniforms.uniforms.binding(),
+                )
+                else {
+                    continue;
+                };
+
+                PreprocessBindGroup(render_device.create_bind_group(
+                    "preprocess_gpu_culling_bind_group",
+                    &pipelines.gpu_culling.bind_group_layout,
+                    &BindGroupEntries::sequential((
+                        current_input_buffer.as_entire_binding(),
+                        previous_input_buffer.as_entire_binding(),
+                        BindingResource::Buffer(BufferBinding {
+                            buffer: index_buffer,
+                            offset: 0,
+                            size: index_buffer_size,
+                        }),
+                        data_buffer.as_entire_binding(),
+                        indirect_parameters_buffer.as_entire_binding(),
+                        mesh_culling_data_buffer.as_entire_binding(),
+                        view_uniforms_binding,
+                    )),
+                ))
+            }
 
-            PreprocessBindGroup(render_device.create_bind_group(
-                "preprocess_gpu_culling_bind_group",
-                &pipelines.gpu_culling.bind_group_layout,
-                &BindGroupEntries::sequential((
-                    current_input_buffer.as_entire_binding(),
-                    previous_input_buffer.as_entire_binding(),
-                    BindingResource::Buffer(BufferBinding {
-                        buffer: index_buffer,
-                        offset: 0,
-                        size: index_buffer_size,
-                    }),
-                    data_buffer.as_entire_binding(),
-                    indirect_parameters_buffer.as_entire_binding(),
-                    mesh_culling_data_buffer.as_entire_binding(),
-                    view_uniforms_binding,
-                )),
-            ))
-        } else {
-            PreprocessBindGroup(render_device.create_bind_group(
-                "preprocess_direct_bind_group",
-                &pipelines.direct.bind_group_layout,
-                &BindGroupEntries::sequential((
-                    current_input_buffer.as_entire_binding(),
-                    previous_input_buffer.as_entire_binding(),
-                    BindingResource::Buffer(BufferBinding {
-                        buffer: index_buffer,
-                        offset: 0,
-                        size: index_buffer_size,
-                    }),
-                    data_buffer.as_entire_binding(),
-                )),
-            ))
+            GpuPreprocessingSupport::None | GpuPreprocessingSupport::PreprocessingOnly => {
+                PreprocessBindGroup(render_device.create_bind_group(
+                    "preprocess_direct_bind_group",
+                    &pipelines.direct.bind_group_layout,
+                    &BindGroupEntries::sequential((
+                        current_input_buffer.as_entire_binding(),
+                        previous_input_buffer.as_entire_binding(),
+                        BindingResource::Buffer(BufferBinding {
+                            buffer: index_buffer,
+                            offset: 0,
+                            size: index_buffer_size,
+                        }),
+                        data_buffer.as_entire_binding(),
+                    )),
+                ))
+            }
         };
 
         commands.entity(*view).insert(bind_group);

diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
@@ -25,7 +25,6 @@ use bevy_render::{
         },
         no_gpu_preprocessing, GetBatchData, GetFullBatchData, NoAutomaticBatching,
     },
-    camera::Camera,
     mesh::*,
     primitives::Aabb,
     render_asset::{ExtractAssetsSet, RenderAssets},
@@ -37,7 +36,7 @@ use bevy_render::{
     renderer::{RenderDevice, RenderQueue},
     texture::DefaultImageSampler,
     view::{
-        prepare_view_targets, GpuCulling, RenderVisibilityRanges, ViewTarget, ViewUniformOffset,
+        prepare_view_targets, RenderVisibilityRanges, ViewTarget, ViewUniformOffset,
         ViewVisibility, VisibilityRange,
     },
     Extract,
@@ -1190,6 +1189,7 @@ pub fn extract_meshes_for_gpu_building(
     mut render_mesh_instances: ResMut<RenderMeshInstances>,
     render_visibility_ranges: Res<RenderVisibilityRanges>,
     mesh_material_ids: Res<RenderMeshMaterialIds>,
+    gpu_preprocessing_support: Res<GpuPreprocessingSupport>,
     mut render_mesh_instance_queues: ResMut<RenderMeshInstanceGpuQueues>,
     changed_meshes_query: Extract<
         Query<
@@ -1225,15 +1225,13 @@ pub fn extract_meshes_for_gpu_building(
     mut removed_visibilities_query: Extract<RemovedComponents<ViewVisibility>>,
     mut removed_global_transforms_query: Extract<RemovedComponents<GlobalTransform>>,
     mut removed_meshes_query: Extract<RemovedComponents<Mesh3d>>,
-    cameras_query: Extract<Query<(), (With<Camera>, With<GpuCulling>)>>,
 ) {
-    let any_gpu_culling = !cameras_query.is_empty();
+    let gpu_culling = matches!(*gpu_preprocessing_support, GpuPreprocessingSupport::Culling);
     for render_mesh_instance_queue in render_mesh_instance_queues.iter_mut() {
-        render_mesh_instance_queue.init(any_gpu_culling);
+        render_mesh_instance_queue.init(gpu_culling);
     }
 
     // Collect render mesh instances. Build up the uniform buffer.
-
     let RenderMeshInstances::GpuBuilding(ref mut render_mesh_instances) = *render_mesh_instances
     else {
         panic!(
@@ -1262,7 +1260,7 @@ pub fn extract_meshes_for_gpu_building(
             visibility_range,
         )| {
             if !view_visibility.get() {
-                queue.remove(entity.into(), any_gpu_culling);
+                queue.remove(entity.into(), gpu_culling);
                 return;
             }
 
@@ -1291,7 +1289,7 @@ pub fn extract_meshes_for_gpu_building(
 
             let lightmap_uv_rect = pack_lightmap_uv_rect(lightmap.map(|lightmap| lightmap.uv_rect));
 
-            let gpu_mesh_culling_data = any_gpu_culling.then(|| MeshCullingData::new(aabb));
+            let gpu_mesh_culling_data = gpu_culling.then(|| MeshCullingData::new(aabb));
 
             let previous_input_index = if shared
                 .flags
@@ -1331,7 +1329,7 @@ pub fn extract_meshes_for_gpu_building(
         // It's possible that a necessary component was removed and re-added in
         // the same frame.
         if !changed_meshes_query.contains(entity) {
-            queue.remove(entity.into(), any_gpu_culling);
+            queue.remove(entity.into(), gpu_culling);
         }
     }
 }