Skip to content

Commit

Permalink
Turn on GPU culling automatically if the target platform supports it.
Browse files Browse the repository at this point in the history
This commit removes the undocumented `GpuCulling` component in favor of
automatically turning GPU culling on when the platform supports it. The
main reason to make GPU culling automatic is that GPU culling enables
indirect mode. Indirect mode is needed for multidraw (#16427), because
non-indirect multidraw doesn't exist in `wgpu`. Since multidraw is such
a win for performance, when that feature is supported the small
performance tax that indirect mode incurs is virtually always worth
paying.

CPU culling is always used in addition to GPU culling unless the
`NoCpuCulling` component is placed on the camera. This results in some
amount of redundant computation on the GPU, but the overhead is
negligible. I figured that the GPU time savings gained from skipping
this computation when not needed didn't outweigh the costs of the added
complexity that would be necessary to implement that optimization,
especially with GPU two-phase occlusion culling on the horizon.
  • Loading branch information
pcwalton committed Dec 5, 2024
1 parent 0070514 commit 481a752
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 147 deletions.
123 changes: 62 additions & 61 deletions crates/bevy_pbr/src/render/gpu_preprocess.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use bevy_asset::{load_internal_asset, Handle};
use bevy_ecs::{
component::Component,
entity::Entity,
query::{Has, QueryState, Without},
query::{QueryState, Without},
schedule::{common_conditions::resource_exists, IntoSystemConfigs as _},
system::{lifetimeless::Read, Commands, Res, ResMut, Resource},
world::{FromWorld, World},
Expand All @@ -33,7 +33,7 @@ use bevy_render::{
SpecializedComputePipeline, SpecializedComputePipelines,
},
renderer::{RenderContext, RenderDevice, RenderQueue},
view::{GpuCulling, ViewUniform, ViewUniformOffset, ViewUniforms},
view::{ViewUniform, ViewUniformOffset, ViewUniforms},
Render, RenderApp, RenderSet,
};
use bevy_utils::tracing::warn;
Expand Down Expand Up @@ -66,12 +66,7 @@ pub struct GpuMeshPreprocessPlugin {
/// The render node for the mesh uniform building pass.
pub struct GpuPreprocessNode {
view_query: QueryState<
(
Entity,
Read<PreprocessBindGroup>,
Read<ViewUniformOffset>,
Has<GpuCulling>,
),
(Entity, Read<PreprocessBindGroup>, Read<ViewUniformOffset>),
Without<SkipGpuPreprocess>,
>,
}
Expand Down Expand Up @@ -192,6 +187,7 @@ impl Node for GpuPreprocessNode {
..
} = world.resource::<BatchedInstanceBuffers<MeshUniform, MeshInputUniform>>();

let gpu_preprocessing_support = world.resource::<GpuPreprocessingSupport>();
let pipeline_cache = world.resource::<PipelineCache>();
let preprocess_pipelines = world.resource::<PreprocessPipelines>();

Expand All @@ -204,9 +200,7 @@ impl Node for GpuPreprocessNode {
});

// Run the compute passes.
for (view, bind_group, view_uniform_offset, gpu_culling) in
self.view_query.iter_manual(world)
{
for (view, bind_group, view_uniform_offset) in self.view_query.iter_manual(world) {
// Grab the index buffer for this view.
let Some(index_buffer) = index_buffers.get(&view) else {
warn!("The preprocessing index buffer wasn't present");
Expand All @@ -215,10 +209,11 @@ impl Node for GpuPreprocessNode {

// Select the right pipeline, depending on whether GPU culling is in
// use.
let maybe_pipeline_id = if gpu_culling {
preprocess_pipelines.gpu_culling.pipeline_id
} else {
preprocess_pipelines.direct.pipeline_id
let maybe_pipeline_id = match *gpu_preprocessing_support {
GpuPreprocessingSupport::Culling => preprocess_pipelines.gpu_culling.pipeline_id,
GpuPreprocessingSupport::None | GpuPreprocessingSupport::PreprocessingOnly => {
preprocess_pipelines.direct.pipeline_id
}
};

// Fetch the pipeline.
Expand All @@ -237,7 +232,7 @@ impl Node for GpuPreprocessNode {
compute_pass.set_pipeline(preprocess_pipeline);

let mut dynamic_offsets: SmallVec<[u32; 1]> = smallvec![];
if gpu_culling {
if matches!(*gpu_preprocessing_support, GpuPreprocessingSupport::Culling) {
dynamic_offsets.push(view_uniform_offset.offset);
}
compute_pass.set_bind_group(0, &bind_group.0, &dynamic_offsets);
Expand Down Expand Up @@ -386,6 +381,7 @@ impl PreprocessPipeline {

/// A system that attaches the mesh uniform buffers to the bind groups for the
/// variants of the mesh preprocessing compute shader.
#[allow(clippy::too_many_arguments)]
pub fn prepare_preprocess_bind_groups(
mut commands: Commands,
render_device: Res<RenderDevice>,
Expand All @@ -394,6 +390,7 @@ pub fn prepare_preprocess_bind_groups(
mesh_culling_data_buffer: Res<MeshCullingDataBuffer>,
view_uniforms: Res<ViewUniforms>,
pipelines: Res<PreprocessPipelines>,
gpu_preprocessing_support: Res<GpuPreprocessingSupport>,
) {
// Grab the `BatchedInstanceBuffers`.
let BatchedInstanceBuffers {
Expand Down Expand Up @@ -424,52 +421,56 @@ pub fn prepare_preprocess_bind_groups(
)
.ok();

let bind_group = if index_buffer_vec.gpu_culling {
let (
Some(indirect_parameters_buffer),
Some(mesh_culling_data_buffer),
Some(view_uniforms_binding),
) = (
indirect_parameters_buffer.buffer(),
mesh_culling_data_buffer.buffer(),
view_uniforms.uniforms.binding(),
)
else {
continue;
};
let bind_group = match *gpu_preprocessing_support {
GpuPreprocessingSupport::Culling => {
let (
Some(indirect_parameters_buffer),
Some(mesh_culling_data_buffer),
Some(view_uniforms_binding),
) = (
indirect_parameters_buffer.buffer(),
mesh_culling_data_buffer.buffer(),
view_uniforms.uniforms.binding(),
)
else {
continue;
};

PreprocessBindGroup(render_device.create_bind_group(
"preprocess_gpu_culling_bind_group",
&pipelines.gpu_culling.bind_group_layout,
&BindGroupEntries::sequential((
current_input_buffer.as_entire_binding(),
previous_input_buffer.as_entire_binding(),
BindingResource::Buffer(BufferBinding {
buffer: index_buffer,
offset: 0,
size: index_buffer_size,
}),
data_buffer.as_entire_binding(),
indirect_parameters_buffer.as_entire_binding(),
mesh_culling_data_buffer.as_entire_binding(),
view_uniforms_binding,
)),
))
}

PreprocessBindGroup(render_device.create_bind_group(
"preprocess_gpu_culling_bind_group",
&pipelines.gpu_culling.bind_group_layout,
&BindGroupEntries::sequential((
current_input_buffer.as_entire_binding(),
previous_input_buffer.as_entire_binding(),
BindingResource::Buffer(BufferBinding {
buffer: index_buffer,
offset: 0,
size: index_buffer_size,
}),
data_buffer.as_entire_binding(),
indirect_parameters_buffer.as_entire_binding(),
mesh_culling_data_buffer.as_entire_binding(),
view_uniforms_binding,
)),
))
} else {
PreprocessBindGroup(render_device.create_bind_group(
"preprocess_direct_bind_group",
&pipelines.direct.bind_group_layout,
&BindGroupEntries::sequential((
current_input_buffer.as_entire_binding(),
previous_input_buffer.as_entire_binding(),
BindingResource::Buffer(BufferBinding {
buffer: index_buffer,
offset: 0,
size: index_buffer_size,
}),
data_buffer.as_entire_binding(),
)),
))
GpuPreprocessingSupport::None | GpuPreprocessingSupport::PreprocessingOnly => {
PreprocessBindGroup(render_device.create_bind_group(
"preprocess_direct_bind_group",
&pipelines.direct.bind_group_layout,
&BindGroupEntries::sequential((
current_input_buffer.as_entire_binding(),
previous_input_buffer.as_entire_binding(),
BindingResource::Buffer(BufferBinding {
buffer: index_buffer,
offset: 0,
size: index_buffer_size,
}),
data_buffer.as_entire_binding(),
)),
))
}
};

commands.entity(*view).insert(bind_group);
Expand Down
16 changes: 7 additions & 9 deletions crates/bevy_pbr/src/render/mesh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ use bevy_render::{
},
no_gpu_preprocessing, GetBatchData, GetFullBatchData, NoAutomaticBatching,
},
camera::Camera,
mesh::*,
primitives::Aabb,
render_asset::{ExtractAssetsSet, RenderAssets},
Expand All @@ -37,7 +36,7 @@ use bevy_render::{
renderer::{RenderDevice, RenderQueue},
texture::DefaultImageSampler,
view::{
prepare_view_targets, GpuCulling, RenderVisibilityRanges, ViewTarget, ViewUniformOffset,
prepare_view_targets, RenderVisibilityRanges, ViewTarget, ViewUniformOffset,
ViewVisibility, VisibilityRange,
},
Extract,
Expand Down Expand Up @@ -1190,6 +1189,7 @@ pub fn extract_meshes_for_gpu_building(
mut render_mesh_instances: ResMut<RenderMeshInstances>,
render_visibility_ranges: Res<RenderVisibilityRanges>,
mesh_material_ids: Res<RenderMeshMaterialIds>,
gpu_preprocessing_support: Res<GpuPreprocessingSupport>,
mut render_mesh_instance_queues: ResMut<RenderMeshInstanceGpuQueues>,
changed_meshes_query: Extract<
Query<
Expand Down Expand Up @@ -1225,15 +1225,13 @@ pub fn extract_meshes_for_gpu_building(
mut removed_visibilities_query: Extract<RemovedComponents<ViewVisibility>>,
mut removed_global_transforms_query: Extract<RemovedComponents<GlobalTransform>>,
mut removed_meshes_query: Extract<RemovedComponents<Mesh3d>>,
cameras_query: Extract<Query<(), (With<Camera>, With<GpuCulling>)>>,
) {
let any_gpu_culling = !cameras_query.is_empty();
let gpu_culling = matches!(*gpu_preprocessing_support, GpuPreprocessingSupport::Culling);
for render_mesh_instance_queue in render_mesh_instance_queues.iter_mut() {
render_mesh_instance_queue.init(any_gpu_culling);
render_mesh_instance_queue.init(gpu_culling);
}

// Collect render mesh instances. Build up the uniform buffer.

let RenderMeshInstances::GpuBuilding(ref mut render_mesh_instances) = *render_mesh_instances
else {
panic!(
Expand Down Expand Up @@ -1262,7 +1260,7 @@ pub fn extract_meshes_for_gpu_building(
visibility_range,
)| {
if !view_visibility.get() {
queue.remove(entity.into(), any_gpu_culling);
queue.remove(entity.into(), gpu_culling);
return;
}

Expand Down Expand Up @@ -1291,7 +1289,7 @@ pub fn extract_meshes_for_gpu_building(

let lightmap_uv_rect = pack_lightmap_uv_rect(lightmap.map(|lightmap| lightmap.uv_rect));

let gpu_mesh_culling_data = any_gpu_culling.then(|| MeshCullingData::new(aabb));
let gpu_mesh_culling_data = gpu_culling.then(|| MeshCullingData::new(aabb));

let previous_input_index = if shared
.flags
Expand Down Expand Up @@ -1331,7 +1329,7 @@ pub fn extract_meshes_for_gpu_building(
// It's possible that a necessary component was removed and re-added in
// the same frame.
if !changed_meshes_query.contains(entity) {
queue.remove(entity.into(), any_gpu_culling);
queue.remove(entity.into(), gpu_culling);
}
}
}
Expand Down
Loading

0 comments on commit 481a752

Please sign in to comment.