Skip to content

Commit

Permalink
Reduce the size of MeshUniform to improve performance (#9416)
Browse files Browse the repository at this point in the history
# Objective

- Significantly reduce the size of MeshUniform by only including
necessary data.

## Solution

Local-to-world (model) transforms are affine, which means a 4x3 matrix is
sufficient to represent them.

`MeshUniform` stores the current and previous model transforms, and the
inverse transpose of the current model transform, all as 4x4 matrices.
Instead, we can store the current and previous model transforms as 4x3
matrices, and we only need the upper-left 3x3 part of the inverse
transpose of the current model transform. This change allows us to
reduce the serialized `MeshUniform` size from 208 bytes to 144 bytes,
which is over a 30% saving in the data to serialize, and in VRAM
bandwidth and space.

## Benchmarks

On an M1 Max running `many_cubes -- sphere` (main is in yellow, this PR
is in red):
<img width="1484" alt="Screenshot 2023-08-11 at 02 36 43"
src="https://github.com/bevyengine/bevy/assets/302146/7d99c7b3-f2bb-4004-a8d0-4c00f755cb0d">
A reduction in frame time of ~14%.

---

## Changelog

- Changed: Redefined `MeshUniform` to improve performance by using 4x3
affine transforms and reconstructing 4x4 matrices in the shader. Helper
functions were added to `bevy_pbr::mesh_functions` to unpack the data.
`affine_to_square` converts a 4x3 affine transform, packed as 3x4 matrix
data, into a 4x4 matrix. `mat2x4_f32_to_mat3x3` converts a 3x3 matrix,
packed as a mat2x4 plus an f32, back into a 3x3 matrix.

## Migration Guide

Shader code before:
```
var model = mesh[instance_index].model;
```

Shader code after:
```
#import bevy_pbr::mesh_functions affine_to_square

var model = affine_to_square(mesh[instance_index].model);
```
  • Loading branch information
superdump authored Aug 15, 2023
1 parent b30ff2a commit 0a11af9
Show file tree
Hide file tree
Showing 18 changed files with 248 additions and 97 deletions.
5 changes: 2 additions & 3 deletions assets/shaders/custom_vertex_attribute.wgsl
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#import bevy_pbr::mesh_bindings mesh
#import bevy_pbr::mesh_functions mesh_position_local_to_clip
#import bevy_render::instance_index
#import bevy_pbr::mesh_functions get_model_matrix, mesh_position_local_to_clip

struct CustomMaterial {
color: vec4<f32>,
Expand All @@ -23,7 +22,7 @@ struct VertexOutput {
fn vertex(vertex: Vertex) -> VertexOutput {
var out: VertexOutput;
out.clip_position = mesh_position_local_to_clip(
mesh[bevy_render::instance_index::get_instance_index(vertex.instance_index)].model,
get_model_matrix(vertex.instance_index),
vec4<f32>(vertex.position, 1.0),
);
out.blend_color = vertex.blend_color;
Expand Down
12 changes: 6 additions & 6 deletions assets/shaders/instancing.wgsl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#import bevy_pbr::mesh_functions mesh_position_local_to_clip
#import bevy_pbr::mesh_functions get_model_matrix, mesh_position_local_to_clip
#import bevy_pbr::mesh_bindings mesh

struct Vertex {
Expand All @@ -19,12 +19,12 @@ struct VertexOutput {
fn vertex(vertex: Vertex) -> VertexOutput {
let position = vertex.position * vertex.i_pos_scale.w + vertex.i_pos_scale.xyz;
var out: VertexOutput;
// NOTE: The 0 index into the Mesh array is a hack for this example as the
// instance_index builtin would map to the wrong index in the Mesh array.
// This index could be passed in via another uniform instead but it's
// unnecessary for the example.
// NOTE: Passing 0 as the instance_index to get_model_matrix() is a hack
// for this example as the instance_index builtin would map to the wrong
// index in the Mesh array. This index could be passed in via another
// uniform instead but it's unnecessary for the example.
out.clip_position = mesh_position_local_to_clip(
mesh[0].model,
get_model_matrix(0),
vec4<f32>(position, 1.0)
);
out.color = vertex.i_color;
Expand Down
29 changes: 29 additions & 0 deletions crates/bevy_math/src/affine3.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use glam::{Affine3A, Mat3, Vec3};

/// Reduced-size version of `glam::Affine3A` for use when storage has
/// significant performance impact. Convert to `glam::Affine3A` to do
/// non-trivial calculations.
///
/// The transform is kept as two parts: a [`Mat3`] holding the linear
/// (non-translation) component and a [`Vec3`] holding the translation.
/// Conversions to and from `glam::Affine3A` are available through `From`
/// on references.
// Both fields are plain `Copy` glam value types, so the common value-type
// traits can simply be derived.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Affine3 {
    /// Scaling, rotation, shears, and other non-translation affine transforms
    pub matrix3: Mat3,
    /// Translation
    pub translation: Vec3,
}

impl From<&Affine3A> for Affine3 {
fn from(affine: &Affine3A) -> Self {
Self {
matrix3: affine.matrix3.into(),
translation: affine.translation.into(),
}
}
}

impl From<&Affine3> for Affine3A {
fn from(affine3: &Affine3) -> Self {
Self {
matrix3: affine3.matrix3.into(),
translation: affine3.translation.into(),
}
}
}
2 changes: 2 additions & 0 deletions crates/bevy_math/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
#![allow(clippy::type_complexity)]
#![warn(missing_docs)]

mod affine3;
pub mod cubic_splines;
mod ray;
mod rects;

pub use affine3::*;
pub use ray::Ray;
pub use rects::*;

Expand Down
6 changes: 3 additions & 3 deletions crates/bevy_pbr/src/light.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2025,7 +2025,7 @@ pub fn check_light_mesh_visibility(
view_frusta.iter().zip(view_visible_entities)
{
// Disable near-plane culling, as a shadow caster could lie before the near plane.
if !frustum.intersects_obb(aabb, &transform.compute_matrix(), false, true) {
if !frustum.intersects_obb(aabb, &transform.affine(), false, true) {
continue;
}

Expand Down Expand Up @@ -2098,7 +2098,7 @@ pub fn check_light_mesh_visibility(

// If we have an aabb and transform, do frustum culling
if let (Some(aabb), Some(transform)) = (maybe_aabb, maybe_transform) {
let model_to_world = transform.compute_matrix();
let model_to_world = transform.affine();
// Do a cheap sphere vs obb test to prune out most meshes outside the sphere of the light
if !light_sphere.intersects_obb(aabb, &model_to_world) {
continue;
Expand Down Expand Up @@ -2162,7 +2162,7 @@ pub fn check_light_mesh_visibility(

// If we have an aabb and transform, do frustum culling
if let (Some(aabb), Some(transform)) = (maybe_aabb, maybe_transform) {
let model_to_world = transform.compute_matrix();
let model_to_world = transform.affine();
// Do a cheap sphere vs obb test to prune out most meshes outside the sphere of the light
if !light_sphere.intersects_obb(aabb, &model_to_world) {
continue;
Expand Down
9 changes: 5 additions & 4 deletions crates/bevy_pbr/src/material.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
render, AlphaMode, DrawMesh, DrawPrepass, EnvironmentMapLight, MeshPipeline, MeshPipelineKey,
MeshUniform, PrepassPipelinePlugin, PrepassPlugin, RenderLightSystems,
MeshTransforms, MeshUniform, PrepassPipelinePlugin, PrepassPlugin, RenderLightSystems,
ScreenSpaceAmbientOcclusionSettings, SetMeshBindGroup, SetMeshViewBindGroup, Shadow,
};
use bevy_app::{App, Plugin};
Expand Down Expand Up @@ -382,7 +382,7 @@ pub fn queue_material_meshes<M: Material>(
material_meshes: Query<(
&Handle<M>,
&Handle<Mesh>,
&MeshUniform,
&MeshTransforms,
&GpuArrayBufferIndex<MeshUniform>,
)>,
images: Res<RenderAssets<Image>>,
Expand Down Expand Up @@ -468,7 +468,7 @@ pub fn queue_material_meshes<M: Material>(

let rangefinder = view.rangefinder3d();
for visible_entity in &visible_entities.entities {
if let Ok((material_handle, mesh_handle, mesh_uniform, batch_indices)) =
if let Ok((material_handle, mesh_handle, mesh_transforms, batch_indices)) =
material_meshes.get(*visible_entity)
{
if let (Some(mesh), Some(material)) = (
Expand Down Expand Up @@ -516,7 +516,8 @@ pub fn queue_material_meshes<M: Material>(
}
};

let distance = rangefinder.distance(&mesh_uniform.transform)
let distance = rangefinder
.distance_translation(&mesh_transforms.transform.translation)
+ material.properties.depth_bias;
match material.properties.alpha_mode {
AlphaMode::Opaque => {
Expand Down
18 changes: 9 additions & 9 deletions crates/bevy_pbr/src/prepass/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use bevy_ecs::{
SystemParamItem,
},
};
use bevy_math::Mat4;
use bevy_math::{Affine3A, Mat4};
use bevy_reflect::TypeUuid;
use bevy_render::{
globals::{GlobalsBuffer, GlobalsUniform},
Expand Down Expand Up @@ -46,8 +46,8 @@ use bevy_utils::tracing::error;

use crate::{
prepare_lights, setup_morph_and_skinning_defs, AlphaMode, DrawMesh, Material, MaterialPipeline,
MaterialPipelineKey, MeshLayouts, MeshPipeline, MeshPipelineKey, MeshUniform, RenderMaterials,
SetMaterialBindGroup, SetMeshBindGroup,
MaterialPipelineKey, MeshLayouts, MeshPipeline, MeshPipelineKey, MeshTransforms, MeshUniform,
RenderMaterials, SetMaterialBindGroup, SetMeshBindGroup,
};

use std::{hash::Hash, marker::PhantomData};
Expand Down Expand Up @@ -203,7 +203,7 @@ pub fn update_previous_view_projections(
}

#[derive(Component)]
pub struct PreviousGlobalTransform(pub Mat4);
pub struct PreviousGlobalTransform(pub Affine3A);

pub fn update_mesh_previous_global_transforms(
mut commands: Commands,
Expand All @@ -216,7 +216,7 @@ pub fn update_mesh_previous_global_transforms(
for (entity, transform) in &meshes {
commands
.entity(entity)
.insert(PreviousGlobalTransform(transform.compute_matrix()));
.insert(PreviousGlobalTransform(transform.affine()));
}
}
}
Expand Down Expand Up @@ -762,7 +762,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
material_meshes: Query<(
&Handle<M>,
&Handle<Mesh>,
&MeshUniform,
&MeshTransforms,
&GpuArrayBufferIndex<MeshUniform>,
)>,
mut views: Query<(
Expand Down Expand Up @@ -809,7 +809,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
let rangefinder = view.rangefinder3d();

for visible_entity in &visible_entities.entities {
let Ok((material_handle, mesh_handle, mesh_uniform, batch_indices)) = material_meshes.get(*visible_entity) else {
let Ok((material_handle, mesh_handle, mesh_transforms, batch_indices)) = material_meshes.get(*visible_entity) else {
continue;
};

Expand Down Expand Up @@ -852,8 +852,8 @@ pub fn queue_prepass_material_meshes<M: Material>(
}
};

let distance =
rangefinder.distance(&mesh_uniform.transform) + material.properties.depth_bias;
let distance = rangefinder.distance_translation(&mesh_transforms.transform.translation)
+ material.properties.depth_bias;
match alpha_mode {
AlphaMode::Opaque => {
opaque_phase.add(Opaque3dPrepass {
Expand Down
10 changes: 5 additions & 5 deletions crates/bevy_pbr/src/prepass/prepass.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#import bevy_pbr::skinning
#import bevy_pbr::morph
#import bevy_pbr::mesh_bindings mesh
#import bevy_render::instance_index
#import bevy_render::instance_index get_instance_index

// Most of these attributes are not used in the default prepass fragment shader, but they are still needed so we can
// pass them to custom prepass shaders like pbr_prepass.wgsl.
Expand Down Expand Up @@ -92,7 +92,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput {
#else // SKINNED
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
var model = mesh[bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)].model;
var model = bevy_pbr::mesh_functions::get_model_matrix(vertex_no_morph.instance_index);
#endif // SKINNED

out.clip_position = bevy_pbr::mesh_functions::mesh_position_local_to_clip(model, vec4(vertex.position, 1.0));
Expand All @@ -113,7 +113,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput {
vertex.normal,
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)
get_instance_index(vertex_no_morph.instance_index)
);
#endif // SKINNED

Expand All @@ -123,7 +123,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput {
vertex.tangent,
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)
get_instance_index(vertex_no_morph.instance_index)
);
#endif // VERTEX_TANGENTS
#endif // NORMAL_PREPASS
Expand All @@ -133,7 +133,7 @@ fn vertex(vertex_no_morph: Vertex) -> VertexOutput {
// Use vertex_no_morph.instance_index instead of vertex.instance_index to work around a wgpu dx12 bug.
// See https://github.com/gfx-rs/naga/issues/2416
out.previous_world_position = bevy_pbr::mesh_functions::mesh_position_local_to_world(
mesh[bevy_render::instance_index::get_instance_index(vertex_no_morph.instance_index)].previous_model,
bevy_pbr::mesh_functions::get_previous_model_matrix(vertex_no_morph.instance_index),
vec4<f32>(vertex.position, 1.0)
);
#endif // MOTION_VECTOR_PREPASS
Expand Down
Loading

0 comments on commit 0a11af9

Please sign in to comment.