From b2187ccde7e26f1b1e60e4a44593bbd562c1997d Mon Sep 17 00:00:00 2001 From: TheGrimsey Date: Tue, 8 Oct 2024 20:32:10 +0200 Subject: [PATCH] Optimize mesh tangent generation (Many times faster) & SIMD for the noise application (30% reduction in time) --- examples/many_tiles.rs | 22 +++++- src/lib.rs | 56 ++++++++++----- src/material.rs | 29 ++++++-- src/meshing.rs | 144 ++++++++++++++++++++++++++++++++------ src/noise.rs | 152 ++++++++++++++++++++++++++++++++++++++--- 5 files changed, 350 insertions(+), 53 deletions(-) diff --git a/examples/many_tiles.rs b/examples/many_tiles.rs index 74a0089..83f22dd 100644 --- a/examples/many_tiles.rs +++ b/examples/many_tiles.rs @@ -17,8 +17,8 @@ use bevy_editor_pls::{default_windows::cameras::ActiveEditorCamera, editor_windo use bevy_lookup_curve::{editor::{LookupCurveEditor, LookupCurveEguiEditor}, LookupCurve}; use bevy_world_seed::{ material::{ - GlobalTexturingRules, TerrainTexturingSettings, TexturingRule, TexturingRuleEvaluator, - }, noise::{NoiseCache, TerrainNoiseDetailLayer, TerrainNoiseSettings, TerrainNoiseSplineLayer}, terrain::{Terrain, TileToTerrain}, RebuildTile, TerrainPlugin, TerrainSettings + GlobalTexturingRules, TerrainTextureRebuildQueue, TerrainTexturingSettings, TexturingRule, TexturingRuleEvaluator + }, meshing::TerrainMeshRebuildQueue, noise::{NoiseCache, TerrainNoiseDetailLayer, TerrainNoiseSettings, TerrainNoiseSplineLayer}, terrain::{Terrain, TileToTerrain}, RebuildTile, TerrainHeightRebuildQueue, TerrainPlugin, TerrainSettings }; fn main() { @@ -193,6 +193,24 @@ impl EditorWindow for NoiseDebugWindow { world.send_event_batch(tiles.into_iter().map(RebuildTile)); } + + let heights_queue = world.resource::(); + let mesh_queue = world.resource::(); + let texture_queue = world.resource::(); + + if !heights_queue.is_empty() || !mesh_queue.is_empty() || !texture_queue.is_empty() { + ui.heading("Queued"); + + ui.columns(3, |ui| { + ui[0].label("Heights"); + ui[1].label("Meshes"); + ui[2].label("Textures"); 
+ + ui[0].label(heights_queue.count().to_string()); + ui[1].label(mesh_queue.count().to_string()); + ui[2].label(texture_queue.count().to_string()); + }); + } let mut query_state = world.query_filtered::<&GlobalTransform, With>(); let lookup_curves = world.resource::>(); diff --git a/src/lib.rs b/src/lib.rs index 09ddba6..c6ca994 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,7 @@ use std::num::NonZeroU8; use bevy::{ app::{App, Plugin, PostUpdate}, asset::Assets, log::info_span, math::{FloatExt, IVec2, Vec2, Vec3, Vec3Swizzles}, prelude::{ - any_with_component, resource_changed, AnyOf, Component, Deref, Event, EventReader, EventWriter, IntoSystemConfigs, Local, Query, ReflectResource, Res, ResMut, Resource, SystemSet, TransformSystem + any_with_component, resource_changed, AnyOf, Component, Deref, DetectChanges, Event, EventReader, EventWriter, IntoSystemConfigs, Local, Query, ReflectResource, Res, ResMut, Resource, SystemSet, TransformSystem }, reflect::Reflect, transform::components::GlobalTransform }; use bevy_lookup_curve::{LookupCurve, LookupCurvePlugin}; @@ -22,7 +22,7 @@ use modifiers::{ ModifierStrengthLimitProperty, ShapeModifier, TerrainSplineCached, TerrainSplineProperties, TerrainSplineShape, TileToModifierMapping, }; -use noise::{NoiseCache, TerrainNoiseDetailLayer, TerrainNoiseSettings}; +use noise::{apply_noise_simd, NoiseCache, TerrainNoiseDetailLayer, TerrainNoiseSettings}; use snap_to_terrain::TerrainSnapToTerrainPlugin; use terrain::{insert_components, update_tiling, Holes, Terrain, TileToTerrain}; use utils::{distance_squared_to_line_segment, index_to_x_z}; @@ -35,7 +35,7 @@ mod debug_draw; #[cfg(feature = "rendering")] pub mod material; #[cfg(feature = "rendering")] -mod meshing; +pub mod meshing; pub mod snap_to_terrain; pub mod noise; @@ -127,6 +127,8 @@ impl Plugin for TerrainPlugin { app.register_type::() .register_type::(); } + + app.init_resource::(); } } @@ -172,6 +174,21 @@ pub struct TileHeightsRebuilt(pub IVec2); 
#[derive(Component, Deref, Debug)] pub struct Heights(Box<[f32]>); +/// Queue of terrain tiles whose [`Heights`] are to be rebuilt. +#[derive(Resource, Default)] +pub struct TerrainHeightRebuildQueue(Vec); +impl TerrainHeightRebuildQueue { + pub fn get(&self) -> &[IVec2] { + &self.0 + } + pub fn count(&self) -> usize { + self.0.len() + } + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } +} + fn update_terrain_heights( terrain_noise_layers: Option>, shape_modifier_query: Query<( @@ -192,15 +209,27 @@ fn update_terrain_heights( terrain_settings: Res, tile_to_modifier: Res, tile_to_terrain: Res, - mut tile_generate_queue: Local>, + mut tile_generate_queue: ResMut, mut noise_cache: ResMut, mut event_reader: EventReader, mut tile_rebuilt_events: EventWriter, - lookup_curves: Res> + lookup_curves: Res>, + mut noise_spline_index_cache: Local>, + mut noise_detail_index_cache: Local> ) { + // Cache indexes into the noise cache for each terrain noise. + // Saves having to do the checks and insertions for every iteration when applying the noise. 
+ if let Some(terrain_noise_layers) = terrain_noise_layers.as_ref().filter(|noise_layers| noise_layers.is_changed()) { + noise_spline_index_cache.clear(); + noise_spline_index_cache.extend(terrain_noise_layers.splines.iter().map(|spline| noise_cache.get_simplex_index(spline.seed) as u32)); + + noise_detail_index_cache.clear(); + noise_detail_index_cache.extend(terrain_noise_layers.layers.iter().map(|layer| noise_cache.get_simplex_index(layer.seed) as u32)); + } + for RebuildTile(tile) in event_reader.read() { - if !tile_generate_queue.contains(tile) { - tile_generate_queue.push(*tile); + if !tile_generate_queue.0.contains(tile) { + tile_generate_queue.0.push(*tile); } } @@ -221,10 +250,10 @@ fn update_terrain_heights( let inv_tile_size_scale = scale * (7.0 / tile_size); let tiles_to_generate = tile_generate_queue - .len() + .count() .min(terrain_settings.max_tile_updates_per_frame.get() as usize); - for tile in tile_generate_queue.drain(..tiles_to_generate) { + for tile in tile_generate_queue.0.drain(..tiles_to_generate) { let Some(tiles) = tile_to_terrain.0.get(&tile) else { continue; }; @@ -241,14 +270,7 @@ fn update_terrain_heights( // First, set by noise. if let Some(terrain_noise_layers) = terrain_noise_layers.as_ref() { let _span = info_span!("Apply noise").entered(); - for (i, val) in heights.0.iter_mut().enumerate() { - let (x, z) = index_to_x_z(i, terrain_settings.edge_points as usize); - - let vertex_position = - terrain_translation + Vec2::new(x as f32 * scale, z as f32 * scale); - - *val += terrain_noise_layers.sample_position(&mut noise_cache, vertex_position, &lookup_curves); - } + apply_noise_simd(&mut heights.0, &terrain_settings, terrain_translation, scale, &noise_cache, &noise_spline_index_cache, &noise_detail_index_cache, &lookup_curves, terrain_noise_layers); } // Secondly, set by shape-modifiers. 
diff --git a/src/material.rs b/src/material.rs index 148d0b6..764e66c 100644 --- a/src/material.rs +++ b/src/material.rs @@ -8,7 +8,7 @@ use bevy::{ pbr::{ExtendedMaterial, MaterialExtension, MaterialPlugin, StandardMaterial}, prelude::{ default, Commands, Component, Entity, EventReader, GlobalTransform, Image, - IntoSystemConfigs, Local, Mesh, Query, ReflectComponent, ReflectDefault, ReflectResource, + IntoSystemConfigs, Mesh, Query, ReflectComponent, ReflectDefault, ReflectResource, Res, ResMut, Resource, Shader, With, Without, }, reflect::Reflect, @@ -63,6 +63,8 @@ impl Plugin for TerrainTexturingPlugin { ) .chain(), ); + + app.init_resource::(); } } @@ -400,6 +402,21 @@ fn insert_texture_map( }); } +/// Queue of terrain tiles whose textures are to be rebuilt. +#[derive(Resource, Default)] +pub struct TerrainTextureRebuildQueue(Vec); +impl TerrainTextureRebuildQueue { + pub fn get(&self) -> &[IVec2] { + &self.0 + } + pub fn count(&self) -> usize { + self.0.len() + } + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } +} + fn update_terrain_texture_maps( shape_modifier_query: Query<( &TextureModifierOperation, @@ -426,15 +443,15 @@ fn update_terrain_texture_maps( tile_to_modifier: Res, tile_to_terrain: Res, mut event_reader: EventReader, - mut tile_generate_queue: Local>, + mut tile_generate_queue: ResMut, mut materials: ResMut>, mut images: ResMut>, meshes: Res>, texturing_rules: Res, ) { for TerrainMeshRebuilt(tile) in event_reader.read() { - if !tile_generate_queue.contains(tile) { - tile_generate_queue.push(*tile); + if !tile_generate_queue.0.contains(tile) { + tile_generate_queue.0.push(*tile); } } @@ -449,13 +466,13 @@ fn update_terrain_texture_maps( let inv_tile_size_scale = scale * (7.0 / tile_size); let tiles_to_generate = tile_generate_queue - .len() + .count() .min(texture_settings.max_tile_updates_per_frame.get() as usize); tiles_query .iter_many( tile_generate_queue - .drain(..tiles_to_generate) + .0.drain(..tiles_to_generate) 
.filter_map(|tile| tile_to_terrain.0.get(&tile)) .flatten(), ) diff --git a/src/meshing.rs b/src/meshing.rs index 57515d2..9056391 100644 --- a/src/meshing.rs +++ b/src/meshing.rs @@ -1,16 +1,12 @@ use bevy::{ - app::{App, Plugin, PostUpdate}, - asset::{Assets, Handle}, - math::{IVec2, Vec3, Vec3A}, - prelude::{ - Commands, Entity, Event, EventReader, EventWriter, IntoSystemConfigs, Local, Mesh, Query, - Res, ResMut, - }, - render::{ - mesh::{Indices, PrimitiveTopology}, + app::{App, Plugin, PostUpdate}, asset::{Assets, Handle}, log::{info, info_span}, math::{IVec2, Vec2, Vec3, Vec3A, Vec4}, prelude::{ + Commands, Entity, Event, EventReader, EventWriter, IntoSystemConfigs, Mesh, Query, + Res, ResMut, Resource, + }, render::{ + mesh::{Indices, PrimitiveTopology, VertexAttributeValues}, primitives::Aabb, render_asset::RenderAssetUsages, - }, + } }; use crate::{ @@ -26,6 +22,22 @@ impl Plugin for TerrainMeshingPlugin { ); app.add_event::(); + app.init_resource::(); + } +} + +/// Queue of terrain tiles whose meshes are to be rebuilt. +#[derive(Resource, Default)] +pub struct TerrainMeshRebuildQueue(Vec); +impl TerrainMeshRebuildQueue { + pub fn get(&self) -> &[IVec2] { + &self.0 + } + pub fn count(&self) -> usize { + self.0.len() + } + pub fn is_empty(&self) -> bool { + self.0.is_empty() } } @@ -46,13 +58,13 @@ fn update_mesh_from_heights( heights_query: Query<&Heights>, terrain_settings: Res, tile_to_terrain: Res, - mut tile_generate_queue: Local>, + mut tile_generate_queue: ResMut, mut tile_rebuilt_events: EventReader, mut repaint_texture_events: EventWriter, ) { for TileHeightsRebuilt(tile) in tile_rebuilt_events.read() { - if !tile_generate_queue.contains(tile) { - tile_generate_queue.push(*tile); + if !tile_generate_queue.0.contains(tile) { + tile_generate_queue.0.push(*tile); } // Queue neighbors as well to make sure normals are correct at the edges. 
@@ -64,8 +76,8 @@ fn update_mesh_from_heights( ]; for neighbor in neighbors.into_iter() { - if !tile_generate_queue.contains(&neighbor) { - tile_generate_queue.push(neighbor); + if !tile_generate_queue.0.contains(&neighbor) { + tile_generate_queue.0.push(neighbor); } } } @@ -76,10 +88,10 @@ fn update_mesh_from_heights( let tile_size = terrain_settings.tile_size(); let tiles_to_generate = tile_generate_queue - .len() + .0.len() .min(terrain_settings.max_tile_updates_per_frame.get() as usize); - for tile in tile_generate_queue.drain(..tiles_to_generate) { + for tile in tile_generate_queue.0.drain(..tiles_to_generate) { let Some(tiles) = tile_to_terrain.0.get(&tile) else { continue; }; @@ -114,6 +126,8 @@ fn update_mesh_from_heights( ]; while let Some((entity, heights, mesh_handle, holes, mut aabb)) = iter.fetch_next() { + let _span = info_span!("Build tile mesh").entered(); + let mesh = create_terrain_mesh( terrain_settings.tile_size(), terrain_settings.edge_points, @@ -291,7 +305,9 @@ fn create_terrain_mesh( let step = (1.0 / vertex_edge) * size; // -X direction. - if let Some(neighbors) = neighbours[0] { + if let Some(neighbors) = neighbours[0] { + let _span = info_span!("Add normals from -X neighbor").entered(); + // Corner { let x = 0; @@ -340,6 +356,8 @@ fn create_terrain_mesh( // +X direction. if let Some(neighbors) = neighbours[1] { + let _span = info_span!("Add normals from +X neighbor").entered(); + // Ignoring corners. for x in (0..(num_vertices - edge_length as usize)) .skip(edge_length as usize + edge_length as usize - 1) @@ -369,6 +387,8 @@ fn create_terrain_mesh( // -Y if let Some(neighbors) = neighbours[2] { + let _span = info_span!("Add normals from -Y neighbor").entered(); + let neighbor_row = &neighbors[edge_length as usize * (edge_length as usize - 2)..]; // Ignoring corners. 
@@ -391,6 +411,8 @@ fn create_terrain_mesh( } // +Y if let Some(neighbors) = neighbours[3] { + let _span = info_span!("Add normals from +Y neighbor").entered(); + let neighbor_row = &neighbors[edge_length as usize..(edge_length as usize * 2)]; // Ignoring corners. @@ -420,6 +442,13 @@ fn create_terrain_mesh( normals[i] = (normals[i] / (count as f32)).normalize(); } + let temp_indices: Vec = match &indices { + Indices::U16(vec) => vec.iter().map(|i| *i as usize).collect(), + Indices::U32(vec) => vec.iter().map(|i| *i as usize).collect(), + }; + + let generated_tangents = generate_tangents(&temp_indices, &positions, &uvs, &normals); + Mesh::new( PrimitiveTopology::TriangleList, RenderAssetUsages::default(), @@ -428,6 +457,81 @@ fn create_terrain_mesh( .with_inserted_attribute(Mesh::ATTRIBUTE_POSITION, positions) .with_inserted_attribute(Mesh::ATTRIBUTE_UV_0, uvs) .with_inserted_attribute(Mesh::ATTRIBUTE_NORMAL, normals) - .with_generated_tangents() - .unwrap() // TODO: Can this ever fail?? + .with_inserted_attribute(Mesh::ATTRIBUTE_TANGENT, generated_tangents) +} + +#[derive(Default, Clone)] +struct TangentSpace { + tangent: Vec3, // Running sum of the tangents of every triangle that uses this vertex + count: u32, // Number of triangle tangents accumulated into `tangent` +} + +/// Generate tangents by taking advantage of the invariants of our terrain. (We can't have degenerate triangles, no standalone faces, etc) +/// +/// This is much faster than the regular bevy generation, at the cost of very minor errors (~10e-6 or so). 
+fn generate_tangents(indices: &[usize], positions: &[Vec3], uvs: &[[f32; 2]], normals: &[Vec3]) -> Vec { + let _span = info_span!("Generate tangents").entered(); + + let mut tangents = vec![TangentSpace::default(); positions.len()]; + + // Iterate over each triangle + for i in (0..indices.len()).step_by(3) { + let i0 = indices[i]; + let i1 = indices[i + 1]; + let i2 = indices[i + 2]; + + let p0 = positions[i0]; + let p1 = positions[i1]; + let p2 = positions[i2]; + + let uv0 = uvs[i0]; + let uv1 = uvs[i1]; + let uv2 = uvs[i2]; + + // Calculate edges of the triangle + let delta_pos1 = p1 - p0; // p1 - p0 + let delta_pos2 = p2 - p0; // p2 - p0 + + // Calculate UV deltas + let delta_uv1 = Vec2::new(uv1[0] - uv0[0], uv1[1] - uv0[1]); + let delta_uv2 = Vec2::new(uv2[0] - uv0[0], uv2[1] - uv0[1]); + + // Calculate the tangent + let r = 1.0 / (delta_uv1.x * delta_uv2.y - delta_uv1.y * delta_uv2.x); + let tangent = (delta_pos1 * delta_uv2.y - delta_pos2 * delta_uv1.y) * r; + + // Still a Vec3 here; w = 1.0 is only applied when the final Vec4 is built + let tangent_4d = Vec3::new(tangent.x, tangent.y, tangent.z); + + // Add to the tangent space of each vertex + tangents[i0].tangent += tangent_4d; + tangents[i1].tangent += tangent_4d; + tangents[i2].tangent += tangent_4d; + + // Increment count for averaging later + tangents[i0].count += 1; + tangents[i1].count += 1; + tangents[i2].count += 1; + } + + // Finalize tangents by averaging and orthogonalizing against normals + let mut final_tangents = vec![Vec4::ZERO; positions.len()]; + + for i in 0..positions.len() { + if tangents[i].count > 0 { + // Average the tangents + let averaged_tangent = (tangents[i].tangent / tangents[i].count as f32).normalize(); + + // Get the normal for this vertex + let normal = normals[i]; + + // Use Gram-Schmidt to ensure the tangent is orthogonal to the normal + let orthogonal_tangent = averaged_tangent - normal * (averaged_tangent.dot(normal)); + let final_tangent = orthogonal_tangent.normalize(); // Normalize the tangent after 
orthogonalization + + final_tangents[i] = Vec4::new(final_tangent.x, final_tangent.y, final_tangent.z, 1.0); + } + } + + final_tangents } diff --git a/src/noise.rs b/src/noise.rs index b2e35ef..8ebe119 100644 --- a/src/noise.rs +++ b/src/noise.rs @@ -1,9 +1,11 @@ use bevy_lookup_curve::LookupCurve; use ::noise::{NoiseFn, Simplex}; use bevy::{ - asset::{Assets, Handle}, math::Vec2, prelude::{ReflectDefault, ReflectResource, Resource}, reflect::Reflect + asset::{Assets, Handle}, math::{Vec2, Vec4}, prelude::{ReflectDefault, ReflectResource, Resource}, reflect::Reflect }; +use crate::{utils::index_to_x_z, TerrainSettings}; + /// Cache of Simplex noise instances & which seeds they map to. #[derive(Default, Resource)] pub struct NoiseCache { @@ -15,17 +17,30 @@ pub struct NoiseCache { } impl NoiseCache { pub fn get(&mut self, seed: u32) -> &Simplex { + let index = self.get_simplex_index(seed); + + &self.noises[index] + } + + /// SAFETY: This is fine as long as the noise has already been initialized (using for example [`NoiseCache::get_simplex_index`]) + #[inline] + pub(super) unsafe fn get_by_index(&self, index: usize) -> &Simplex { + self.noises.get_unchecked(index) + } + + #[inline] + pub fn get_simplex_index(&mut self, seed: u32) -> usize { if let Some(index) = self .seeds .iter() .position(|existing_seed| *existing_seed == seed) { - &self.noises[index] + index } else { self.seeds.push(seed); self.noises.push(Simplex::new(seed)); - self.noises.last().unwrap() + self.noises.len() - 1 } } } @@ -45,12 +60,45 @@ impl TerrainNoiseSplineLayer { /// /// `noise` is expected to be a Simplex noise initialized with this `TerrainNoiseBaseLayer`'s `seed`. /// It is not contained within the noise layer to keep the size of a layer smaller. 
+ #[inline] pub fn sample(&self, x: f32, z: f32, noise: &Simplex, lookup_curves: &Assets) -> f32 { let noise_value = noise.get([(x * self.frequency) as f64, (z * self.frequency) as f64]) as f32; lookup_curves.get(&self.amplitude_curve).map_or(0.0, |curve| curve.lookup((noise_value / 2.0) + 0.5)) } + + #[inline] + pub fn sample_simd( + &self, + x: Vec4, + z: Vec4, + noise: &Simplex, + lookup_curves: &Assets, + ) -> Vec4 { + // Step 1: Get the noise values for all 4 positions (x, z) + let noise_values = Vec4::new( + noise.get([(x.x * self.frequency) as f64, (z.x * self.frequency) as f64]) as f32, + noise.get([(x.y * self.frequency) as f64, (z.y * self.frequency) as f64]) as f32, + noise.get([(x.z * self.frequency) as f64, (z.z * self.frequency) as f64]) as f32, + noise.get([(x.w * self.frequency) as f64, (z.w * self.frequency) as f64]) as f32, + ); + + // Step 2: Normalize noise values from [-1, 1] to [0, 1] + let normalized_noise = (noise_values / 2.0) + Vec4::splat(0.5); + + // Step 3: Fetch the lookup curve and apply it to all 4 noise values + if let Some(curve) = lookup_curves.get(&self.amplitude_curve) { + Vec4::new( + curve.lookup(normalized_noise.x), + curve.lookup(normalized_noise.y), + curve.lookup(normalized_noise.z), + curve.lookup(normalized_noise.w), + ) + } else { + Vec4::ZERO // Default to 0 if the curve isn't found + } + } } #[derive(Reflect, Clone)] @@ -72,10 +120,25 @@ impl TerrainNoiseDetailLayer { /// /// `noise` is expected to be a Simplex noise initialized with this `TerrainNoiseLayer`'s `seed`. /// It is not contained within the noise layer to keep the size of a layer smaller. 
+ #[inline] pub fn sample(&self, x: f32, z: f32, noise: &Simplex) -> f32 { noise.get([(x * self.frequency) as f64, (z * self.frequency) as f64]) as f32 * self.amplitude } + + #[inline] + pub fn sample_simd(&self, x: Vec4, z: Vec4, noise: &Simplex) -> Vec4 { + // Step 1: Get the noise values for all 4 positions (x, z) + let noise_values = Vec4::new( + noise.get([(x.x * self.frequency) as f64, (z.x * self.frequency) as f64]) as f32, + noise.get([(x.y * self.frequency) as f64, (z.y * self.frequency) as f64]) as f32, + noise.get([(x.z * self.frequency) as f64, (z.z * self.frequency) as f64]) as f32, + noise.get([(x.w * self.frequency) as f64, (z.w * self.frequency) as f64]) as f32, + ); + + // Step 2: Multiply by the amplitude + noise_values * Vec4::splat(self.amplitude) + } } impl Default for TerrainNoiseDetailLayer { fn default() -> Self { @@ -100,14 +163,87 @@ impl TerrainNoiseSettings { /// /// Returns 0.0 if there are no noise layers. pub fn sample_position(&self, noise_cache: &mut NoiseCache, pos: Vec2, lookup_curves: &Assets) -> f32 { - let spline_height = self.splines.iter().fold(0.0, |acc, layer| { - acc + layer.sample(pos.x, pos.y, noise_cache.get(layer.seed), lookup_curves) + let spline_height = self.splines + .iter() + .fold(0.0, |acc, layer| { + acc + layer.sample(pos.x, pos.y, noise_cache.get(layer.seed), lookup_curves) }); - self.layers + let layer_height = self.layers .iter() - .fold(spline_height, |acc, layer: &TerrainNoiseDetailLayer| { + .fold(0.0, |acc, layer: &TerrainNoiseDetailLayer| { acc + layer.sample(pos.x, pos.y, noise_cache.get(layer.seed)) - }) + }); + + spline_height + layer_height } } + +pub(super) fn apply_noise_simd(heights: &mut [f32], terrain_settings: &TerrainSettings, terrain_translation: Vec2, scale: f32, noise_cache: &NoiseCache, noise_spline_index_cache: &[u32], noise_detail_index_cache: &[u32], lookup_curves: &Assets, terrain_noise_layers: &TerrainNoiseSettings) { + let edge_points = terrain_settings.edge_points as usize; + let 
length = heights.len(); + let simd_len = length / 4 * 4; // Length rounded down to the nearest multiple of 4 + + // Process in chunks of 4 + for i in (0..simd_len).step_by(4) { + // Unpack four (x, z) pairs in parallel + let (x1, z1) = index_to_x_z(i, edge_points); + let (x2, z2) = index_to_x_z(i + 1, edge_points); + let (x3, z3) = index_to_x_z(i + 2, edge_points); + let (x4, z4) = index_to_x_z(i + 3, edge_points); + + // Create SIMD vectors for x and z positions + let x_positions = Vec4::new(x1 as f32 * scale, x2 as f32 * scale, x3 as f32 * scale, x4 as f32 * scale); + let z_positions = Vec4::new(z1 as f32 * scale, z2 as f32 * scale, z3 as f32 * scale, z4 as f32 * scale); + + // Add terrain translation to the positions + let x_translated = x_positions + Vec4::splat(terrain_translation.x); + let z_translated = z_positions + Vec4::splat(terrain_translation.y); + + // Accumulate spline and layer heights for all 4 points in parallel + let mut spline_heights = Vec4::ZERO; + let mut layer_heights = Vec4::ZERO; + + unsafe { + // Process all spline layers + for (j, layer) in terrain_noise_layers.splines.iter().enumerate() { + let noise = noise_cache.get_by_index(noise_spline_index_cache[j] as usize); + let spline_values = layer.sample_simd(x_translated, z_translated, noise, lookup_curves); + spline_heights += spline_values; + } + + // Process all detail layers + for (j, layer) in terrain_noise_layers.layers.iter().enumerate() { + let noise = noise_cache.get_by_index(noise_detail_index_cache[j] as usize); + let layer_values = layer.sample_simd(x_translated, z_translated, noise); + layer_heights += layer_values; + } + + // Store the results back into the heights array + heights[i] = spline_heights.x + layer_heights.x; + heights[i + 1] = spline_heights.y + layer_heights.y; + heights[i + 2] = spline_heights.z + layer_heights.z; + heights[i + 3] = spline_heights.w + layer_heights.w; + } + } + + // Process any remaining heights that aren't divisible by 4 + for i in 
simd_len..length { + let (x, z) = index_to_x_z(i, edge_points); + let vertex_position = terrain_translation + Vec2::new(x as f32 * scale, z as f32 * scale); + + unsafe { + let mut spline_height = 0.0; + for (j, layer) in terrain_noise_layers.splines.iter().enumerate() { + spline_height += layer.sample(vertex_position.x, vertex_position.y, noise_cache.get_by_index(noise_spline_index_cache[j] as usize), lookup_curves); + } + + let mut layer_height = 0.0; + for (j, layer) in terrain_noise_layers.layers.iter().enumerate() { + layer_height += layer.sample(vertex_position.x, vertex_position.y, noise_cache.get_by_index(noise_detail_index_cache[j] as usize)); + } + + heights[i] = spline_height + layer_height; + } + } +} \ No newline at end of file