From 286bc8cce52add44e6f6f9c8cd778d26eaa1a761 Mon Sep 17 00:00:00 2001 From: James Liu Date: Sat, 30 Mar 2024 03:15:55 -0700 Subject: [PATCH] Store only the IDs needed for Query iteration (#12476) # Objective Other than the exposed functions for reading matched tables and archetypes, a `QueryState` does not actually need both internal Vecs for storing matched archetypes and tables. In practice, it will only use one of the two depending on if it uses dense or archetypal iteration. Same vein as #12474. The goal is to reduce the memory overhead of using queries, which Bevy itself, ecosystem plugins, and end users are already fairly liberally using. ## Solution Add `StorageId`, which is a union over `TableId` and `ArchetypeId`, and store only one of the two at runtime. Read the slice as if it was one ID depending on whether the query is dense or not. This follows in the same vein as #5085; however, this one directly impacts heap memory usage at runtime, while #5085 primarily targeted transient pointers that might not actually exist at runtime. --- ## Changelog Changed: `QueryState::matched_tables` now returns an iterator instead of a reference to a slice. Changed: `QueryState::matched_archetypes` now returns an iterator instead of a reference to a slice. ## Migration Guide `QueryState::matched_tables` and `QueryState::matched_archetypes` does not return a reference to a slice, but an iterator instead. You may need to use iterator combinators or collect them into a Vec to use it as a slice. --------- Co-authored-by: Alice Cecile --- crates/bevy_ecs/src/query/iter.rs | 47 +++++---- crates/bevy_ecs/src/query/par_iter.rs | 18 ++-- crates/bevy_ecs/src/query/state.rs | 131 ++++++++++++++++---------- 3 files changed, 109 insertions(+), 87 deletions(-) diff --git a/crates/bevy_ecs/src/query/iter.rs b/crates/bevy_ecs/src/query/iter.rs index 35346be6fa8b5..2cfcf37031c1d 100644 --- a/crates/bevy_ecs/src/query/iter.rs +++ b/crates/bevy_ecs/src/query/iter.rs @@ -1,9 +1,9 @@ use crate::{ - archetype::{Archetype, ArchetypeEntity, ArchetypeId, Archetypes}, + archetype::{Archetype, ArchetypeEntity, Archetypes}, component::Tick, entity::{Entities, Entity}, - query::{ArchetypeFilter, DebugCheckedUnwrap, QueryState}, - storage::{Table, TableId, TableRow, Tables}, + query::{ArchetypeFilter, DebugCheckedUnwrap, QueryState, StorageId}, + storage::{Table, TableRow, Tables}, world::unsafe_world_cell::UnsafeWorldCell, }; use std::{borrow::Borrow, iter::FusedIterator, mem::MaybeUninit, ops::Range}; @@ -239,22 +239,20 @@ impl<'w, 's, D: QueryData, F: QueryFilter> Iterator for QueryIter<'w, 's, D, F> let Some(item) = self.next() else { break }; accum = func(accum, item); } - if D::IS_DENSE && F::IS_DENSE { - for table_id in self.cursor.table_id_iter.clone() { + for id in self.cursor.storage_id_iter.clone() { + if D::IS_DENSE && F::IS_DENSE { // SAFETY: Matched table IDs are guaranteed to still exist. - let table = unsafe { self.tables.get(*table_id).debug_checked_unwrap() }; + let table = unsafe { self.tables.get(id.table_id).debug_checked_unwrap() }; accum = // SAFETY: // - The fetched table matches both D and F // - The provided range is equivalent to [0, table.entity_count) // - The if block ensures that D::IS_DENSE and F::IS_DENSE are both true unsafe { self.fold_over_table_range(accum, &mut func, table, 0..table.entity_count()) }; - } - } else { - for archetype_id in self.cursor.archetype_id_iter.clone() { + } else { let archetype = // SAFETY: Matched archetype IDs are guaranteed to still exist. - unsafe { self.archetypes.get(*archetype_id).debug_checked_unwrap() }; + unsafe { self.archetypes.get(id.archetype_id).debug_checked_unwrap() }; accum = // SAFETY: // - The fetched archetype matches both D and F @@ -650,8 +648,7 @@ impl<'w, 's, D: ReadOnlyQueryData, F: QueryFilter, const K: usize> FusedIterator } struct QueryIterationCursor<'w, 's, D: QueryData, F: QueryFilter> { - table_id_iter: std::slice::Iter<'s, TableId>, - archetype_id_iter: std::slice::Iter<'s, ArchetypeId>, + storage_id_iter: std::slice::Iter<'s, StorageId>, table_entities: &'w [Entity], archetype_entities: &'w [ArchetypeEntity], fetch: D::Fetch<'w>, @@ -665,8 +662,7 @@ struct QueryIterationCursor<'w, 's, D: QueryData, F: QueryFilter> { impl Clone for QueryIterationCursor<'_, '_, D, F> { fn clone(&self) -> Self { Self { - table_id_iter: self.table_id_iter.clone(), - archetype_id_iter: self.archetype_id_iter.clone(), + storage_id_iter: self.storage_id_iter.clone(), table_entities: self.table_entities, archetype_entities: self.archetype_entities, fetch: self.fetch.clone(), @@ -687,8 +683,7 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIterationCursor<'w, 's, D, F> { this_run: Tick, ) -> Self { QueryIterationCursor { - table_id_iter: [].iter(), - archetype_id_iter: [].iter(), + storage_id_iter: [].iter(), ..Self::init(world, query_state, last_run, this_run) } } @@ -709,8 +704,7 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIterationCursor<'w, 's, D, F> { filter, table_entities: &[], archetype_entities: &[], - table_id_iter: query_state.matched_table_ids.iter(), - archetype_id_iter: query_state.matched_archetype_ids.iter(), + storage_id_iter: query_state.matched_storage_ids.iter(), current_len: 0, current_row: 0, } @@ -746,12 +740,13 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIterationCursor<'w, 's, D, F> { /// Note that if `F::IS_ARCHETYPAL`, the return value /// will be **the exact count of remaining values**. fn max_remaining(&self, tables: &'w Tables, archetypes: &'w Archetypes) -> usize { + let ids = self.storage_id_iter.clone(); let remaining_matched: usize = if Self::IS_DENSE { - let ids = self.table_id_iter.clone(); - ids.map(|id| tables[*id].entity_count()).sum() + // SAFETY: The if check ensures that storage_id_iter stores TableIds + unsafe { ids.map(|id| tables[id.table_id].entity_count()).sum() } } else { - let ids = self.archetype_id_iter.clone(); - ids.map(|id| archetypes[*id].len()).sum() + // SAFETY: The if check ensures that storage_id_iter stores ArchetypeIds + unsafe { ids.map(|id| archetypes[id.archetype_id].len()).sum() } }; remaining_matched + self.current_len - self.current_row } @@ -773,8 +768,8 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIterationCursor<'w, 's, D, F> { loop { // we are on the beginning of the query, or finished processing a table, so skip to the next if self.current_row == self.current_len { - let table_id = self.table_id_iter.next()?; - let table = tables.get(*table_id).debug_checked_unwrap(); + let table_id = self.storage_id_iter.next()?.table_id; + let table = tables.get(table_id).debug_checked_unwrap(); // SAFETY: `table` is from the world that `fetch/filter` were created for, // `fetch_state`/`filter_state` are the states that `fetch/filter` were initialized with unsafe { @@ -809,8 +804,8 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryIterationCursor<'w, 's, D, F> { } else { loop { if self.current_row == self.current_len { - let archetype_id = self.archetype_id_iter.next()?; - let archetype = archetypes.get(*archetype_id).debug_checked_unwrap(); + let archetype_id = self.storage_id_iter.next()?.archetype_id; + let archetype = archetypes.get(archetype_id).debug_checked_unwrap(); let table = tables.get(archetype.table_id()).debug_checked_unwrap(); // SAFETY: `archetype` and `tables` are from the world that `fetch/filter` were created for, // `fetch_state`/`filter_state` are the states that `fetch/filter` were initialized with diff --git a/crates/bevy_ecs/src/query/par_iter.rs b/crates/bevy_ecs/src/query/par_iter.rs index 0a7ec641d31e4..1d18b69ca0469 100644 --- a/crates/bevy_ecs/src/query/par_iter.rs +++ b/crates/bevy_ecs/src/query/par_iter.rs @@ -160,24 +160,22 @@ impl<'w, 's, D: QueryData, F: QueryFilter> QueryParIter<'w, 's, D, F> { thread_count > 0, "Attempted to run parallel iteration over a query with an empty TaskPool" ); + let id_iter = self.state.matched_storage_ids.iter(); let max_size = if D::IS_DENSE && F::IS_DENSE { // SAFETY: We only access table metadata. let tables = unsafe { &self.world.world_metadata().storages().tables }; - self.state - .matched_table_ids - .iter() - .map(|id| tables[*id].entity_count()) + id_iter + // SAFETY: The if check ensures that matched_storage_ids stores TableIds + .map(|id| unsafe { tables[id.table_id].entity_count() }) .max() - .unwrap_or(0) } else { let archetypes = &self.world.archetypes(); - self.state - .matched_archetype_ids - .iter() - .map(|id| archetypes[*id].len()) + id_iter + // SAFETY: The if check ensures that matched_storage_ids stores ArchetypeIds + .map(|id| unsafe { archetypes[id.archetype_id].len() }) .max() - .unwrap_or(0) }; + let max_size = max_size.unwrap_or(0); let batches = thread_count * self.batching_strategy.batches_per_thread; // Round up to the nearest batch size. diff --git a/crates/bevy_ecs/src/query/state.rs b/crates/bevy_ecs/src/query/state.rs index 23c3b34914131..5731fcb77b397 100644 --- a/crates/bevy_ecs/src/query/state.rs +++ b/crates/bevy_ecs/src/query/state.rs @@ -21,6 +21,25 @@ use super::{ QuerySingleError, ROQueryItem, }; +/// An ID for either a table or an archetype. Used for Query iteration. +/// +/// Query iteration is exclusively dense (over tables) or archetypal (over archetypes) based on whether +/// both `D::IS_DENSE` and `F::IS_DENSE` are true or not. +/// +/// This is a union instead of an enum as the usage is determined at compile time, as all [`StorageId`]s for +/// a [`QueryState`] will be all [`TableId`]s or all [`ArchetypeId`]s, and not a mixture of both. This +/// removes the need for discriminator to minimize memory usage and branching during iteration, but requires +/// a safety invariant be verified when disambiguating them. +/// +/// # Safety +/// Must be initialized and accessed as a [`TableId`], if both generic parameters to the query are dense. +/// Must be initialized and accessed as an [`ArchetypeId`] otherwise. +#[derive(Clone, Copy)] +pub(super) union StorageId { + pub(super) table_id: TableId, + pub(super) archetype_id: ArchetypeId, +} + /// Provides scoped access to a [`World`] state according to a given [`QueryData`] and [`QueryFilter`]. /// /// This data is cached between system runs, and is used to: @@ -47,10 +66,8 @@ pub struct QueryState { /// [`FilteredAccess`] computed by combining the `D` and `F` access. Used to check which other queries /// this query can run in parallel with. pub(crate) component_access: FilteredAccess, - // NOTE: we maintain both a TableId bitset and a vec because iterating the vec is faster - pub(crate) matched_table_ids: Vec, - // NOTE: we maintain both a ArchetypeId bitset and a vec because iterating the vec is faster - pub(crate) matched_archetype_ids: Vec, + // NOTE: we maintain both a bitset and a vec because iterating the vec is faster + pub(super) matched_storage_ids: Vec, pub(crate) fetch_state: D::State, pub(crate) filter_state: F::State, #[cfg(feature = "trace")] @@ -61,8 +78,11 @@ impl fmt::Debug for QueryState { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("QueryState") .field("world_id", &self.world_id) - .field("matched_table_count", &self.matched_table_ids.len()) - .field("matched_archetype_count", &self.matched_archetype_ids.len()) + .field("matched_table_count", &self.matched_tables.count_ones(..)) + .field( + "matched_archetype_count", + &self.matched_archetypes.count_ones(..), + ) .finish_non_exhaustive() } } @@ -116,13 +136,13 @@ impl QueryState { } /// Returns the tables matched by this query. - pub fn matched_tables(&self) -> &[TableId] { - &self.matched_table_ids + pub fn matched_tables(&self) -> impl Iterator + '_ { + self.matched_tables.ones().map(TableId::from_usize) } /// Returns the archetypes matched by this query. - pub fn matched_archetypes(&self) -> &[ArchetypeId] { - &self.matched_archetype_ids + pub fn matched_archetypes(&self) -> impl Iterator + '_ { + self.matched_archetypes.ones().map(ArchetypeId::new) } } @@ -173,8 +193,7 @@ impl QueryState { Self { world_id: world.id(), archetype_generation: ArchetypeGeneration::initial(), - matched_table_ids: Vec::new(), - matched_archetype_ids: Vec::new(), + matched_storage_ids: Vec::new(), fetch_state, filter_state, component_access, @@ -198,8 +217,7 @@ impl QueryState { let mut state = Self { world_id: builder.world().id(), archetype_generation: ArchetypeGeneration::initial(), - matched_table_ids: Vec::new(), - matched_archetype_ids: Vec::new(), + matched_storage_ids: Vec::new(), fetch_state, filter_state, component_access: builder.access().clone(), @@ -376,12 +394,20 @@ impl QueryState { let archetype_index = archetype.id().index(); if !self.matched_archetypes.contains(archetype_index) { self.matched_archetypes.grow_and_insert(archetype_index); - self.matched_archetype_ids.push(archetype.id()); + if !D::IS_DENSE || !F::IS_DENSE { + self.matched_storage_ids.push(StorageId { + archetype_id: archetype.id(), + }); + } } let table_index = archetype.table_id().as_usize(); if !self.matched_tables.contains(table_index) { self.matched_tables.grow_and_insert(table_index); - self.matched_table_ids.push(archetype.table_id()); + if D::IS_DENSE && F::IS_DENSE { + self.matched_storage_ids.push(StorageId { + table_id: archetype.table_id(), + }); + } } true } else { @@ -462,8 +488,7 @@ impl QueryState { QueryState { world_id: self.world_id, archetype_generation: self.archetype_generation, - matched_table_ids: self.matched_table_ids.clone(), - matched_archetype_ids: self.matched_archetype_ids.clone(), + matched_storage_ids: self.matched_storage_ids.clone(), fetch_state, filter_state, component_access: self.component_access.clone(), @@ -553,24 +578,30 @@ impl QueryState { } // take the intersection of the matched ids - let matched_tables: FixedBitSet = self - .matched_tables - .intersection(&other.matched_tables) - .collect(); - let matched_table_ids: Vec = - matched_tables.ones().map(TableId::from_usize).collect(); - let matched_archetypes: FixedBitSet = self - .matched_archetypes - .intersection(&other.matched_archetypes) - .collect(); - let matched_archetype_ids: Vec = - matched_archetypes.ones().map(ArchetypeId::new).collect(); + let mut matched_tables = self.matched_tables.clone(); + let mut matched_archetypes = self.matched_archetypes.clone(); + matched_tables.intersect_with(&other.matched_tables); + matched_archetypes.intersect_with(&other.matched_archetypes); + let matched_storage_ids = if NewD::IS_DENSE && NewF::IS_DENSE { + matched_tables + .ones() + .map(|id| StorageId { + table_id: TableId::from_usize(id), + }) + .collect() + } else { + matched_archetypes + .ones() + .map(|id| StorageId { + archetype_id: ArchetypeId::new(id), + }) + .collect() + }; QueryState { world_id: self.world_id, archetype_generation: self.archetype_generation, - matched_table_ids, - matched_archetype_ids, + matched_storage_ids, fetch_state: new_fetch_state, filter_state: new_filter_state, component_access: joined_component_access, @@ -1356,12 +1387,15 @@ impl QueryState { ) { // NOTE: If you are changing query iteration code, remember to update the following places, where relevant: // QueryIter, QueryIterationCursor, QueryManyIter, QueryCombinationIter, QueryState::for_each_unchecked_manual, QueryState::par_for_each_unchecked_manual + bevy_tasks::ComputeTaskPool::get().scope(|scope| { - if D::IS_DENSE && F::IS_DENSE { - // SAFETY: We only access table data that has been registered in `self.archetype_component_access`. - let tables = unsafe { &world.storages().tables }; - for table_id in &self.matched_table_ids { - let table = &tables[*table_id]; + // SAFETY: We only access table data that has been registered in `self.archetype_component_access`. + let tables = unsafe { &world.storages().tables }; + let archetypes = world.archetypes(); + for storage_id in &self.matched_storage_ids { + if D::IS_DENSE && F::IS_DENSE { + let table_id = storage_id.table_id; + let table = &tables[table_id]; if table.is_empty() { continue; } @@ -1370,39 +1404,34 @@ impl QueryState { while offset < table.entity_count() { let mut func = func.clone(); let len = batch_size.min(table.entity_count() - offset); + let batch = offset..offset + len; scope.spawn(async move { #[cfg(feature = "trace")] let _span = self.par_iter_span.enter(); - let table = &world - .storages() - .tables - .get(*table_id) - .debug_checked_unwrap(); - let batch = offset..offset + len; + let table = + &world.storages().tables.get(table_id).debug_checked_unwrap(); self.iter_unchecked_manual(world, last_run, this_run) .for_each_in_table_range(&mut func, table, batch); }); offset += batch_size; } - } - } else { - let archetypes = world.archetypes(); - for archetype_id in &self.matched_archetype_ids { - let mut offset = 0; - let archetype = &archetypes[*archetype_id]; + } else { + let archetype_id = storage_id.archetype_id; + let archetype = &archetypes[archetype_id]; if archetype.is_empty() { continue; } + let mut offset = 0; while offset < archetype.len() { let mut func = func.clone(); let len = batch_size.min(archetype.len() - offset); + let batch = offset..offset + len; scope.spawn(async move { #[cfg(feature = "trace")] let _span = self.par_iter_span.enter(); let archetype = - world.archetypes().get(*archetype_id).debug_checked_unwrap(); - let batch = offset..offset + len; + world.archetypes().get(archetype_id).debug_checked_unwrap(); self.iter_unchecked_manual(world, last_run, this_run) .for_each_in_archetype_range(&mut func, archetype, batch); });