From f9eb6600e312baf705213544ba0b871131f0c1c1 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Tue, 12 Nov 2024 19:13:57 +0100 Subject: [PATCH 1/2] `EntityTree`: only check for entity deletions when necessary (#8103) * Fixes https://github.com/rerun-io/rerun/issues/8101 Before: ![image](https://github.com/user-attachments/assets/339211da-e82d-4646-9946-784f24cea88f) After: ![image](https://github.com/user-attachments/assets/4a89eae9-6692-4415-a8c9-c336b6afe558) --- crates/store/re_entity_db/src/entity_db.rs | 21 +++- crates/store/re_entity_db/src/entity_tree.rs | 109 ++++++++++++++++++- 2 files changed, 121 insertions(+), 9 deletions(-) diff --git a/crates/store/re_entity_db/src/entity_db.rs b/crates/store/re_entity_db/src/entity_db.rs index 8f76c26535c2..e9132f9b2b8a 100644 --- a/crates/store/re_entity_db/src/entity_db.rs +++ b/crates/store/re_entity_db/src/entity_db.rs @@ -5,8 +5,8 @@ use parking_lot::Mutex; use re_chunk::{Chunk, ChunkResult, RowId, TimeInt}; use re_chunk_store::{ - ChunkStore, ChunkStoreChunkStats, ChunkStoreConfig, ChunkStoreEvent, ChunkStoreHandle, - ChunkStoreSubscriber, GarbageCollectionOptions, GarbageCollectionTarget, + ChunkStore, ChunkStoreChunkStats, ChunkStoreConfig, ChunkStoreDiffKind, ChunkStoreEvent, + ChunkStoreHandle, ChunkStoreSubscriber, GarbageCollectionOptions, GarbageCollectionTarget, }; use re_log_types::{ ApplicationId, EntityPath, EntityPathHash, LogMsg, ResolvedTimeRange, ResolvedTimeRangeF, @@ -385,7 +385,13 @@ impl EntityDb { // It is possible for writes to trigger deletions: specifically in the case of // overwritten static data leading to dangling chunks. - self.tree.on_store_deletions(&engine, &store_events); + let entity_paths_with_deletions = store_events + .iter() + .filter(|event| event.kind == ChunkStoreDiffKind::Deletion) + .map(|event| event.chunk.entity_path().clone()) + .collect(); + self.tree + .on_store_deletions(&engine, &entity_paths_with_deletions, &store_events); // We inform the stats last, since it measures e2e latency. self.stats.on_events(&store_events); @@ -444,7 +450,7 @@ impl EntityDb { store_events } - fn gc(&mut self, gc_options: &GarbageCollectionOptions) -> Vec { + pub(crate) fn gc(&mut self, gc_options: &GarbageCollectionOptions) -> Vec { re_tracing::profile_function!(); let mut engine = self.storage_engine.write(); @@ -544,7 +550,12 @@ impl EntityDb { time_histogram_per_timeline.on_events(store_events); let engine = engine.downgrade(); - tree.on_store_deletions(&engine, store_events); + let entity_paths_with_deletions = store_events + .iter() + .filter(|event| event.kind == ChunkStoreDiffKind::Deletion) + .map(|event| event.chunk.entity_path().clone()) + .collect(); + tree.on_store_deletions(&engine, &entity_paths_with_deletions, store_events); } /// Key used for sorting recordings in the UI. diff --git a/crates/store/re_entity_db/src/entity_tree.rs b/crates/store/re_entity_db/src/entity_tree.rs index 169d9a42780a..54159429e1ce 100644 --- a/crates/store/re_entity_db/src/entity_tree.rs +++ b/crates/store/re_entity_db/src/entity_tree.rs @@ -1,7 +1,7 @@ use std::collections::BTreeMap; use ahash::HashSet; -use nohash_hasher::IntMap; +use nohash_hasher::{IntMap, IntSet}; use re_chunk::RowId; use re_chunk_store::{ChunkStoreDiffKind, ChunkStoreEvent, ChunkStoreSubscriber}; @@ -14,6 +14,7 @@ use re_types_core::ComponentName; /// A recursive, manually updated [`ChunkStoreSubscriber`] that maintains the entity hierarchy. /// /// The tree contains a list of subtrees, and so on recursively. +#[derive(Debug)] pub struct EntityTree { /// Full path prefix to the root of this (sub)tree. pub path: EntityPath, @@ -123,7 +124,12 @@ impl EntityTree { } /// Returns `true` if this entity has no children and no data. - pub fn is_empty(&self, engine: &StorageEngineReadGuard<'_>) -> bool { + /// + /// Checking for the absence of data is neither costly nor totally free: do it a few hundreds or + /// thousands times a frame and it will absolutely kill framerate. + /// Don't blindly call this on every existing entity every frame: use [`ChunkStoreEvent`]s to make + /// sure anything changed at all first. + pub fn check_is_empty(&self, engine: &StorageEngineReadGuard<'_>) -> bool { self.children.is_empty() && !engine.store().entity_has_data(&self.path) } @@ -160,6 +166,7 @@ impl EntityTree { pub fn on_store_deletions( &mut self, engine: &StorageEngineReadGuard<'_>, + entity_paths_with_deletions: &IntSet, events: &[ChunkStoreEvent], ) { re_tracing::profile_function!(); @@ -171,9 +178,22 @@ impl EntityTree { self.children.retain(|_, entity| { // this is placed first, because we'll only know if the child entity is empty after telling it to clear itself. - entity.on_store_deletions(engine, events); + entity.on_store_deletions(engine, entity_paths_with_deletions, events); + + let has_children = || !entity.children.is_empty(); + // Checking for lack of data is not free, so make sure there is any reason to believe + // that any relevant data has changed first. + let has_recursive_deletion_events = || { + entity_paths_with_deletions + .iter() + .any(|removed_entity_path| removed_entity_path.starts_with(&entity.path)) + }; + let has_data = || engine.store().entity_has_data(&entity.path); + + let should_be_removed = + !has_children() && (has_recursive_deletion_events() && !has_data()); - !entity.is_empty(engine) + !should_be_removed }); } @@ -237,3 +257,84 @@ impl EntityTree { visit(self, &mut predicate) } } + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use re_chunk::{Chunk, RowId}; + use re_log_types::{example_components::MyPoint, EntityPath, StoreId, TimePoint, Timeline}; + + use crate::EntityDb; + + #[test] + fn deleting_descendants() -> anyhow::Result<()> { + re_log::setup_logging(); + + let mut db = EntityDb::new(StoreId::random(re_log_types::StoreKind::Recording)); + + let timeline_frame = Timeline::new_sequence("frame"); + + let entity_path_parent: EntityPath = "parent".into(); + let entity_path_child: EntityPath = "parent/child1".into(); + let entity_path_grandchild: EntityPath = "parent/child1/grandchild".into(); + + assert!(db.tree().check_is_empty(&db.storage_engine())); + + { + let row_id = RowId::new(); + let timepoint = TimePoint::from_iter([(timeline_frame, 10)]); + let point = MyPoint::new(1.0, 2.0); + let chunk = Chunk::builder(entity_path_grandchild.clone()) + .with_component_batches(row_id, timepoint, [&[point] as _]) + .build()?; + + db.add_chunk(&Arc::new(chunk))?; + } + + { + let parent = db + .tree() + .find_first_child_recursive(|entity_path| *entity_path == entity_path_parent) + .unwrap(); + let child = db + .tree() + .find_first_child_recursive(|entity_path| *entity_path == entity_path_child) + .unwrap(); + let grandchild = db + .tree() + .find_first_child_recursive(|entity_path| *entity_path == entity_path_grandchild) + .unwrap(); + + assert_eq!(1, parent.children.len()); + assert_eq!(1, child.children.len()); + assert_eq!(0, grandchild.children.len()); + + assert!(!db.tree().check_is_empty(&db.storage_engine())); + assert!(!parent.check_is_empty(&db.storage_engine())); + assert!(!child.check_is_empty(&db.storage_engine())); + assert!(!grandchild.check_is_empty(&db.storage_engine())); + } + + let _store_events = db.gc(&re_chunk_store::GarbageCollectionOptions::gc_everything()); + + { + let parent = db + .tree() + .find_first_child_recursive(|entity_path| *entity_path == entity_path_parent); + let child = db + .tree() + .find_first_child_recursive(|entity_path| *entity_path == entity_path_child); + let grandchild = db + .tree() + .find_first_child_recursive(|entity_path| *entity_path == entity_path_grandchild); + + assert!(db.tree().check_is_empty(&db.storage_engine())); + assert!(parent.is_none()); + assert!(child.is_none()); + assert!(grandchild.is_none()); + } + + Ok(()) + } +} From 81ccf443962fafb0a9307e09fddc9c5a20b39be2 Mon Sep 17 00:00:00 2001 From: Samvel Petrosyan Date: Wed, 13 Nov 2024 11:58:37 +0400 Subject: [PATCH 2/2] Fix typo in "custom data" documentation (#8105) --- docs/content/howto/logging/custom-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/howto/logging/custom-data.md b/docs/content/howto/logging/custom-data.md index ee51ce1092db..8200fc88f701 100644 --- a/docs/content/howto/logging/custom-data.md +++ b/docs/content/howto/logging/custom-data.md @@ -35,7 +35,7 @@ import rerun as rr # pip install rerun-sdk @dataclass class LabeledPoints: points: np.ndarray - labels: List[str]) + labels: List[str] def as_component_batches(self) -> Iterable[rr.ComponentBatch]: return rr.Points3D(positions=self.points,