From 87902e62a80cc9d5ead85d9a697bd444d8e87297 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Mon, 6 Jan 2025 10:01:57 +0100 Subject: [PATCH] Remove all legacy Chunk iteration APIs (#8556) * DNM: requires #8555 --- Cargo.lock | 4 - crates/store/re_chunk/src/iter.rs | 344 ------------------ crates/viewer/re_view/Cargo.toml | 2 - crates/viewer/re_view/src/results_ext.rs | 89 ----- .../re_view_graph/src/visualizers/edges.rs | 2 +- crates/viewer/re_view_spatial/Cargo.toml | 2 - .../visualizers/utilities/entity_iterator.rs | 95 +---- 7 files changed, 4 insertions(+), 534 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b801adefce9d..821ba67e5e1b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6580,12 +6580,10 @@ version = "0.22.0-alpha.1+dev" dependencies = [ "ahash", "arrow", - "bytemuck", "egui", "glam", "itertools 0.13.0", "nohash-hasher", - "re_arrow2", "re_chunk_store", "re_entity_db", "re_log", @@ -6697,7 +6695,6 @@ version = "0.22.0-alpha.1+dev" dependencies = [ "ahash", "anyhow", - "arrow", "bitflags 2.6.0", "bytemuck", "criterion", @@ -6710,7 +6707,6 @@ dependencies = [ "nohash-hasher", "once_cell", "ordered-float", - "re_arrow2", "re_chunk_store", "re_data_ui", "re_entity_db", diff --git a/crates/store/re_chunk/src/iter.rs b/crates/store/re_chunk/src/iter.rs index e4dc3ff857ce..fba566278edb 100644 --- a/crates/store/re_chunk/src/iter.rs +++ b/crates/store/re_chunk/src/iter.rs @@ -201,350 +201,6 @@ impl Chunk { } } - /// Returns an iterator over the raw primitive values of a [`Chunk`], for a given component. - /// - /// This is a very fast path: the entire column will be downcasted at once, and then every - /// component batch will be a slice reference into that global slice. - /// Use this when working with simple arrow datatypes and performance matters (e.g. scalars, - /// points, etc). - /// - /// See also: - /// * [`Self::iter_primitive_array`] - /// * [`Self::iter_primitive_array_list`] - /// * [`Self::iter_string`] - /// * [`Self::iter_buffer`]. - /// * [`Self::iter_component`]. - #[inline] - pub fn iter_primitive( - &self, - component_name: &ComponentName, - ) -> impl Iterator + '_ { - let Some(list_array) = self.get_first_component(component_name) else { - return Either::Left(std::iter::empty()); - }; - - let Some(values) = list_array - .values() - .as_any() - .downcast_ref::>() - else { - if cfg!(debug_assertions) { - panic!("downcast failed for {component_name}, data discarded"); - } else { - re_log::error_once!("downcast failed for {component_name}, data discarded"); - } - return Either::Left(std::iter::empty()); - }; - let values = values.values().as_slice(); - - // NOTE: No need for validity checks here, `iter_offsets` already takes care of that. - Either::Right( - self.iter_component_offsets(component_name) - .map(move |(idx, len)| &values[idx..idx + len]), - ) - } - - /// Returns an iterator over the raw boolean values of a [`Chunk`], for a given component. - /// - /// This is a very fast path: the entire column will be downcasted at once, and then every - /// component batch will be a slice reference into that global slice. - /// Use this when working with simple arrow datatypes and performance matters. - /// - /// See also: - /// * [`Self::iter_primitive_array`] - /// * [`Self::iter_primitive_array_list`] - /// * [`Self::iter_string`] - /// * [`Self::iter_buffer`]. - /// * [`Self::iter_component`]. - #[inline] - pub fn iter_bool( - &self, - component_name: &ComponentName, - ) -> impl Iterator + '_ { - let Some(list_array) = self.get_first_component(component_name) else { - return Either::Left(std::iter::empty()); - }; - - let Some(values) = list_array - .values() - .as_any() - .downcast_ref::() - else { - if cfg!(debug_assertions) { - panic!("downcast failed for {component_name}, data discarded"); - } else { - re_log::error_once!("downcast failed for {component_name}, data discarded"); - } - return Either::Left(std::iter::empty()); - }; - let values = values.values().clone(); - - // NOTE: No need for validity checks here, `iter_offsets` already takes care of that. - Either::Right( - self.iter_component_offsets(component_name) - .map(move |(idx, len)| values.clone().sliced(idx, len)), - ) - } - - /// Returns an iterator over the raw primitive arrays of a [`Chunk`], for a given component. - /// - /// This is a very fast path: the entire column will be downcasted at once, and then every - /// component batch will be a slice reference into that global slice. - /// Use this when working with simple arrow datatypes and performance matters (e.g. scalars, - /// points, etc). - /// - /// See also: - /// * [`Self::iter_primitive`] - /// * [`Self::iter_string`] - /// * [`Self::iter_buffer`]. - /// * [`Self::iter_component`]. - pub fn iter_primitive_array( - &self, - component_name: &ComponentName, - ) -> impl Iterator + '_ - where - [T; N]: bytemuck::Pod, - { - let Some(list_array) = self.get_first_component(component_name) else { - return Either::Left(std::iter::empty()); - }; - - let Some(fixed_size_list_array) = list_array - .values() - .as_any() - .downcast_ref::() - else { - if cfg!(debug_assertions) { - panic!("downcast failed for {component_name}, data discarded"); - } else { - re_log::error_once!("downcast failed for {component_name}, data discarded"); - } - return Either::Left(std::iter::empty()); - }; - - let Some(values) = fixed_size_list_array - .values() - .as_any() - .downcast_ref::>() - else { - if cfg!(debug_assertions) { - panic!("downcast failed for {component_name}, data discarded"); - } else { - re_log::error_once!("downcast failed for {component_name}, data discarded"); - } - return Either::Left(std::iter::empty()); - }; - - let size = fixed_size_list_array.size(); - let values = values.values().as_slice(); - - // NOTE: No need for validity checks here, `iter_offsets` already takes care of that. - Either::Right( - self.iter_component_offsets(component_name) - .map(move |(idx, len)| { - bytemuck::cast_slice(&values[idx * size..idx * size + len * size]) - }), - ) - } - - /// Returns an iterator over the raw list of primitive arrays of a [`Chunk`], for a given component. - /// - /// This is a very fast path: the entire column will be downcasted at once, and then every - /// component batch will be a slice reference into that global slice. - /// Use this when working with simple arrow datatypes and performance matters (e.g. strips, etc). - /// - /// See also: - /// * [`Self::iter_primitive`] - /// * [`Self::iter_primitive_array`] - /// * [`Self::iter_string`] - /// * [`Self::iter_buffer`]. - /// * [`Self::iter_component`]. - pub fn iter_primitive_array_list( - &self, - component_name: &ComponentName, - ) -> impl Iterator> + '_ - where - [T; N]: bytemuck::Pod, - { - let Some(list_array) = self.get_first_component(component_name) else { - return Either::Left(std::iter::empty()); - }; - - let Some(inner_list_array) = list_array - .values() - .as_any() - .downcast_ref::>() - else { - if cfg!(debug_assertions) { - panic!("downcast failed for {component_name}, data discarded"); - } else { - re_log::error_once!("downcast failed for {component_name}, data discarded"); - } - return Either::Left(std::iter::empty()); - }; - - let inner_offsets = inner_list_array.offsets(); - let inner_lengths = inner_list_array.offsets().lengths().collect_vec(); - - let Some(fixed_size_list_array) = inner_list_array - .values() - .as_any() - .downcast_ref::() - else { - if cfg!(debug_assertions) { - panic!("downcast failed for {component_name}, data discarded"); - } else { - re_log::error_once!("downcast failed for {component_name}, data discarded"); - } - return Either::Left(std::iter::empty()); - }; - - let Some(values) = fixed_size_list_array - .values() - .as_any() - .downcast_ref::>() - else { - if cfg!(debug_assertions) { - panic!("downcast failed for {component_name}, data discarded"); - } else { - re_log::error_once!("downcast failed for {component_name}, data discarded"); - } - return Either::Left(std::iter::empty()); - }; - - let size = fixed_size_list_array.size(); - let values = values.values(); - - // NOTE: No need for validity checks here, `iter_offsets` already takes care of that. - Either::Right( - self.iter_component_offsets(component_name) - .map(move |(idx, len)| { - let inner_offsets = &inner_offsets.as_slice()[idx..idx + len]; - let inner_lengths = &inner_lengths.as_slice()[idx..idx + len]; - izip!(inner_offsets, inner_lengths) - .map(|(&idx, &len)| { - let idx = idx as usize; - bytemuck::cast_slice(&values[idx * size..idx * size + len * size]) - }) - .collect_vec() - }), - ) - } - - /// Returns an iterator over the raw strings of a [`Chunk`], for a given component. - /// - /// This is a very fast path: the entire column will be downcasted at once, and then every - /// component batch will be a slice reference into that global slice. - /// Use this when working with simple arrow datatypes and performance matters (e.g. labels, etc). - /// - /// See also: - /// * [`Self::iter_primitive`] - /// * [`Self::iter_primitive_array`] - /// * [`Self::iter_primitive_array_list`] - /// * [`Self::iter_buffer`]. - /// * [`Self::iter_component`]. - pub fn iter_string( - &self, - component_name: &ComponentName, - ) -> impl Iterator> + '_ { - let Some(list_array) = self.get_first_component(component_name) else { - return Either::Left(std::iter::empty()); - }; - - let Some(utf8_array) = list_array - .values() - .as_any() - .downcast_ref::>() - else { - if cfg!(debug_assertions) { - panic!("downcast failed for {component_name}, data discarded"); - } else { - re_log::error_once!("downcast failed for {component_name}, data discarded"); - } - return Either::Left(std::iter::empty()); - }; - - let values = utf8_array.values(); - let offsets = utf8_array.offsets(); - let lengths = utf8_array.offsets().lengths().collect_vec(); - - // NOTE: No need for validity checks here, `iter_offsets` already takes care of that. - Either::Right( - self.iter_component_offsets(component_name) - .map(move |(idx, len)| { - let offsets = &offsets.as_slice()[idx..idx + len]; - let lengths = &lengths.as_slice()[idx..idx + len]; - izip!(offsets, lengths) - .map(|(&idx, &len)| ArrowString::from(values.clone().sliced(idx as _, len))) - .collect_vec() - }), - ) - } - - /// Returns an iterator over the raw buffers of a [`Chunk`], for a given component. - /// - /// This is a very fast path: the entire column will be downcasted at once, and then every - /// component batch will be a slice reference into that global slice. - /// Use this when working with simple arrow datatypes and performance matters (e.g. blobs, etc). - /// - /// See also: - /// * [`Self::iter_primitive`] - /// * [`Self::iter_primitive_array`] - /// * [`Self::iter_primitive_array_list`] - /// * [`Self::iter_string`]. - /// * [`Self::iter_component`]. - pub fn iter_buffer( - &self, - component_name: &ComponentName, - ) -> impl Iterator>> + '_ { - let Some(list_array) = self.get_first_component(component_name) else { - return Either::Left(std::iter::empty()); - }; - - let Some(inner_list_array) = list_array - .values() - .as_any() - .downcast_ref::>() - else { - if cfg!(debug_assertions) { - panic!("downcast failed for {component_name}, data discarded"); - } else { - re_log::error_once!("downcast failed for {component_name}, data discarded"); - } - return Either::Left(std::iter::empty()); - }; - - let Some(values) = inner_list_array - .values() - .as_any() - .downcast_ref::>() - else { - if cfg!(debug_assertions) { - panic!("downcast failed for {component_name}, data discarded"); - } else { - re_log::error_once!("downcast failed for {component_name}, data discarded"); - } - return Either::Left(std::iter::empty()); - }; - - let values = values.values(); - let offsets = inner_list_array.offsets(); - let lengths = inner_list_array.offsets().lengths().collect_vec(); - - // NOTE: No need for validity checks here, `iter_offsets` already takes care of that. - Either::Right( - self.iter_component_offsets(component_name) - .map(move |(idx, len)| { - let offsets = &offsets.as_slice()[idx..idx + len]; - let lengths = &lengths.as_slice()[idx..idx + len]; - izip!(offsets, lengths) - // NOTE: Not an actual clone, just a refbump of the underlying buffer. - .map(|(&idx, &len)| values.clone().sliced(idx as _, len).into()) - .collect_vec() - }), - ) - } - /// Returns an iterator over the all the sliced component batches in a [`Chunk`]'s column, for /// a given component. /// diff --git a/crates/viewer/re_view/Cargo.toml b/crates/viewer/re_view/Cargo.toml index 29e1973862a3..38102cd74f2a 100644 --- a/crates/viewer/re_view/Cargo.toml +++ b/crates/viewer/re_view/Cargo.toml @@ -37,8 +37,6 @@ re_viewport_blueprint.workspace = true ahash.workspace = true arrow.workspace = true -arrow2.workspace = true -bytemuck.workspace = true egui.workspace = true glam.workspace = true itertools.workspace = true diff --git a/crates/viewer/re_view/src/results_ext.rs b/crates/viewer/re_view/src/results_ext.rs index 1d353e110617..8dbe97606db0 100644 --- a/crates/viewer/re_view/src/results_ext.rs +++ b/crates/viewer/re_view/src/results_ext.rs @@ -3,7 +3,6 @@ use std::{borrow::Cow, sync::Arc}; use itertools::Itertools as _; use re_chunk_store::{Chunk, LatestAtQuery, RangeQuery, UnitChunkShared}; -use re_log_types::external::arrow2::bitmap::Bitmap as Arrow2Bitmap; use re_log_types::hash::Hash64; use re_query::{LatestAtResults, RangeResults}; use re_types_core::ComponentName; @@ -435,94 +434,6 @@ impl<'a> HybridResultsChunkIter<'a> { }) } - /// Iterate as indexed booleans. - /// - /// See [`Chunk::iter_bool`] for more information. - pub fn bool(&'a self) -> impl Iterator + 'a { - self.chunks.iter().flat_map(move |chunk| { - itertools::izip!( - chunk.iter_component_indices(&self.timeline, &self.component_name), - chunk.iter_bool(&self.component_name) - ) - }) - } - - /// Iterate as indexed primitives. - /// - /// See [`Chunk::iter_primitive`] for more information. - pub fn primitive( - &'a self, - ) -> impl Iterator + 'a { - self.chunks.iter().flat_map(move |chunk| { - itertools::izip!( - chunk.iter_component_indices(&self.timeline, &self.component_name), - chunk.iter_primitive::(&self.component_name) - ) - }) - } - - /// Iterate as indexed primitive arrays. - /// - /// See [`Chunk::iter_primitive_array`] for more information. - pub fn primitive_array( - &'a self, - ) -> impl Iterator + 'a - where - [T; N]: bytemuck::Pod, - { - self.chunks.iter().flat_map(move |chunk| { - itertools::izip!( - chunk.iter_component_indices(&self.timeline, &self.component_name), - chunk.iter_primitive_array::(&self.component_name) - ) - }) - } - - /// Iterate as indexed list of primitive arrays. - /// - /// See [`Chunk::iter_primitive_array_list`] for more information. - pub fn primitive_array_list( - &'a self, - ) -> impl Iterator)> + 'a - where - [T; N]: bytemuck::Pod, - { - self.chunks.iter().flat_map(move |chunk| { - itertools::izip!( - chunk.iter_component_indices(&self.timeline, &self.component_name), - chunk.iter_primitive_array_list::(&self.component_name) - ) - }) - } - - /// Iterate as indexed UTF-8 strings. - /// - /// See [`Chunk::iter_string`] for more information. - pub fn string( - &'a self, - ) -> impl Iterator)> + 'a { - self.chunks.iter().flat_map(|chunk| { - itertools::izip!( - chunk.iter_component_indices(&self.timeline, &self.component_name), - chunk.iter_string(&self.component_name) - ) - }) - } - - /// Iterate as indexed buffers. - /// - /// See [`Chunk::iter_buffer`] for more information. - pub fn buffer( - &'a self, - ) -> impl Iterator>)> + 'a { - self.chunks.iter().flat_map(|chunk| { - itertools::izip!( - chunk.iter_component_indices(&self.timeline, &self.component_name), - chunk.iter_buffer(&self.component_name) - ) - }) - } - /// Iterate as indexed, sliced, deserialized component batches. /// /// See [`Chunk::iter_slices`] for more information. diff --git a/crates/viewer/re_view_graph/src/visualizers/edges.rs b/crates/viewer/re_view_graph/src/visualizers/edges.rs index 71411402b4d8..d2a8445e01b2 100644 --- a/crates/viewer/re_view_graph/src/visualizers/edges.rs +++ b/crates/viewer/re_view_graph/src/visualizers/edges.rs @@ -1,4 +1,4 @@ -use re_chunk::{ArchetypeFieldName, LatestAtQuery}; +use re_chunk::LatestAtQuery; use re_log_types::{EntityPath, Instance}; use re_types::{self, archetypes, components, datatypes, Component as _}; use re_view::{DataResultQuery, RangeResultsExt}; diff --git a/crates/viewer/re_view_spatial/Cargo.toml b/crates/viewer/re_view_spatial/Cargo.toml index 71c69a1082df..a9d82c77cf83 100644 --- a/crates/viewer/re_view_spatial/Cargo.toml +++ b/crates/viewer/re_view_spatial/Cargo.toml @@ -41,8 +41,6 @@ re_video.workspace = true re_viewer_context.workspace = true re_viewport_blueprint.workspace = true -arrow.workspace = true -arrow2.workspace = true ahash.workspace = true anyhow.workspace = true bitflags.workspace = true diff --git a/crates/viewer/re_view_spatial/src/visualizers/utilities/entity_iterator.rs b/crates/viewer/re_view_spatial/src/visualizers/utilities/entity_iterator.rs index f9195f510b6e..2b5718a26e92 100644 --- a/crates/viewer/re_view_spatial/src/visualizers/utilities/entity_iterator.rs +++ b/crates/viewer/re_view_spatial/src/visualizers/utilities/entity_iterator.rs @@ -134,8 +134,10 @@ use re_chunk_store::external::re_chunk; /// Iterate `chunks` as indexed deserialized batches. /// +/// For simple cases (i.e. everything up to flat structs), prefer [`iter_slices`] instead which is +/// faster. +/// /// See [`Chunk::iter_component`] for more information. -#[allow(unused)] pub fn iter_component<'a, C: re_types::Component>( chunks: &'a std::borrow::Cow<'a, [Chunk]>, timeline: Timeline, @@ -149,97 +151,6 @@ pub fn iter_component<'a, C: re_types::Component>( }) } -/// Iterate `chunks` as indexed primitives. -/// -/// See [`Chunk::iter_primitive`] for more information. -#[allow(unused)] -pub fn iter_primitive<'a, T: arrow2::types::NativeType>( - chunks: &'a std::borrow::Cow<'a, [Chunk]>, - timeline: Timeline, - component_name: ComponentName, -) -> impl Iterator + 'a { - chunks.iter().flat_map(move |chunk| { - itertools::izip!( - chunk.iter_component_indices(&timeline, &component_name), - chunk.iter_primitive::(&component_name) - ) - }) -} - -/// Iterate `chunks` as indexed primitive arrays. -/// -/// See [`Chunk::iter_primitive_array`] for more information. -#[allow(unused)] -pub fn iter_primitive_array<'a, const N: usize, T: arrow2::types::NativeType>( - chunks: &'a std::borrow::Cow<'a, [Chunk]>, - timeline: Timeline, - component_name: ComponentName, -) -> impl Iterator + 'a -where - [T; N]: bytemuck::Pod, -{ - chunks.iter().flat_map(move |chunk| { - itertools::izip!( - chunk.iter_component_indices(&timeline, &component_name), - chunk.iter_primitive_array::(&component_name) - ) - }) -} - -/// Iterate `chunks` as indexed list of primitive arrays. -/// -/// See [`Chunk::iter_primitive_array_list`] for more information. -#[allow(unused)] -pub fn iter_primitive_array_list<'a, const N: usize, T: arrow2::types::NativeType>( - chunks: &'a std::borrow::Cow<'a, [Chunk]>, - timeline: Timeline, - component_name: ComponentName, -) -> impl Iterator)> + 'a -where - [T; N]: bytemuck::Pod, -{ - chunks.iter().flat_map(move |chunk| { - itertools::izip!( - chunk.iter_component_indices(&timeline, &component_name), - chunk.iter_primitive_array_list::(&component_name) - ) - }) -} - -/// Iterate `chunks` as indexed UTF-8 strings. -/// -/// See [`Chunk::iter_string`] for more information. -#[allow(unused)] -pub fn iter_string<'a>( - chunks: &'a std::borrow::Cow<'a, [Chunk]>, - timeline: Timeline, - component_name: ComponentName, -) -> impl Iterator)> + 'a { - chunks.iter().flat_map(move |chunk| { - itertools::izip!( - chunk.iter_component_indices(&timeline, &component_name), - chunk.iter_string(&component_name) - ) - }) -} - -/// Iterate `chunks` as indexed buffers. -/// -/// See [`Chunk::iter_buffer`] for more information. -#[allow(unused)] -pub fn iter_buffer<'a, T: arrow::datatypes::ArrowNativeType + arrow2::types::NativeType>( - chunks: &'a std::borrow::Cow<'a, [Chunk]>, - timeline: Timeline, - component_name: ComponentName, -) -> impl Iterator>)> + 'a { - chunks.iter().flat_map(move |chunk| { - itertools::izip!( - chunk.iter_component_indices(&timeline, &component_name), - chunk.iter_buffer(&component_name) - ) - }) -} - /// Iterate `chunks` as indexed primitives. /// /// See [`Chunk::iter_slices`] for more information.