From b5a4456b8b725c839faa0914d37623e580039590 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Mon, 14 Oct 2024 10:25:33 +0200 Subject: [PATCH] no more encodings --- crates/store/re_chunk_store/src/dataframe.rs | 85 +------------------- crates/store/re_chunk_store/src/lib.rs | 4 +- crates/store/re_dataframe/src/query.rs | 17 +--- rerun_notebook/package-lock.json | 2 +- rerun_py/src/dataframe.rs | 18 ----- 5 files changed, 5 insertions(+), 121 deletions(-) diff --git a/crates/store/re_chunk_store/src/dataframe.rs b/crates/store/re_chunk_store/src/dataframe.rs index 3916deb4a76d..66f80a8ca1f0 100644 --- a/crates/store/re_chunk_store/src/dataframe.rs +++ b/crates/store/re_chunk_store/src/dataframe.rs @@ -15,47 +15,6 @@ use crate::{ChunkStore, ColumnMetadata}; // --- Descriptors --- -/// When selecting secondary component columns, specify how the joined data should be encoded. -/// -/// Because range-queries often involve repeating the same joined-in data multiple times, -/// the strategy we choose for joining can have a significant impact on the size and memory -/// overhead of the `RecordBatch`. -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] -pub enum JoinEncoding { - /// Slice the `RecordBatch` to minimal overlapping sub-ranges. - /// - /// This is the default, and should always be used for the POV component which defines - /// the optimal size for `RecordBatch`. - /// - /// This minimizes the need for allocation, but at the cost of `RecordBatch`es that are - /// almost always smaller than the optimal size. In the common worst-case, this will result - /// in single-row `RecordBatch`es. - #[default] - OverlappingSlice, - - /// Dictionary-encode the joined column. - /// - /// Using dictionary-encoding allows any repeated data to be shared between rows, - /// but comes with the cost of an extra dictionary-lookup indirection. - /// - /// Note that this changes the physical type of the returned column. - /// - /// Using this encoding for complex types is incompatible with some arrow libraries. - DictionaryEncode, - // - // TODO(jleibs): - // RepeatCopy, - // - // Repeat the joined column by physically copying the data. - // - // This will always allocate a new column in the `RecordBatch`, matching the size of the - // POV component. - // - // This is the most expensive option, but can make working with the data more efficient, - // especially when the copied column is small. - // -} - // TODO(#6889): At some point all these descriptors needs to be interned and have handles or // something. And of course they need to be codegen. But we'll get there once we're back to // natively tagged components. @@ -184,11 +143,6 @@ pub struct ComponentColumnDescriptor { /// we introduce mono-type optimization, this might be a native type instead. pub store_datatype: ArrowDatatype, - /// How the data will be joined into the resulting `RecordBatch`. - // - // TODO(cmc): remove with the old re_dataframe. - pub join_encoding: JoinEncoding, - /// Whether this column represents static data. pub is_static: bool, @@ -219,7 +173,6 @@ impl Ord for ComponentColumnDescriptor { archetype_name, archetype_field_name, component_name, - join_encoding: _, store_datatype: _, is_static: _, is_indicator: _, @@ -242,7 +195,6 @@ impl std::fmt::Display for ComponentColumnDescriptor { archetype_name, archetype_field_name, component_name, - join_encoding: _, store_datatype: _, is_static, is_indicator: _, @@ -288,7 +240,6 @@ impl ComponentColumnDescriptor { archetype_name, archetype_field_name, component_name, - join_encoding: _, store_datatype: _, is_static, is_indicator, @@ -324,14 +275,7 @@ impl ComponentColumnDescriptor { #[inline] pub fn returned_datatype(&self) -> ArrowDatatype { - match self.join_encoding { - JoinEncoding::OverlappingSlice => self.store_datatype.clone(), - JoinEncoding::DictionaryEncode => ArrowDatatype::Dictionary( - arrow2::datatypes::IntegerType::Int32, - std::sync::Arc::new(self.store_datatype.clone()), - true, - ), - } + self.store_datatype.clone() } #[inline] @@ -348,12 +292,6 @@ impl ComponentColumnDescriptor { // TODO(#6889): This needs some proper sorbetization -- I just threw these names randomly. .with_metadata(self.metadata()) } - - #[inline] - pub fn with_join_encoding(mut self, join_encoding: JoinEncoding) -> Self { - self.join_encoding = join_encoding; - self - } } // --- Selectors --- @@ -426,11 +364,6 @@ pub struct ComponentColumnSelector { /// This string will be flexibly matched against the available component names. /// Valid matches are case invariant matches of either the full name or the short name. pub component_name: String, - - /// How to join the data into the `RecordBatch`. - // - // TODO(cmc): remove once old `re_dataframe` is gone. - pub join_encoding: JoinEncoding, } impl From for ComponentColumnSelector { @@ -439,7 +372,6 @@ impl From for ComponentColumnSelector { Self { entity_path: desc.entity_path.clone(), component_name: desc.component_name.to_string(), - join_encoding: desc.join_encoding, } } } @@ -451,7 +383,6 @@ impl ComponentColumnSelector { Self { entity_path, component_name: C::name().to_string(), - join_encoding: JoinEncoding::default(), } } @@ -461,16 +392,8 @@ impl ComponentColumnSelector { Self { entity_path, component_name: component_name.to_string(), - join_encoding: JoinEncoding::default(), } } - - /// Specify how the data should be joined into the `RecordBatch`. - #[inline] - pub fn with_join_encoding(mut self, join_encoding: JoinEncoding) -> Self { - self.join_encoding = join_encoding; - self - } } impl std::fmt::Display for ComponentColumnSelector { @@ -478,7 +401,6 @@ impl std::fmt::Display for ComponentColumnSelector { let Self { entity_path, component_name, - join_encoding: _, } = self; f.write_fmt(format_args!("{entity_path}:{component_name}")) @@ -497,9 +419,6 @@ pub struct ArchetypeFieldColumnSelector { /// The field within the `Archetype` associated with this data. field: String, - - /// How to join the data into the `RecordBatch`. - join_encoding: JoinEncoding, } */ @@ -743,7 +662,6 @@ impl ChunkStore { // It might be wrapped further in e.g. a dict, but at the very least // it's a list. store_datatype: ArrowListArray::::default_datatype(datatype.clone()), - join_encoding: JoinEncoding::default(), is_static, is_indicator, is_tombstone, @@ -827,7 +745,6 @@ impl ChunkStore { archetype_field_name: None, component_name, store_datatype: ArrowListArray::::default_datatype(datatype.clone()), - join_encoding: selector.join_encoding, is_static, is_indicator, is_tombstone, diff --git a/crates/store/re_chunk_store/src/lib.rs b/crates/store/re_chunk_store/src/lib.rs index 8deb7e52f2be..2dc7aaffd6bf 100644 --- a/crates/store/re_chunk_store/src/lib.rs +++ b/crates/store/re_chunk_store/src/lib.rs @@ -26,8 +26,8 @@ mod writes; pub use self::dataframe::{ ColumnDescriptor, ColumnSelector, ComponentColumnDescriptor, ComponentColumnSelector, Index, - IndexRange, IndexValue, JoinEncoding, QueryExpression, SparseFillStrategy, - TimeColumnDescriptor, TimeColumnSelector, ViewContentsSelector, + IndexRange, IndexValue, QueryExpression, SparseFillStrategy, TimeColumnDescriptor, + TimeColumnSelector, ViewContentsSelector, }; pub use self::events::{ChunkStoreDiff, ChunkStoreDiffKind, ChunkStoreEvent}; pub use self::gc::{GarbageCollectionOptions, GarbageCollectionTarget}; diff --git a/crates/store/re_dataframe/src/query.rs b/crates/store/re_dataframe/src/query.rs index 2c4589f715d1..4e5d1fc66a91 100644 --- a/crates/store/re_dataframe/src/query.rs +++ b/crates/store/re_dataframe/src/query.rs @@ -23,8 +23,7 @@ use re_chunk::{ }; use re_chunk_store::{ ColumnDescriptor, ColumnSelector, ComponentColumnDescriptor, ComponentColumnSelector, Index, - IndexValue, JoinEncoding, QueryExpression, SparseFillStrategy, TimeColumnDescriptor, - TimeColumnSelector, + IndexValue, QueryExpression, SparseFillStrategy, TimeColumnDescriptor, TimeColumnSelector, }; use re_log_types::ResolvedTimeRange; use re_types_core::components::ClearIsRecursive; @@ -399,7 +398,6 @@ impl QueryHandle<'_> { let ComponentColumnSelector { entity_path: selected_entity_path, component_name: selected_component_name, - join_encoding: _, } = selected_column; view_contents @@ -425,7 +423,6 @@ impl QueryHandle<'_> { selected_component_name.clone(), ), store_datatype: arrow2::datatypes::DataType::Null, - join_encoding: JoinEncoding::default(), is_static: false, is_indicator: false, is_tombstone: false, @@ -1575,7 +1572,6 @@ mod tests { filtered_point_of_view: Some(ComponentColumnSelector { entity_path: "no/such/entity".into(), component_name: MyPoint::name().to_string(), - join_encoding: Default::default(), }), ..Default::default() }; @@ -1605,7 +1601,6 @@ mod tests { filtered_point_of_view: Some(ComponentColumnSelector { entity_path: entity_path.clone(), component_name: "AComponentColumnThatDoesntExist".into(), - join_encoding: Default::default(), }), ..Default::default() }; @@ -1635,7 +1630,6 @@ mod tests { filtered_point_of_view: Some(ComponentColumnSelector { entity_path: entity_path.clone(), component_name: MyPoint::name().to_string(), - join_encoding: Default::default(), }), ..Default::default() }; @@ -1675,7 +1669,6 @@ mod tests { filtered_point_of_view: Some(ComponentColumnSelector { entity_path: entity_path.clone(), component_name: MyColor::name().to_string(), - join_encoding: Default::default(), }), ..Default::default() }; @@ -1901,22 +1894,18 @@ mod tests { ColumnSelector::Component(ComponentColumnSelector { entity_path: entity_path.clone(), component_name: MyColor::name().to_string(), - join_encoding: Default::default(), }), ColumnSelector::Component(ComponentColumnSelector { entity_path: entity_path.clone(), component_name: MyColor::name().to_string(), - join_encoding: Default::default(), }), ColumnSelector::Component(ComponentColumnSelector { entity_path: "non_existing_entity".into(), component_name: MyColor::name().to_string(), - join_encoding: Default::default(), }), ColumnSelector::Component(ComponentColumnSelector { entity_path: entity_path.clone(), component_name: "AComponentColumnThatDoesntExist".into(), - join_encoding: Default::default(), }), ]), ..Default::default() @@ -1993,17 +1982,14 @@ mod tests { ColumnSelector::Component(ComponentColumnSelector { entity_path: entity_path.clone(), component_name: MyPoint::name().to_string(), - join_encoding: Default::default(), }), ColumnSelector::Component(ComponentColumnSelector { entity_path: entity_path.clone(), component_name: MyColor::name().to_string(), - join_encoding: Default::default(), }), ColumnSelector::Component(ComponentColumnSelector { entity_path: entity_path.clone(), component_name: MyLabel::name().to_string(), - join_encoding: Default::default(), }), ]), ..Default::default() @@ -2197,7 +2183,6 @@ mod tests { filtered_point_of_view: Some(ComponentColumnSelector { entity_path: entity_path.clone(), component_name: MyPoint::name().to_string(), - join_encoding: Default::default(), }), ..Default::default() }; diff --git a/rerun_notebook/package-lock.json b/rerun_notebook/package-lock.json index 6ecaddb6a52d..02a9de157345 100644 --- a/rerun_notebook/package-lock.json +++ b/rerun_notebook/package-lock.json @@ -17,7 +17,7 @@ }, "../rerun_js/web-viewer": { "name": "@rerun-io/web-viewer", - "version": "0.19.0-alpha.1+dev", + "version": "0.19.0-alpha.3", "license": "MIT", "devDependencies": { "dts-buddy": "^0.3.0", diff --git a/rerun_py/src/dataframe.rs b/rerun_py/src/dataframe.rs index b20654717afe..32a0aedd8c12 100644 --- a/rerun_py/src/dataframe.rs +++ b/rerun_py/src/dataframe.rs @@ -94,14 +94,6 @@ impl From for PyComponentColumnDescriptor { #[pymethods] impl PyComponentColumnDescriptor { - fn with_dictionary_encoding(&self) -> Self { - Self( - self.0 - .clone() - .with_join_encoding(re_chunk_store::JoinEncoding::DictionaryEncode), - ) - } - fn __repr__(&self) -> String { format!( "Component({}:{})", @@ -134,18 +126,9 @@ impl PyComponentColumnSelector { Self(ComponentColumnSelector { entity_path: entity_path.into(), component_name: component_name.0, - join_encoding: Default::default(), }) } - fn with_dictionary_encoding(&self) -> Self { - Self( - self.0 - .clone() - .with_join_encoding(re_chunk_store::JoinEncoding::DictionaryEncode), - ) - } - fn __repr__(&self) -> String { format!( "Component({}:{})", @@ -630,7 +613,6 @@ impl PyRecording { let selector = ComponentColumnSelector { entity_path: entity_path.clone(), component_name: component_name.into(), - join_encoding: Default::default(), }; self.store