no more encodings

rerun-io · Oct 14, 2024 · b5a4456 · b5a4456
1 parent ca6ad40
commit b5a4456
Show file tree

Hide file tree

Showing 5 changed files with 5 additions and 121 deletions.
diff --git a/crates/store/re_chunk_store/src/dataframe.rs b/crates/store/re_chunk_store/src/dataframe.rs
@@ -15,47 +15,6 @@ use crate::{ChunkStore, ColumnMetadata};
 
 // --- Descriptors ---
 
-/// When selecting secondary component columns, specify how the joined data should be encoded.
-///
-/// Because range-queries often involve repeating the same joined-in data multiple times,
-/// the strategy we choose for joining can have a significant impact on the size and memory
-/// overhead of the `RecordBatch`.
-#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
-pub enum JoinEncoding {
-    /// Slice the `RecordBatch` to minimal overlapping sub-ranges.
-    ///
-    /// This is the default, and should always be used for the POV component which defines
-    /// the optimal size for `RecordBatch`.
-    ///
-    /// This minimizes the need for allocation, but at the cost of `RecordBatch`es that are
-    /// almost always smaller than the optimal size. In the common worst-case, this will result
-    /// in single-row `RecordBatch`es.
-    #[default]
-    OverlappingSlice,
-
-    /// Dictionary-encode the joined column.
-    ///
-    /// Using dictionary-encoding allows any repeated data to be shared between rows,
-    /// but comes with the cost of an extra dictionary-lookup indirection.
-    ///
-    /// Note that this changes the physical type of the returned column.
-    ///
-    /// Using this encoding for complex types is incompatible with some arrow libraries.
-    DictionaryEncode,
-    //
-    // TODO(jleibs):
-    // RepeatCopy,
-    //
-    // Repeat the joined column by physically copying the data.
-    //
-    // This will always allocate a new column in the `RecordBatch`, matching the size of the
-    // POV component.
-    //
-    // This is the most expensive option, but can make working with the data more efficient,
-    // especially when the copied column is small.
-    //
-}
-
 // TODO(#6889): At some point all these descriptors needs to be interned and have handles or
 // something. And of course they need to be codegen. But we'll get there once we're back to
 // natively tagged components.
@@ -184,11 +143,6 @@ pub struct ComponentColumnDescriptor {
     /// we introduce mono-type optimization, this might be a native type instead.
     pub store_datatype: ArrowDatatype,
 
-    /// How the data will be joined into the resulting `RecordBatch`.
-    //
-    // TODO(cmc): remove with the old re_dataframe.
-    pub join_encoding: JoinEncoding,
-
     /// Whether this column represents static data.
     pub is_static: bool,
 
@@ -219,7 +173,6 @@ impl Ord for ComponentColumnDescriptor {
             archetype_name,
             archetype_field_name,
             component_name,
-            join_encoding: _,
             store_datatype: _,
             is_static: _,
             is_indicator: _,
@@ -242,7 +195,6 @@ impl std::fmt::Display for ComponentColumnDescriptor {
             archetype_name,
             archetype_field_name,
             component_name,
-            join_encoding: _,
             store_datatype: _,
             is_static,
             is_indicator: _,
@@ -288,7 +240,6 @@ impl ComponentColumnDescriptor {
             archetype_name,
             archetype_field_name,
             component_name,
-            join_encoding: _,
             store_datatype: _,
             is_static,
             is_indicator,
@@ -324,14 +275,7 @@ impl ComponentColumnDescriptor {
 
     #[inline]
     pub fn returned_datatype(&self) -> ArrowDatatype {
-        match self.join_encoding {
-            JoinEncoding::OverlappingSlice => self.store_datatype.clone(),
-            JoinEncoding::DictionaryEncode => ArrowDatatype::Dictionary(
-                arrow2::datatypes::IntegerType::Int32,
-                std::sync::Arc::new(self.store_datatype.clone()),
-                true,
-            ),
-        }
+        self.store_datatype.clone()
     }
 
     #[inline]
@@ -348,12 +292,6 @@ impl ComponentColumnDescriptor {
         // TODO(#6889): This needs some proper sorbetization -- I just threw these names randomly.
         .with_metadata(self.metadata())
     }
-
-    #[inline]
-    pub fn with_join_encoding(mut self, join_encoding: JoinEncoding) -> Self {
-        self.join_encoding = join_encoding;
-        self
-    }
 }
 
 // --- Selectors ---
@@ -426,11 +364,6 @@ pub struct ComponentColumnSelector {
     /// This string will be flexibly matched against the available component names.
     /// Valid matches are case invariant matches of either the full name or the short name.
     pub component_name: String,
-
-    /// How to join the data into the `RecordBatch`.
-    //
-    // TODO(cmc): remove once old `re_dataframe` is gone.
-    pub join_encoding: JoinEncoding,
 }
 
 impl From<ComponentColumnDescriptor> for ComponentColumnSelector {
@@ -439,7 +372,6 @@ impl From<ComponentColumnDescriptor> for ComponentColumnSelector {
         Self {
             entity_path: desc.entity_path.clone(),
             component_name: desc.component_name.to_string(),
-            join_encoding: desc.join_encoding,
         }
     }
 }
@@ -451,7 +383,6 @@ impl ComponentColumnSelector {
         Self {
             entity_path,
             component_name: C::name().to_string(),
-            join_encoding: JoinEncoding::default(),
         }
     }
 
@@ -461,24 +392,15 @@ impl ComponentColumnSelector {
         Self {
             entity_path,
             component_name: component_name.to_string(),
-            join_encoding: JoinEncoding::default(),
         }
     }
-
-    /// Specify how the data should be joined into the `RecordBatch`.
-    #[inline]
-    pub fn with_join_encoding(mut self, join_encoding: JoinEncoding) -> Self {
-        self.join_encoding = join_encoding;
-        self
-    }
 }
 
 impl std::fmt::Display for ComponentColumnSelector {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         let Self {
             entity_path,
             component_name,
-            join_encoding: _,
         } = self;
 
         f.write_fmt(format_args!("{entity_path}:{component_name}"))
@@ -497,9 +419,6 @@ pub struct ArchetypeFieldColumnSelector {
 
     /// The field within the `Archetype` associated with this data.
     field: String,
-
-    /// How to join the data into the `RecordBatch`.
-    join_encoding: JoinEncoding,
 }
 */
 
@@ -743,7 +662,6 @@ impl ChunkStore {
                     // It might be wrapped further in e.g. a dict, but at the very least
                     // it's a list.
                     store_datatype: ArrowListArray::<i32>::default_datatype(datatype.clone()),
-                    join_encoding: JoinEncoding::default(),
                     is_static,
                     is_indicator,
                     is_tombstone,
@@ -827,7 +745,6 @@ impl ChunkStore {
             archetype_field_name: None,
             component_name,
             store_datatype: ArrowListArray::<i32>::default_datatype(datatype.clone()),
-            join_encoding: selector.join_encoding,
             is_static,
             is_indicator,
             is_tombstone,

diff --git a/crates/store/re_chunk_store/src/lib.rs b/crates/store/re_chunk_store/src/lib.rs
@@ -26,8 +26,8 @@ mod writes;
 
 pub use self::dataframe::{
     ColumnDescriptor, ColumnSelector, ComponentColumnDescriptor, ComponentColumnSelector, Index,
-    IndexRange, IndexValue, JoinEncoding, QueryExpression, SparseFillStrategy,
-    TimeColumnDescriptor, TimeColumnSelector, ViewContentsSelector,
+    IndexRange, IndexValue, QueryExpression, SparseFillStrategy, TimeColumnDescriptor,
+    TimeColumnSelector, ViewContentsSelector,
 };
 pub use self::events::{ChunkStoreDiff, ChunkStoreDiffKind, ChunkStoreEvent};
 pub use self::gc::{GarbageCollectionOptions, GarbageCollectionTarget};

diff --git a/crates/store/re_dataframe/src/query.rs b/crates/store/re_dataframe/src/query.rs
@@ -23,8 +23,7 @@ use re_chunk::{
 };
 use re_chunk_store::{
     ColumnDescriptor, ColumnSelector, ComponentColumnDescriptor, ComponentColumnSelector, Index,
-    IndexValue, JoinEncoding, QueryExpression, SparseFillStrategy, TimeColumnDescriptor,
-    TimeColumnSelector,
+    IndexValue, QueryExpression, SparseFillStrategy, TimeColumnDescriptor, TimeColumnSelector,
 };
 use re_log_types::ResolvedTimeRange;
 use re_types_core::components::ClearIsRecursive;
@@ -399,7 +398,6 @@ impl QueryHandle<'_> {
                         let ComponentColumnSelector {
                             entity_path: selected_entity_path,
                             component_name: selected_component_name,
-                            join_encoding: _,
                         } = selected_column;
 
                         view_contents
@@ -425,7 +423,6 @@ impl QueryHandle<'_> {
                                                 selected_component_name.clone(),
                                             ),
                                             store_datatype: arrow2::datatypes::DataType::Null,
-                                            join_encoding: JoinEncoding::default(),
                                             is_static: false,
                                             is_indicator: false,
                                             is_tombstone: false,
@@ -1575,7 +1572,6 @@ mod tests {
                 filtered_point_of_view: Some(ComponentColumnSelector {
                     entity_path: "no/such/entity".into(),
                     component_name: MyPoint::name().to_string(),
-                    join_encoding: Default::default(),
                 }),
                 ..Default::default()
             };
@@ -1605,7 +1601,6 @@ mod tests {
                 filtered_point_of_view: Some(ComponentColumnSelector {
                     entity_path: entity_path.clone(),
                     component_name: "AComponentColumnThatDoesntExist".into(),
-                    join_encoding: Default::default(),
                 }),
                 ..Default::default()
             };
@@ -1635,7 +1630,6 @@ mod tests {
                 filtered_point_of_view: Some(ComponentColumnSelector {
                     entity_path: entity_path.clone(),
                     component_name: MyPoint::name().to_string(),
-                    join_encoding: Default::default(),
                 }),
                 ..Default::default()
             };
@@ -1675,7 +1669,6 @@ mod tests {
                 filtered_point_of_view: Some(ComponentColumnSelector {
                     entity_path: entity_path.clone(),
                     component_name: MyColor::name().to_string(),
-                    join_encoding: Default::default(),
                 }),
                 ..Default::default()
             };
@@ -1901,22 +1894,18 @@ mod tests {
                     ColumnSelector::Component(ComponentColumnSelector {
                         entity_path: entity_path.clone(),
                         component_name: MyColor::name().to_string(),
-                        join_encoding: Default::default(),
                     }),
                     ColumnSelector::Component(ComponentColumnSelector {
                         entity_path: entity_path.clone(),
                         component_name: MyColor::name().to_string(),
-                        join_encoding: Default::default(),
                     }),
                     ColumnSelector::Component(ComponentColumnSelector {
                         entity_path: "non_existing_entity".into(),
                         component_name: MyColor::name().to_string(),
-                        join_encoding: Default::default(),
                     }),
                     ColumnSelector::Component(ComponentColumnSelector {
                         entity_path: entity_path.clone(),
                         component_name: "AComponentColumnThatDoesntExist".into(),
-                        join_encoding: Default::default(),
                     }),
                 ]),
                 ..Default::default()
@@ -1993,17 +1982,14 @@ mod tests {
                     ColumnSelector::Component(ComponentColumnSelector {
                         entity_path: entity_path.clone(),
                         component_name: MyPoint::name().to_string(),
-                        join_encoding: Default::default(),
                     }),
                     ColumnSelector::Component(ComponentColumnSelector {
                         entity_path: entity_path.clone(),
                         component_name: MyColor::name().to_string(),
-                        join_encoding: Default::default(),
                     }),
                     ColumnSelector::Component(ComponentColumnSelector {
                         entity_path: entity_path.clone(),
                         component_name: MyLabel::name().to_string(),
-                        join_encoding: Default::default(),
                     }),
                 ]),
                 ..Default::default()
@@ -2197,7 +2183,6 @@ mod tests {
                 filtered_point_of_view: Some(ComponentColumnSelector {
                     entity_path: entity_path.clone(),
                     component_name: MyPoint::name().to_string(),
-                    join_encoding: Default::default(),
                 }),
                 ..Default::default()
             };

diff --git a/rerun_notebook/package-lock.json b/rerun_notebook/package-lock.json
diff --git a/rerun_py/src/dataframe.rs b/rerun_py/src/dataframe.rs
@@ -94,14 +94,6 @@ impl From<ComponentColumnDescriptor> for PyComponentColumnDescriptor {
 
 #[pymethods]
 impl PyComponentColumnDescriptor {
-    fn with_dictionary_encoding(&self) -> Self {
-        Self(
-            self.0
-                .clone()
-                .with_join_encoding(re_chunk_store::JoinEncoding::DictionaryEncode),
-        )
-    }
-
     fn __repr__(&self) -> String {
         format!(
             "Component({}:{})",
@@ -134,18 +126,9 @@ impl PyComponentColumnSelector {
         Self(ComponentColumnSelector {
             entity_path: entity_path.into(),
             component_name: component_name.0,
-            join_encoding: Default::default(),
         })
     }
 
-    fn with_dictionary_encoding(&self) -> Self {
-        Self(
-            self.0
-                .clone()
-                .with_join_encoding(re_chunk_store::JoinEncoding::DictionaryEncode),
-        )
-    }
-
     fn __repr__(&self) -> String {
         format!(
             "Component({}:{})",
@@ -630,7 +613,6 @@ impl PyRecording {
         let selector = ComponentColumnSelector {
             entity_path: entity_path.clone(),
             component_name: component_name.into(),
-            join_encoding: Default::default(),
         };
 
         self.store