From e9de566fe7f9876306f7e8112b80ee34fa711d85 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 23 Aug 2024 15:29:43 +0200 Subject: [PATCH] Remove `Loggable{Batch}::arrow_field` (#7257) It doesn't make any sense for a `ComponentBatch` to have any say in what the final `ArrowField` should look like. An `ArrowField` is a `Chunk`/`RecordBatch`/`Schema`-level concern that only makes sense during IO/transport/FFI/storage/etc, and which requires external context that a single `ComponentBatch` on its own has no idea of. --- Part of a lot of clean up I want to while we head towards: * https://github.com/rerun-io/rerun/issues/7245 * #3741 --- crates/store/re_types_core/src/archetype.rs | 42 ++------------ crates/store/re_types_core/src/lib.rs | 9 ++- crates/store/re_types_core/src/loggable.rs | 22 ------- .../store/re_types_core/src/loggable_batch.rs | 58 ------------------- 4 files changed, 12 insertions(+), 119 deletions(-) diff --git a/crates/store/re_types_core/src/archetype.rs b/crates/store/re_types_core/src/archetype.rs index dff626618ab1..c5e9fb0b3ad4 100644 --- a/crates/store/re_types_core/src/archetype.rs +++ b/crates/store/re_types_core/src/archetype.rs @@ -1,5 +1,3 @@ -use std::sync::Arc; - use crate::{ ComponentBatch, ComponentName, DeserializationResult, MaybeOwnedComponentBatch, SerializationResult, _Backtrace, @@ -220,26 +218,10 @@ impl crate::LoggableBatch for GenericIndicatorComponent { 1 } - #[inline] - fn arrow_field(&self) -> arrow2::datatypes::Field { - let name = self.name().to_string(); - arrow2::datatypes::Field::new( - name.clone(), - arrow2::datatypes::DataType::Extension( - name, - Arc::new(arrow2::datatypes::DataType::Null), - None, - ), - false, - ) - } - #[inline] fn to_arrow(&self) -> SerializationResult> { - Ok( - arrow2::array::NullArray::new(arrow2::datatypes::DataType::Null, self.num_instances()) - .boxed(), - ) + let datatype = arrow2::datatypes::DataType::Null; + Ok(arrow2::array::NullArray::new(datatype, self.num_instances()).boxed()) } } @@ -278,26 +260,10 @@ impl crate::LoggableBatch for NamedIndicatorComponent { 1 } - #[inline] - fn arrow_field(&self) -> arrow2::datatypes::Field { - let name = self.name().to_string(); - arrow2::datatypes::Field::new( - name.clone(), - arrow2::datatypes::DataType::Extension( - name, - Arc::new(arrow2::datatypes::DataType::Null), - None, - ), - false, - ) - } - #[inline] fn to_arrow(&self) -> SerializationResult> { - Ok( - arrow2::array::NullArray::new(arrow2::datatypes::DataType::Null, self.num_instances()) - .boxed(), - ) + let datatype = arrow2::datatypes::DataType::Null; + Ok(arrow2::array::NullArray::new(datatype, self.num_instances()).boxed()) } } diff --git a/crates/store/re_types_core/src/lib.rs b/crates/store/re_types_core/src/lib.rs index 86fa6374cf42..76c231cd42ff 100644 --- a/crates/store/re_types_core/src/lib.rs +++ b/crates/store/re_types_core/src/lib.rs @@ -68,7 +68,14 @@ pub trait AsComponents { comp_batch .as_ref() .to_arrow() - .map(|array| (comp_batch.as_ref().arrow_field(), array)) + .map(|array| { + let field = arrow2::datatypes::Field::new( + comp_batch.name().to_string(), + array.data_type().clone(), + false, + ); + (field, array) + }) .with_context(comp_batch.as_ref().name()) }) .collect() diff --git a/crates/store/re_types_core/src/loggable.rs b/crates/store/re_types_core/src/loggable.rs index cd84f6d9ca56..b574ce7f92c2 100644 --- a/crates/store/re_types_core/src/loggable.rs +++ b/crates/store/re_types_core/src/loggable.rs @@ -31,7 +31,6 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes { /// Given an iterator of options of owned or reference values to the current /// [`Loggable`], serializes them into an Arrow array. - /// The Arrow array's datatype will match [`Loggable::arrow_field`]. /// /// When using Rerun's builtin components & datatypes, this can only fail if the data /// exceeds the maximum number of entries in an Arrow array (2^31 for standard arrays, @@ -57,25 +56,10 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes { ) } - /// The underlying [`arrow2::datatypes::Field`], including datatype extensions. - /// - /// The default implementation will simply wrap [`Self::extended_arrow_datatype`] in a - /// [`arrow2::datatypes::Field`], which is what you want in most cases (e.g. because you want - /// to declare the field as nullable). - #[inline] - fn arrow_field() -> arrow2::datatypes::Field { - arrow2::datatypes::Field::new( - Self::name().to_string(), - Self::extended_arrow_datatype(), - false, - ) - } - // --- Optional serialization methods --- /// Given an iterator of owned or reference values to the current [`Loggable`], serializes /// them into an Arrow array. - /// The Arrow array's datatype will match [`Loggable::arrow_field`]. /// /// When using Rerun's builtin components & datatypes, this can only fail if the data /// exceeds the maximum number of entries in an Arrow array (2^31 for standard arrays, @@ -94,9 +78,6 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes { // --- Optional deserialization methods --- /// Given an Arrow array, deserializes it into a collection of [`Loggable`]s. - /// - /// This will _never_ fail if the Arrow array's datatype matches the one returned by - /// [`Loggable::arrow_field`]. #[inline] fn from_arrow(data: &dyn ::arrow2::array::Array) -> DeserializationResult> { re_tracing::profile_function!(); @@ -112,9 +93,6 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes { } /// Given an Arrow array, deserializes it into a collection of optional [`Loggable`]s. - /// - /// This will _never_ fail if the Arrow array's datatype matches the one returned by - /// [`Loggable::arrow_field`]. fn from_arrow_opt( data: &dyn ::arrow2::array::Array, ) -> DeserializationResult>> { diff --git a/crates/store/re_types_core/src/loggable_batch.rs b/crates/store/re_types_core/src/loggable_batch.rs index d7a9d6b5e2b2..4e55d67fac50 100644 --- a/crates/store/re_types_core/src/loggable_batch.rs +++ b/crates/store/re_types_core/src/loggable_batch.rs @@ -27,9 +27,6 @@ pub trait LoggableBatch { /// The number of component instances stored into this batch. fn num_instances(&self) -> usize; - /// The underlying [`arrow2::datatypes::Field`], including datatype extensions. - fn arrow_field(&self) -> arrow2::datatypes::Field; - /// Serializes the batch into an Arrow array. fn to_arrow(&self) -> SerializationResult>; } @@ -98,11 +95,6 @@ impl<'a> LoggableBatch for MaybeOwnedComponentBatch<'a> { self.as_ref().num_instances() } - #[inline] - fn arrow_field(&self) -> arrow2::datatypes::Field { - self.as_ref().arrow_field() - } - #[inline] fn to_arrow(&self) -> SerializationResult> { self.as_ref().to_arrow() @@ -126,11 +118,6 @@ impl LoggableBatch for L { 1 } - #[inline] - fn arrow_field(&self) -> arrow2::datatypes::Field { - L::arrow_field() - } - #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow([std::borrow::Cow::Borrowed(self)]) @@ -154,11 +141,6 @@ impl LoggableBatch for Option { self.is_some() as usize } - #[inline] - fn arrow_field(&self) -> arrow2::datatypes::Field { - L::arrow_field() - } - #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v))) @@ -182,11 +164,6 @@ impl LoggableBatch for Vec { self.len() } - #[inline] - fn arrow_field(&self) -> arrow2::datatypes::Field { - L::arrow_field() - } - #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v))) @@ -210,11 +187,6 @@ impl LoggableBatch for Vec> { self.len() } - #[inline] - fn arrow_field(&self) -> arrow2::datatypes::Field { - L::arrow_field() - } - #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow_opt( @@ -241,11 +213,6 @@ impl LoggableBatch for [L; N] { N } - #[inline] - fn arrow_field(&self) -> arrow2::datatypes::Field { - L::arrow_field() - } - #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v))) @@ -269,11 +236,6 @@ impl LoggableBatch for [Option; N] { N } - #[inline] - fn arrow_field(&self) -> arrow2::datatypes::Field { - L::arrow_field() - } - #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow_opt( @@ -300,11 +262,6 @@ impl<'a, L: Loggable> LoggableBatch for &'a [L] { self.len() } - #[inline] - fn arrow_field(&self) -> arrow2::datatypes::Field { - L::arrow_field() - } - #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v))) @@ -328,11 +285,6 @@ impl<'a, L: Loggable> LoggableBatch for &'a [Option] { self.len() } - #[inline] - fn arrow_field(&self) -> arrow2::datatypes::Field { - L::arrow_field() - } - #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow_opt( @@ -359,11 +311,6 @@ impl<'a, L: Loggable, const N: usize> LoggableBatch for &'a [L; N] { N } - #[inline] - fn arrow_field(&self) -> arrow2::datatypes::Field { - L::arrow_field() - } - #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v))) @@ -387,11 +334,6 @@ impl<'a, L: Loggable, const N: usize> LoggableBatch for &'a [Option; N] { N } - #[inline] - fn arrow_field(&self) -> arrow2::datatypes::Field { - L::arrow_field() - } - #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow_opt(