From ce890c56f7bd2372476186b7b950f1c6fad8e6f3 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Wed, 28 Aug 2024 18:54:58 +0200 Subject: [PATCH] Revert "Remove `Loggable{Batch}::arrow_field` (#7257)" This reverts commit e9de566fe7f9876306f7e8112b80ee34fa711d85. --- crates/store/re_types_core/src/archetype.rs | 42 ++++++++++++-- crates/store/re_types_core/src/lib.rs | 9 +-- crates/store/re_types_core/src/loggable.rs | 22 +++++++ .../store/re_types_core/src/loggable_batch.rs | 58 +++++++++++++++++++ 4 files changed, 119 insertions(+), 12 deletions(-) diff --git a/crates/store/re_types_core/src/archetype.rs b/crates/store/re_types_core/src/archetype.rs index c5e9fb0b3ad4..dff626618ab1 100644 --- a/crates/store/re_types_core/src/archetype.rs +++ b/crates/store/re_types_core/src/archetype.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use crate::{ ComponentBatch, ComponentName, DeserializationResult, MaybeOwnedComponentBatch, SerializationResult, _Backtrace, @@ -218,10 +220,26 @@ impl crate::LoggableBatch for GenericIndicatorComponent { 1 } + #[inline] + fn arrow_field(&self) -> arrow2::datatypes::Field { + let name = self.name().to_string(); + arrow2::datatypes::Field::new( + name.clone(), + arrow2::datatypes::DataType::Extension( + name, + Arc::new(arrow2::datatypes::DataType::Null), + None, + ), + false, + ) + } + #[inline] fn to_arrow(&self) -> SerializationResult> { - let datatype = arrow2::datatypes::DataType::Null; - Ok(arrow2::array::NullArray::new(datatype, self.num_instances()).boxed()) + Ok( + arrow2::array::NullArray::new(arrow2::datatypes::DataType::Null, self.num_instances()) + .boxed(), + ) } } @@ -260,10 +278,26 @@ impl crate::LoggableBatch for NamedIndicatorComponent { 1 } + #[inline] + fn arrow_field(&self) -> arrow2::datatypes::Field { + let name = self.name().to_string(); + arrow2::datatypes::Field::new( + name.clone(), + arrow2::datatypes::DataType::Extension( + name, + Arc::new(arrow2::datatypes::DataType::Null), + None, + ), + false, + ) + } + #[inline] fn to_arrow(&self) -> SerializationResult> { - let datatype = arrow2::datatypes::DataType::Null; - Ok(arrow2::array::NullArray::new(datatype, self.num_instances()).boxed()) + Ok( + arrow2::array::NullArray::new(arrow2::datatypes::DataType::Null, self.num_instances()) + .boxed(), + ) } } diff --git a/crates/store/re_types_core/src/lib.rs b/crates/store/re_types_core/src/lib.rs index 443e0254d0a0..8a57f641a96d 100644 --- a/crates/store/re_types_core/src/lib.rs +++ b/crates/store/re_types_core/src/lib.rs @@ -68,14 +68,7 @@ pub trait AsComponents { comp_batch .as_ref() .to_arrow() - .map(|array| { - let field = arrow2::datatypes::Field::new( - comp_batch.name().to_string(), - array.data_type().clone(), - false, - ); - (field, array) - }) + .map(|array| (comp_batch.as_ref().arrow_field(), array)) .with_context(comp_batch.as_ref().name()) }) .collect() diff --git a/crates/store/re_types_core/src/loggable.rs b/crates/store/re_types_core/src/loggable.rs index b574ce7f92c2..cd84f6d9ca56 100644 --- a/crates/store/re_types_core/src/loggable.rs +++ b/crates/store/re_types_core/src/loggable.rs @@ -31,6 +31,7 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes { /// Given an iterator of options of owned or reference values to the current /// [`Loggable`], serializes them into an Arrow array. + /// The Arrow array's datatype will match [`Loggable::arrow_field`]. /// /// When using Rerun's builtin components & datatypes, this can only fail if the data /// exceeds the maximum number of entries in an Arrow array (2^31 for standard arrays, @@ -56,10 +57,25 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes { ) } + /// The underlying [`arrow2::datatypes::Field`], including datatype extensions. + /// + /// The default implementation will simply wrap [`Self::extended_arrow_datatype`] in a + /// [`arrow2::datatypes::Field`], which is what you want in most cases (e.g. because you want + /// to declare the field as nullable). + #[inline] + fn arrow_field() -> arrow2::datatypes::Field { + arrow2::datatypes::Field::new( + Self::name().to_string(), + Self::extended_arrow_datatype(), + false, + ) + } + // --- Optional serialization methods --- /// Given an iterator of owned or reference values to the current [`Loggable`], serializes /// them into an Arrow array. + /// The Arrow array's datatype will match [`Loggable::arrow_field`]. /// /// When using Rerun's builtin components & datatypes, this can only fail if the data /// exceeds the maximum number of entries in an Arrow array (2^31 for standard arrays, @@ -78,6 +94,9 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes { // --- Optional deserialization methods --- /// Given an Arrow array, deserializes it into a collection of [`Loggable`]s. + /// + /// This will _never_ fail if the Arrow array's datatype matches the one returned by + /// [`Loggable::arrow_field`]. #[inline] fn from_arrow(data: &dyn ::arrow2::array::Array) -> DeserializationResult> { re_tracing::profile_function!(); @@ -93,6 +112,9 @@ pub trait Loggable: 'static + Send + Sync + Clone + Sized + SizeBytes { } /// Given an Arrow array, deserializes it into a collection of optional [`Loggable`]s. + /// + /// This will _never_ fail if the Arrow array's datatype matches the one returned by + /// [`Loggable::arrow_field`]. fn from_arrow_opt( data: &dyn ::arrow2::array::Array, ) -> DeserializationResult>> { diff --git a/crates/store/re_types_core/src/loggable_batch.rs b/crates/store/re_types_core/src/loggable_batch.rs index 4e55d67fac50..d7a9d6b5e2b2 100644 --- a/crates/store/re_types_core/src/loggable_batch.rs +++ b/crates/store/re_types_core/src/loggable_batch.rs @@ -27,6 +27,9 @@ pub trait LoggableBatch { /// The number of component instances stored into this batch. fn num_instances(&self) -> usize; + /// The underlying [`arrow2::datatypes::Field`], including datatype extensions. + fn arrow_field(&self) -> arrow2::datatypes::Field; + /// Serializes the batch into an Arrow array. fn to_arrow(&self) -> SerializationResult>; } @@ -95,6 +98,11 @@ impl<'a> LoggableBatch for MaybeOwnedComponentBatch<'a> { self.as_ref().num_instances() } + #[inline] + fn arrow_field(&self) -> arrow2::datatypes::Field { + self.as_ref().arrow_field() + } + #[inline] fn to_arrow(&self) -> SerializationResult> { self.as_ref().to_arrow() @@ -118,6 +126,11 @@ impl LoggableBatch for L { 1 } + #[inline] + fn arrow_field(&self) -> arrow2::datatypes::Field { + L::arrow_field() + } + #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow([std::borrow::Cow::Borrowed(self)]) @@ -141,6 +154,11 @@ impl LoggableBatch for Option { self.is_some() as usize } + #[inline] + fn arrow_field(&self) -> arrow2::datatypes::Field { + L::arrow_field() + } + #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v))) @@ -164,6 +182,11 @@ impl LoggableBatch for Vec { self.len() } + #[inline] + fn arrow_field(&self) -> arrow2::datatypes::Field { + L::arrow_field() + } + #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v))) @@ -187,6 +210,11 @@ impl LoggableBatch for Vec> { self.len() } + #[inline] + fn arrow_field(&self) -> arrow2::datatypes::Field { + L::arrow_field() + } + #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow_opt( @@ -213,6 +241,11 @@ impl LoggableBatch for [L; N] { N } + #[inline] + fn arrow_field(&self) -> arrow2::datatypes::Field { + L::arrow_field() + } + #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v))) @@ -236,6 +269,11 @@ impl LoggableBatch for [Option; N] { N } + #[inline] + fn arrow_field(&self) -> arrow2::datatypes::Field { + L::arrow_field() + } + #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow_opt( @@ -262,6 +300,11 @@ impl<'a, L: Loggable> LoggableBatch for &'a [L] { self.len() } + #[inline] + fn arrow_field(&self) -> arrow2::datatypes::Field { + L::arrow_field() + } + #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v))) @@ -285,6 +328,11 @@ impl<'a, L: Loggable> LoggableBatch for &'a [Option] { self.len() } + #[inline] + fn arrow_field(&self) -> arrow2::datatypes::Field { + L::arrow_field() + } + #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow_opt( @@ -311,6 +359,11 @@ impl<'a, L: Loggable, const N: usize> LoggableBatch for &'a [L; N] { N } + #[inline] + fn arrow_field(&self) -> arrow2::datatypes::Field { + L::arrow_field() + } + #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow(self.iter().map(|v| std::borrow::Cow::Borrowed(v))) @@ -334,6 +387,11 @@ impl<'a, L: Loggable, const N: usize> LoggableBatch for &'a [Option; N] { N } + #[inline] + fn arrow_field(&self) -> arrow2::datatypes::Field { + L::arrow_field() + } + #[inline] fn to_arrow(&self) -> SerializationResult> { L::to_arrow_opt(