diff --git a/crates/store/re_chunk/src/iter.rs b/crates/store/re_chunk/src/iter.rs index 571c5995c48b..1534d9ac0a28 100644 --- a/crates/store/re_chunk/src/iter.rs +++ b/crates/store/re_chunk/src/iter.rs @@ -710,7 +710,7 @@ impl Chunk { Err(err) => { if cfg!(debug_assertions) { panic!( - "deserialization failed for {}, data discarded: {}", + "[DEBUG-ONLY] deserialization failed for {}, data discarded: {}", C::name(), re_error::format_ref(&err), ); diff --git a/crates/store/re_types/definitions/rerun/datatypes.fbs b/crates/store/re_types/definitions/rerun/datatypes.fbs index 5772f4c3b8d5..fa325413e155 100644 --- a/crates/store/re_types/definitions/rerun/datatypes.fbs +++ b/crates/store/re_types/definitions/rerun/datatypes.fbs @@ -27,7 +27,6 @@ include "./datatypes/rgba32.fbs"; include "./datatypes/rotation_axis_angle.fbs"; include "./datatypes/tensor_buffer.fbs"; include "./datatypes/tensor_data.fbs"; -include "./datatypes/tensor_dimension.fbs"; include "./datatypes/tensor_dimension_selection.fbs"; include "./datatypes/time_int.fbs"; include "./datatypes/uint16.fbs"; diff --git a/crates/store/re_types/definitions/rerun/datatypes/tensor_data.fbs b/crates/store/re_types/definitions/rerun/datatypes/tensor_data.fbs index 1624be6e00f3..6301c33c7abb 100644 --- a/crates/store/re_types/definitions/rerun/datatypes/tensor_data.fbs +++ b/crates/store/re_types/definitions/rerun/datatypes/tensor_data.fbs @@ -19,8 +19,17 @@ table TensorData ( "attr.python.array_aliases": "npt.ArrayLike", "attr.rust.derive": "PartialEq," ) { - /// The shape of the tensor, including optional names for each dimension. - shape: [rerun.datatypes.TensorDimension] (order: 200); + /// The shape of the tensor, i.e. the length of each dimension. + shape: [uint64] (order: 200); + + /// The names of the dimensions of the tensor (optional). + /// + /// If set, should be the same length as [datatypes.TensorData.shape]. 
+ /// If it has a different length your names may show up improperly, + /// and some constructors may produce a warning or even an error. + /// + /// Example: `["height", "width", "channel", "batch"]`. + names: [string] (order: 250, nullable); /// The content/data. buffer: rerun.datatypes.TensorBuffer (order: 300); diff --git a/crates/store/re_types/definitions/rerun/datatypes/tensor_dimension.fbs b/crates/store/re_types/definitions/rerun/datatypes/tensor_dimension.fbs deleted file mode 100644 index 7d755a8b81e5..000000000000 --- a/crates/store/re_types/definitions/rerun/datatypes/tensor_dimension.fbs +++ /dev/null @@ -1,15 +0,0 @@ -namespace rerun.datatypes; - -// --- - -/// A single dimension within a multi-dimensional tensor. -// TODO(jleibs): Support for stride. -table TensorDimension ( - "attr.rust.derive_only": "Clone, Default, Eq, PartialEq" -) { - /// The length of this dimension. - size: ulong (order: 100); - - /// The name of this dimension, e.g. "width", "height", "channel", "batch', …. - name: string (order: 200, nullable); -} diff --git a/crates/store/re_types/src/archetypes/depth_image_ext.rs b/crates/store/re_types/src/archetypes/depth_image_ext.rs index b1c7044c0914..af72285a3edd 100644 --- a/crates/store/re_types/src/archetypes/depth_image_ext.rs +++ b/crates/store/re_types/src/archetypes/depth_image_ext.rs @@ -20,7 +20,7 @@ impl DepthImage { let tensor_data: TensorData = data .try_into() .map_err(ImageConstructionError::TensorDataConversion)?; - let shape = tensor_data.shape; + let TensorData { shape, buffer, .. 
} = tensor_data; let non_empty_dim_inds = find_non_empty_dim_indices(&shape); @@ -28,13 +28,11 @@ impl DepthImage { return Err(ImageConstructionError::BadImageShape(shape)); } - let (blob, datatype) = blob_and_datatype_from_tensor(tensor_data.buffer); + let (blob, datatype) = blob_and_datatype_from_tensor(buffer); - let (height, width) = (&shape[non_empty_dim_inds[0]], &shape[non_empty_dim_inds[1]]); - let height = height.size as u32; - let width = width.size as u32; + let (height, width) = (shape[non_empty_dim_inds[0]], shape[non_empty_dim_inds[1]]); - let image_format = ImageFormat::depth([width, height], datatype); + let image_format = ImageFormat::depth([width as u32, height as u32], datatype); Ok(Self { buffer: blob.into(), diff --git a/crates/store/re_types/src/archetypes/image_ext.rs b/crates/store/re_types/src/archetypes/image_ext.rs index 1beff5061887..0304cecb27fe 100644 --- a/crates/store/re_types/src/archetypes/image_ext.rs +++ b/crates/store/re_types/src/archetypes/image_ext.rs @@ -33,17 +33,17 @@ impl Image { let tensor_data: TensorData = data .try_into() .map_err(ImageConstructionError::TensorDataConversion)?; - let shape = tensor_data.shape; + let TensorData { shape, buffer, .. 
} = tensor_data; let non_empty_dim_inds = find_non_empty_dim_indices(&shape); let is_shape_correct = match color_model { ColorModel::L => non_empty_dim_inds.len() == 2, ColorModel::RGB | ColorModel::BGR => { - non_empty_dim_inds.len() == 3 && shape[non_empty_dim_inds[2]].size == 3 + non_empty_dim_inds.len() == 3 && shape[non_empty_dim_inds[2]] == 3 } ColorModel::RGBA | ColorModel::BGRA => { - non_empty_dim_inds.len() == 3 && shape[non_empty_dim_inds[2]].size == 4 + non_empty_dim_inds.len() == 3 && shape[non_empty_dim_inds[2]] == 4 } }; @@ -51,15 +51,13 @@ impl Image { return Err(ImageConstructionError::BadImageShape(shape)); } - let (blob, datatype) = blob_and_datatype_from_tensor(tensor_data.buffer); + let (blob, datatype) = blob_and_datatype_from_tensor(buffer); - let (height, width) = (&shape[non_empty_dim_inds[0]], &shape[non_empty_dim_inds[1]]); - let height = height.size as u32; - let width = width.size as u32; + let (height, width) = (shape[non_empty_dim_inds[0]], shape[non_empty_dim_inds[1]]); let image_format = ImageFormat { - width, - height, + width: width as _, + height: height as _, pixel_format: None, channel_datatype: Some(datatype), color_model: Some(color_model), diff --git a/crates/store/re_types/src/archetypes/segmentation_image_ext.rs b/crates/store/re_types/src/archetypes/segmentation_image_ext.rs index 4c2f71375b39..a90856eb24d3 100644 --- a/crates/store/re_types/src/archetypes/segmentation_image_ext.rs +++ b/crates/store/re_types/src/archetypes/segmentation_image_ext.rs @@ -19,7 +19,7 @@ impl SegmentationImage { let tensor_data: TensorData = data .try_into() .map_err(ImageConstructionError::TensorDataConversion)?; - let shape = tensor_data.shape; + let TensorData { shape, buffer, .. 
} = tensor_data; let non_empty_dim_inds = find_non_empty_dim_indices(&shape); @@ -27,13 +27,11 @@ impl SegmentationImage { return Err(ImageConstructionError::BadImageShape(shape)); } - let (blob, datatype) = blob_and_datatype_from_tensor(tensor_data.buffer); + let (blob, datatype) = blob_and_datatype_from_tensor(buffer); - let (height, width) = (&shape[non_empty_dim_inds[0]], &shape[non_empty_dim_inds[1]]); - let height = height.size as u32; - let width = width.size as u32; + let (height, width) = (shape[non_empty_dim_inds[0]], shape[non_empty_dim_inds[1]]); - let image_format = ImageFormat::segmentation([width, height], datatype); + let image_format = ImageFormat::segmentation([width as _, height as _], datatype); Ok(Self { buffer: blob.into(), diff --git a/crates/store/re_types/src/archetypes/tensor_ext.rs b/crates/store/re_types/src/archetypes/tensor_ext.rs index c7063b0b6264..2bfa7c1f08ef 100644 --- a/crates/store/re_types/src/archetypes/tensor_ext.rs +++ b/crates/store/re_types/src/archetypes/tensor_ext.rs @@ -1,4 +1,4 @@ -use crate::datatypes::{TensorData, TensorDimension}; +use crate::datatypes::TensorData; use re_types_core::ArrowString; @@ -24,37 +24,16 @@ impl Tensor { /// Update the `names` of the contained [`TensorData`] dimensions. /// - /// Any existing Dimension names will be overwritten. + /// Any existing names will be overwritten. /// - /// If too many, or too few names are provided, this function will warn and only - /// update the subset of names that it can. - pub fn with_dim_names(self, names: impl IntoIterator>) -> Self { - let names: Vec<_> = names.into_iter().map(|x| Some(x.into())).collect(); - if names.len() != self.data.0.shape.len() { - re_log::warn_once!( - "Wrong number of names provided for tensor dimension. 
{} provided but {} expected.", - names.len(), - self.data.0.shape.len(), - ); - } - Self { - data: TensorData { - shape: self - .data - .0 - .shape - .into_iter() - .zip(names.into_iter().chain(std::iter::repeat(None))) - .map(|(dim, name)| TensorDimension { - size: dim.size, - name: name.or(dim.name), - }) - .collect(), - buffer: self.data.0.buffer, - } - .into(), - value_range: None, - } + /// If the wrong number of names are given, a warning will be logged, + /// and the names might not show up correctly. + pub fn with_dim_names( + mut self, + names: impl IntoIterator>, + ) -> Self { + self.data.0 = self.data.0.with_dim_names(names); + self } } diff --git a/crates/store/re_types/src/datatypes/.gitattributes b/crates/store/re_types/src/datatypes/.gitattributes index 289cbd45793c..5995b6d6c716 100644 --- a/crates/store/re_types/src/datatypes/.gitattributes +++ b/crates/store/re_types/src/datatypes/.gitattributes @@ -25,7 +25,6 @@ rgba32.rs linguist-generated=true rotation_axis_angle.rs linguist-generated=true tensor_buffer.rs linguist-generated=true tensor_data.rs linguist-generated=true -tensor_dimension.rs linguist-generated=true tensor_dimension_index_selection.rs linguist-generated=true tensor_dimension_selection.rs linguist-generated=true utf8pair.rs linguist-generated=true diff --git a/crates/store/re_types/src/datatypes/mod.rs b/crates/store/re_types/src/datatypes/mod.rs index d1edf377eb08..f18d728fd794 100644 --- a/crates/store/re_types/src/datatypes/mod.rs +++ b/crates/store/re_types/src/datatypes/mod.rs @@ -46,8 +46,6 @@ mod tensor_buffer; mod tensor_buffer_ext; mod tensor_data; mod tensor_data_ext; -mod tensor_dimension; -mod tensor_dimension_ext; mod tensor_dimension_index_selection; mod tensor_dimension_selection; mod tensor_dimension_selection_ext; @@ -95,7 +93,6 @@ pub use self::rgba32::Rgba32; pub use self::rotation_axis_angle::RotationAxisAngle; pub use self::tensor_buffer::TensorBuffer; pub use self::tensor_data::TensorData; -pub use 
self::tensor_dimension::TensorDimension; pub use self::tensor_dimension_index_selection::TensorDimensionIndexSelection; pub use self::tensor_dimension_selection::TensorDimensionSelection; pub use self::utf8pair::Utf8Pair; diff --git a/crates/store/re_types/src/datatypes/tensor_data.rs b/crates/store/re_types/src/datatypes/tensor_data.rs index 0d19cb6190a6..4d75cc0c12e3 100644 --- a/crates/store/re_types/src/datatypes/tensor_data.rs +++ b/crates/store/re_types/src/datatypes/tensor_data.rs @@ -28,8 +28,17 @@ use ::re_types_core::{DeserializationError, DeserializationResult}; /// which stores a contiguous array of typed values. #[derive(Clone, Debug, PartialEq)] pub struct TensorData { - /// The shape of the tensor, including optional names for each dimension. - pub shape: Vec, + /// The shape of the tensor, i.e. the length of each dimension. + pub shape: ::re_types_core::ArrowBuffer, + + /// The names of the dimensions of the tensor (optional). + /// + /// If set, should be the same length as [`datatypes::TensorData::shape`][crate::datatypes::TensorData::shape]. + /// If it has a different length your names may show up improperly, + /// and some constructors may produce a warning or even an error. + /// + /// Example: `["height", "width", "channel", "batch"]`. + pub names: Option>, /// The content/data. 
pub buffer: crate::datatypes::TensorBuffer, @@ -47,11 +56,20 @@ impl ::re_types_core::Loggable for TensorData { "shape", DataType::List(std::sync::Arc::new(Field::new( "item", - ::arrow_datatype(), + DataType::UInt64, false, ))), false, ), + Field::new( + "names", + DataType::List(std::sync::Arc::new(Field::new( + "item", + DataType::Utf8, + false, + ))), + true, + ), Field::new( "buffer", ::arrow_datatype(), @@ -76,11 +94,20 @@ impl ::re_types_core::Loggable for TensorData { "shape", DataType::List(std::sync::Arc::new(Field::new( "item", - ::arrow_datatype(), + DataType::UInt64, false, ))), false, ), + Field::new( + "names", + DataType::List(std::sync::Arc::new(Field::new( + "item", + DataType::Utf8, + false, + ))), + true, + ), Field::new( "buffer", ::arrow_datatype(), @@ -113,29 +140,73 @@ impl ::re_types_core::Loggable for TensorData { let any_nones = somes.iter().any(|some| !*some); any_nones.then(|| somes.into()) }; + { + let offsets = + arrow::buffer::OffsetBuffer::::from_lengths(shape.iter().map( + |opt| opt.as_ref().map_or(0, |datum| datum.num_instances()), + )); + let shape_inner_data: ScalarBuffer<_> = shape + .iter() + .flatten() + .map(|b| b.as_slice()) + .collect::>() + .concat() + .into(); + let shape_inner_validity: Option = None; + as_array_ref(ListArray::try_new( + std::sync::Arc::new(Field::new("item", DataType::UInt64, false)), + offsets, + as_array_ref(PrimitiveArray::::new( + shape_inner_data, + shape_inner_validity, + )), + shape_validity, + )?) 
+ } + }, + { + let (somes, names): (Vec<_>, Vec<_>) = data + .iter() + .map(|datum| { + let datum = + datum.as_ref().map(|datum| datum.names.clone()).flatten(); + (datum.is_some(), datum) + }) + .unzip(); + let names_validity: Option = { + let any_nones = somes.iter().any(|some| !*some); + any_nones.then(|| somes.into()) + }; { let offsets = arrow::buffer::OffsetBuffer::::from_lengths( - shape + names .iter() .map(|opt| opt.as_ref().map_or(0, |datum| datum.len())), ); - let shape_inner_data: Vec<_> = - shape.into_iter().flatten().flatten().collect(); - let shape_inner_validity: Option = None; + let names_inner_data: Vec<_> = + names.into_iter().flatten().flatten().collect(); + let names_inner_validity: Option = None; as_array_ref(ListArray::try_new( - std::sync::Arc::new(Field::new( - "item", - ::arrow_datatype(), - false, - )), + std::sync::Arc::new(Field::new("item", DataType::Utf8, false)), offsets, { - _ = shape_inner_validity; - crate::datatypes::TensorDimension::to_arrow_opt( - shape_inner_data.into_iter().map(Some), - )? + let offsets = arrow::buffer::OffsetBuffer::::from_lengths( + names_inner_data.iter().map(|datum| datum.len()), + ); + let inner_data: arrow::buffer::Buffer = names_inner_data + .into_iter() + .flat_map(|s| s.into_arrow2_buffer()) + .collect(); + #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] + as_array_ref(unsafe { + StringArray::new_unchecked( + offsets, + inner_data, + names_inner_validity, + ) + }) }, - shape_validity, + names_validity, )?) 
} }, @@ -208,7 +279,7 @@ impl ::re_types_core::Loggable for TensorData { .ok_or_else(|| { let expected = DataType::List(std::sync::Arc::new(Field::new( "item", - ::arrow_datatype(), + DataType::UInt64, false, ))); let actual = arrow_data.data_type().clone(); @@ -220,10 +291,133 @@ impl ::re_types_core::Loggable for TensorData { } else { let arrow_data_inner = { let arrow_data_inner = &**arrow_data.values(); - crate::datatypes::TensorDimension::from_arrow2_opt(arrow_data_inner) + arrow_data_inner + .as_any() + .downcast_ref::() + .ok_or_else(|| { + let expected = DataType::UInt64; + let actual = arrow_data_inner.data_type().clone(); + DeserializationError::datatype_mismatch(expected, actual) + }) .with_context("rerun.datatypes.TensorData#shape")? + .values() + }; + let offsets = arrow_data.offsets(); + arrow2::bitmap::utils::ZipValidity::new_with_validity( + offsets.windows(2), + arrow_data.validity(), + ) + .map(|elem| { + elem.map(|window| { + let start = window[0] as usize; + let end = window[1] as usize; + if arrow_data_inner.len() < end { + return Err(DeserializationError::offset_slice_oob( + (start, end), + arrow_data_inner.len(), + )); + } + + #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] + let data = unsafe { + arrow_data_inner + .clone() + .sliced_unchecked(start, end - start) + }; + let data = ::re_types_core::ArrowBuffer::from(data); + Ok(data) + }) + .transpose() + }) + .collect::>>>()? 
+ } + .into_iter() + } + }; + let names = { + if !arrays_by_name.contains_key("names") { + return Err(DeserializationError::missing_struct_field( + Self::arrow_datatype(), + "names", + )) + .with_context("rerun.datatypes.TensorData"); + } + let arrow_data = &**arrays_by_name["names"]; + { + let arrow_data = arrow_data + .as_any() + .downcast_ref::>() + .ok_or_else(|| { + let expected = DataType::List(std::sync::Arc::new(Field::new( + "item", + DataType::Utf8, + false, + ))); + let actual = arrow_data.data_type().clone(); + DeserializationError::datatype_mismatch(expected, actual) + }) + .with_context("rerun.datatypes.TensorData#names")?; + if arrow_data.is_empty() { + Vec::new() + } else { + let arrow_data_inner = { + let arrow_data_inner = &**arrow_data.values(); + { + let arrow_data_inner = arrow_data_inner + .as_any() + .downcast_ref::>() + .ok_or_else(|| { + let expected = DataType::Utf8; + let actual = arrow_data_inner.data_type().clone(); + DeserializationError::datatype_mismatch( + expected, actual, + ) + }) + .with_context("rerun.datatypes.TensorData#names")?; + let arrow_data_inner_buf = arrow_data_inner.values(); + let offsets = arrow_data_inner.offsets(); + arrow2::bitmap::utils::ZipValidity::new_with_validity( + offsets.windows(2), + arrow_data_inner.validity(), + ) + .map(|elem| { + elem.map(|window| { + let start = window[0] as usize; + let end = window[1] as usize; + let len = end - start; + if arrow_data_inner_buf.len() < end { + return Err( + DeserializationError::offset_slice_oob( + (start, end), + arrow_data_inner_buf.len(), + ), + ); + } + + #[allow( + unsafe_code, + clippy::undocumented_unsafe_blocks + )] + let data = unsafe { + arrow_data_inner_buf + .clone() + .sliced_unchecked(start, len) + }; + Ok(data) + }) + .transpose() + }) + .map(|res_or_opt| { + res_or_opt.map(|res_or_opt| { + res_or_opt + .map(|v| ::re_types_core::ArrowString::from(v)) + }) + }) + .collect::>>>() + .with_context("rerun.datatypes.TensorData#names")? 
.into_iter() - .collect::>() + } + .collect::>() }; let offsets = arrow_data.offsets(); arrow2::bitmap::utils::ZipValidity::new_with_validity( @@ -272,15 +466,16 @@ impl ::re_types_core::Loggable for TensorData { .into_iter() }; arrow2::bitmap::utils::ZipValidity::new_with_validity( - ::itertools::izip!(shape, buffer), + ::itertools::izip!(shape, names, buffer), arrow_data.validity(), ) .map(|opt| { - opt.map(|(shape, buffer)| { + opt.map(|(shape, names, buffer)| { Ok(Self { shape: shape .ok_or_else(DeserializationError::missing_data) .with_context("rerun.datatypes.TensorData#shape")?, + names, buffer: buffer .ok_or_else(DeserializationError::missing_data) .with_context("rerun.datatypes.TensorData#buffer")?, @@ -298,12 +493,13 @@ impl ::re_types_core::Loggable for TensorData { impl ::re_types_core::SizeBytes for TensorData { #[inline] fn heap_size_bytes(&self) -> u64 { - self.shape.heap_size_bytes() + self.buffer.heap_size_bytes() + self.shape.heap_size_bytes() + self.names.heap_size_bytes() + self.buffer.heap_size_bytes() } #[inline] fn is_pod() -> bool { - >::is_pod() + <::re_types_core::ArrowBuffer>::is_pod() + && >>::is_pod() && ::is_pod() } } diff --git a/crates/store/re_types/src/datatypes/tensor_data_ext.rs b/crates/store/re_types/src/datatypes/tensor_data_ext.rs index bca0ddde0c2d..537368d08ace 100644 --- a/crates/store/re_types/src/datatypes/tensor_data_ext.rs +++ b/crates/store/re_types/src/datatypes/tensor_data_ext.rs @@ -1,3 +1,5 @@ +use re_types_core::{ArrowBuffer, ArrowString}; + use crate::tensor_data::{TensorCastError, TensorDataType, TensorElement}; #[cfg(feature = "image")] @@ -6,33 +8,69 @@ use crate::tensor_data::TensorImageLoadError; #[allow(unused_imports)] // Used for docstring links use crate::archetypes::EncodedImage; -use super::{TensorBuffer, TensorData, TensorDimension}; +use super::{TensorBuffer, TensorData}; // ---------------------------------------------------------------------------- impl TensorData { /// Create a new tensor. 
#[inline] - pub fn new(shape: Vec, buffer: TensorBuffer) -> Self { - Self { shape, buffer } + pub fn new(shape: impl Into>, buffer: TensorBuffer) -> Self { + Self { + shape: shape.into(), + names: None, + buffer, + } } - /// The shape of the tensor, including optional dimension names. + /// Set the names of the dimensions to the provided names. + /// + /// Any existing names will be overwritten. + /// + /// If the wrong number of names are given, a warning will be logged, + /// and the names might not show up correctly. + pub fn with_dim_names( + mut self, + names: impl IntoIterator>, + ) -> Self { + let names: Vec = names.into_iter().map(|x| x.into()).collect(); + + if names.len() != self.shape.len() { + re_log::warn_once!( + "Wrong number of names provided for tensor dimension. {} provided but {} expected. The names might not show up correctly.", + names.len(), + self.shape.len(), + ); + } + + self.names = Some(names); + + self + } + + /// The shape of the tensor. #[inline] - pub fn shape(&self) -> &[TensorDimension] { + pub fn shape(&self) -> &[u64] { self.shape.as_slice() } + /// Get the name of a specific dimension. + /// + /// Returns `None` if the dimension does not have a name. + pub fn dim_name(&self, dim: usize) -> Option<&ArrowString> { + self.names.as_ref().and_then(|names| names.get(dim)) + } + /// Returns the shape of the tensor with all leading & trailing dimensions of size 1 ignored. /// /// If all dimension sizes are one, this returns only the first dimension. 
#[inline] - pub fn shape_short(&self) -> &[TensorDimension] { + pub fn shape_short(&self) -> &[u64] { if self.shape.is_empty() { &self.shape } else { - let first_not_one = self.shape.iter().position(|dim| dim.size != 1); - let last_not_one = self.shape.iter().rev().position(|dim| dim.size != 1); + let first_not_one = self.shape.iter().position(|&dim| dim != 1); + let last_not_one = self.shape.iter().rev().position(|&dim| dim != 1); &self.shape[first_not_one.unwrap_or(0)..self.shape.len() - last_not_one.unwrap_or(0)] } } @@ -53,7 +91,7 @@ impl TensorData { if self.shape.is_empty() { false } else { - self.shape.iter().filter(|dim| dim.size > 1).count() <= 1 + self.shape.iter().filter(|&&dim| dim > 1).count() <= 1 } } @@ -63,12 +101,12 @@ impl TensorData { pub fn get(&self, index: &[u64]) -> Option { let mut stride: usize = 1; let mut offset: usize = 0; - for (TensorDimension { size, .. }, index) in self.shape.iter().zip(index).rev() { + for (&size, &index) in self.shape.iter().zip(index).rev() { if size <= index { return None; } - offset += *index as usize * stride; - stride *= *size as usize; + offset += index as usize * stride; + stride *= size as usize; } match &self.buffer { @@ -103,7 +141,8 @@ impl Default for TensorData { #[inline] fn default() -> Self { Self { - shape: Vec::new(), + shape: Default::default(), + names: None, buffer: TensorBuffer::U8(Vec::new().into()), } } @@ -117,7 +156,7 @@ macro_rules! ndarray_from_tensor { type Error = TensorCastError; fn try_from(value: &'a TensorData) -> Result { - let shape: Vec<_> = value.shape.iter().map(|d| d.size as usize).collect(); + let shape: Vec = value.shape.iter().map(|&d| d as usize).collect(); if let TensorBuffer::$variant(data) = &value.buffer { ndarray::ArrayViewD::from_shape(shape, data.as_slice()) @@ -138,26 +177,17 @@ macro_rules! 
tensor_from_ndarray { type Error = TensorCastError; fn try_from(view: ::ndarray::ArrayView<'a, $type, D>) -> Result { - let shape = view - .shape() - .iter() - .map(|dim| TensorDimension { - size: *dim as u64, - name: None, - }) - .collect(); + let shape = ArrowBuffer::from_iter(view.shape().iter().map(|&dim| dim as u64)); match view.to_slice() { - Some(slice) => Ok(TensorData { + Some(slice) => Ok(TensorData::new( shape, - buffer: TensorBuffer::$variant(Vec::from(slice).into()), - }), - None => Ok(TensorData { + TensorBuffer::$variant(Vec::from(slice).into()), + )), + None => Ok(TensorData::new( shape, - buffer: TensorBuffer::$variant( - view.iter().cloned().collect::>().into(), - ), - }), + TensorBuffer::$variant(view.iter().cloned().collect::>().into()), + )), } } } @@ -166,14 +196,7 @@ macro_rules! tensor_from_ndarray { type Error = TensorCastError; fn try_from(value: ndarray::Array<$type, D>) -> Result { - let shape = value - .shape() - .iter() - .map(|dim| TensorDimension { - size: *dim as u64, - name: None, - }) - .collect(); + let shape = ArrowBuffer::from_iter(value.shape().iter().map(|&dim| dim as u64)); let vec = if value.is_standard_layout() { let (mut vec, offset) = value.into_raw_vec_and_offset(); @@ -190,28 +213,22 @@ macro_rules! 
tensor_from_ndarray { value.into_iter().collect::>() }; - Ok(Self { - shape, - buffer: TensorBuffer::$variant(vec.into()), - }) + Ok(Self::new(shape, TensorBuffer::$variant(vec.into()))) } } impl From> for TensorData { fn from(vec: Vec<$type>) -> Self { - TensorData { - shape: vec![TensorDimension::unnamed(vec.len() as u64)], - buffer: TensorBuffer::$variant(vec.into()), - } + Self::new(vec![vec.len() as u64], TensorBuffer::$variant(vec.into())) } } impl From<&[$type]> for TensorData { fn from(slice: &[$type]) -> Self { - TensorData { - shape: vec![TensorDimension::unnamed(slice.len() as u64)], - buffer: TensorBuffer::$variant(slice.into()), - } + Self::new( + vec![slice.len() as u64], + TensorBuffer::$variant(slice.into()), + ) } } }; @@ -247,7 +264,7 @@ impl<'a> TryFrom<&'a TensorData> for ::ndarray::ArrayViewD<'a, u8> { fn try_from(value: &'a TensorData) -> Result { match &value.buffer { TensorBuffer::U8(data) => { - let shape: Vec<_> = value.shape.iter().map(|d| d.size as usize).collect(); + let shape: Vec = value.shape.iter().map(|&d| d as usize).collect(); ndarray::ArrayViewD::from_shape(shape, bytemuck::cast_slice(data.as_slice())) .map_err(|err| TensorCastError::BadTensorShape { source: err }) } @@ -363,19 +380,15 @@ impl TensorData { )); } }; - let shape = if depth == 1 { - vec![ - TensorDimension::height(h as _), - TensorDimension::width(w as _), - ] + let (shape, names) = if depth == 1 { + (vec![h as _, w as _], vec!["height", "width"]) } else { - vec![ - TensorDimension::height(h as _), - TensorDimension::width(w as _), - TensorDimension::depth(depth), - ] + ( + vec![h as _, w as _, depth], + vec!["height", "width", "depth"], + ) }; - Ok(Self { shape, buffer }) + Ok(Self::new(shape, buffer).with_dim_names(names)) } } diff --git a/crates/store/re_types/src/datatypes/tensor_dimension.rs b/crates/store/re_types/src/datatypes/tensor_dimension.rs deleted file mode 100644 index 619c954eea2f..000000000000 --- 
a/crates/store/re_types/src/datatypes/tensor_dimension.rs +++ /dev/null @@ -1,266 +0,0 @@ -// DO NOT EDIT! This file was auto-generated by crates/build/re_types_builder/src/codegen/rust/api.rs -// Based on "crates/store/re_types/definitions/rerun/datatypes/tensor_dimension.fbs". - -#![allow(unused_imports)] -#![allow(unused_parens)] -#![allow(clippy::clone_on_copy)] -#![allow(clippy::cloned_instead_of_copied)] -#![allow(clippy::map_flatten)] -#![allow(clippy::needless_question_mark)] -#![allow(clippy::new_without_default)] -#![allow(clippy::redundant_closure)] -#![allow(clippy::too_many_arguments)] -#![allow(clippy::too_many_lines)] - -use ::re_types_core::external::arrow2; -use ::re_types_core::SerializationResult; -use ::re_types_core::{ComponentBatch, ComponentBatchCowWithDescriptor}; -use ::re_types_core::{ComponentDescriptor, ComponentName}; -use ::re_types_core::{DeserializationError, DeserializationResult}; - -/// **Datatype**: A single dimension within a multi-dimensional tensor. -#[derive(Clone, Default, Eq, PartialEq)] -pub struct TensorDimension { - /// The length of this dimension. - pub size: u64, - - /// The name of this dimension, e.g. "width", "height", "channel", "batch', …. 
- pub name: Option<::re_types_core::ArrowString>, -} - -::re_types_core::macros::impl_into_cow!(TensorDimension); - -impl ::re_types_core::Loggable for TensorDimension { - #[inline] - fn arrow_datatype() -> arrow::datatypes::DataType { - #![allow(clippy::wildcard_imports)] - use arrow::datatypes::*; - DataType::Struct(Fields::from(vec![ - Field::new("size", DataType::UInt64, false), - Field::new("name", DataType::Utf8, true), - ])) - } - - fn to_arrow_opt<'a>( - data: impl IntoIterator>>>, - ) -> SerializationResult - where - Self: Clone + 'a, - { - #![allow(clippy::wildcard_imports)] - #![allow(clippy::manual_is_variant_and)] - use ::re_types_core::{arrow_helpers::as_array_ref, Loggable as _, ResultExt as _}; - use arrow::{array::*, buffer::*, datatypes::*}; - Ok({ - let fields = Fields::from(vec![ - Field::new("size", DataType::UInt64, false), - Field::new("name", DataType::Utf8, true), - ]); - let (somes, data): (Vec<_>, Vec<_>) = data - .into_iter() - .map(|datum| { - let datum: Option<::std::borrow::Cow<'a, Self>> = datum.map(Into::into); - (datum.is_some(), datum) - }) - .unzip(); - let validity: Option = { - let any_nones = somes.iter().any(|some| !*some); - any_nones.then(|| somes.into()) - }; - as_array_ref(StructArray::new( - fields, - vec![ - { - let (somes, size): (Vec<_>, Vec<_>) = data - .iter() - .map(|datum| { - let datum = datum.as_ref().map(|datum| datum.size.clone()); - (datum.is_some(), datum) - }) - .unzip(); - let size_validity: Option = { - let any_nones = somes.iter().any(|some| !*some); - any_nones.then(|| somes.into()) - }; - as_array_ref(PrimitiveArray::::new( - ScalarBuffer::from( - size.into_iter() - .map(|v| v.unwrap_or_default()) - .collect::>(), - ), - size_validity, - )) - }, - { - let (somes, name): (Vec<_>, Vec<_>) = data - .iter() - .map(|datum| { - let datum = - datum.as_ref().map(|datum| datum.name.clone()).flatten(); - (datum.is_some(), datum) - }) - .unzip(); - let name_validity: Option = { - let any_nones = 
somes.iter().any(|some| !*some); - any_nones.then(|| somes.into()) - }; - { - let offsets = - arrow::buffer::OffsetBuffer::::from_lengths(name.iter().map( - |opt| opt.as_ref().map(|datum| datum.len()).unwrap_or_default(), - )); - let inner_data: arrow::buffer::Buffer = name - .into_iter() - .flatten() - .flat_map(|s| s.into_arrow2_buffer()) - .collect(); - #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] - as_array_ref(unsafe { - StringArray::new_unchecked(offsets, inner_data, name_validity) - }) - } - }, - ], - validity, - )) - }) - } - - fn from_arrow2_opt( - arrow_data: &dyn arrow2::array::Array, - ) -> DeserializationResult>> - where - Self: Sized, - { - #![allow(clippy::wildcard_imports)] - use ::re_types_core::{Loggable as _, ResultExt as _}; - use arrow::datatypes::*; - use arrow2::{array::*, buffer::*}; - Ok({ - let arrow_data = arrow_data - .as_any() - .downcast_ref::() - .ok_or_else(|| { - let expected = Self::arrow_datatype(); - let actual = arrow_data.data_type().clone(); - DeserializationError::datatype_mismatch(expected, actual) - }) - .with_context("rerun.datatypes.TensorDimension")?; - if arrow_data.is_empty() { - Vec::new() - } else { - let (arrow_data_fields, arrow_data_arrays) = - (arrow_data.fields(), arrow_data.values()); - let arrays_by_name: ::std::collections::HashMap<_, _> = arrow_data_fields - .iter() - .map(|field| field.name.as_str()) - .zip(arrow_data_arrays) - .collect(); - let size = { - if !arrays_by_name.contains_key("size") { - return Err(DeserializationError::missing_struct_field( - Self::arrow_datatype(), - "size", - )) - .with_context("rerun.datatypes.TensorDimension"); - } - let arrow_data = &**arrays_by_name["size"]; - arrow_data - .as_any() - .downcast_ref::() - .ok_or_else(|| { - let expected = DataType::UInt64; - let actual = arrow_data.data_type().clone(); - DeserializationError::datatype_mismatch(expected, actual) - }) - .with_context("rerun.datatypes.TensorDimension#size")? 
- .into_iter() - .map(|opt| opt.copied()) - }; - let name = { - if !arrays_by_name.contains_key("name") { - return Err(DeserializationError::missing_struct_field( - Self::arrow_datatype(), - "name", - )) - .with_context("rerun.datatypes.TensorDimension"); - } - let arrow_data = &**arrays_by_name["name"]; - { - let arrow_data = arrow_data - .as_any() - .downcast_ref::>() - .ok_or_else(|| { - let expected = DataType::Utf8; - let actual = arrow_data.data_type().clone(); - DeserializationError::datatype_mismatch(expected, actual) - }) - .with_context("rerun.datatypes.TensorDimension#name")?; - let arrow_data_buf = arrow_data.values(); - let offsets = arrow_data.offsets(); - arrow2::bitmap::utils::ZipValidity::new_with_validity( - offsets.windows(2), - arrow_data.validity(), - ) - .map(|elem| { - elem.map(|window| { - let start = window[0] as usize; - let end = window[1] as usize; - let len = end - start; - if arrow_data_buf.len() < end { - return Err(DeserializationError::offset_slice_oob( - (start, end), - arrow_data_buf.len(), - )); - } - - #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] - let data = - unsafe { arrow_data_buf.clone().sliced_unchecked(start, len) }; - Ok(data) - }) - .transpose() - }) - .map(|res_or_opt| { - res_or_opt.map(|res_or_opt| { - res_or_opt.map(|v| ::re_types_core::ArrowString::from(v)) - }) - }) - .collect::>>>() - .with_context("rerun.datatypes.TensorDimension#name")? - .into_iter() - } - }; - arrow2::bitmap::utils::ZipValidity::new_with_validity( - ::itertools::izip!(size, name), - arrow_data.validity(), - ) - .map(|opt| { - opt.map(|(size, name)| { - Ok(Self { - size: size - .ok_or_else(DeserializationError::missing_data) - .with_context("rerun.datatypes.TensorDimension#size")?, - name, - }) - }) - .transpose() - }) - .collect::>>() - .with_context("rerun.datatypes.TensorDimension")? 
- } - }) - } -} - -impl ::re_types_core::SizeBytes for TensorDimension { - #[inline] - fn heap_size_bytes(&self) -> u64 { - self.size.heap_size_bytes() + self.name.heap_size_bytes() - } - - #[inline] - fn is_pod() -> bool { - ::is_pod() && >::is_pod() - } -} diff --git a/crates/store/re_types/src/datatypes/tensor_dimension_ext.rs b/crates/store/re_types/src/datatypes/tensor_dimension_ext.rs deleted file mode 100644 index 8f50ab11ecd9..000000000000 --- a/crates/store/re_types/src/datatypes/tensor_dimension_ext.rs +++ /dev/null @@ -1,60 +0,0 @@ -use super::TensorDimension; - -impl TensorDimension { - const DEFAULT_NAME_WIDTH: &'static str = "width"; - const DEFAULT_NAME_HEIGHT: &'static str = "height"; - const DEFAULT_NAME_DEPTH: &'static str = "depth"; - - /// Create a new dimension with a given size, and the name "height". - #[inline] - pub fn height(size: u64) -> Self { - Self::named(size, String::from(Self::DEFAULT_NAME_HEIGHT)) - } - - /// Create a new dimension with a given size, and the name "width". - #[inline] - pub fn width(size: u64) -> Self { - Self::named(size, String::from(Self::DEFAULT_NAME_WIDTH)) - } - - /// Create a new dimension with a given size, and the name "depth". - #[inline] - pub fn depth(size: u64) -> Self { - Self::named(size, String::from(Self::DEFAULT_NAME_DEPTH)) - } - - /// Create a named dimension. - #[inline] - pub fn named(size: u64, name: impl Into) -> Self { - Self { - size, - name: Some(name.into()), - } - } - - /// Create an unnamed dimension. 
- #[inline] - pub fn unnamed(size: u64) -> Self { - Self { size, name: None } - } -} - -impl std::fmt::Debug for TensorDimension { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if let Some(name) = &self.name { - write!(f, "{}={}", name, self.size) - } else { - self.size.fmt(f) - } - } -} - -impl std::fmt::Display for TensorDimension { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if let Some(name) = &self.name { - write!(f, "{}={}", name, self.size) - } else { - self.size.fmt(f) - } - } -} diff --git a/crates/store/re_types/src/image.rs b/crates/store/re_types/src/image.rs index 75e13f6f1e16..1041f07ff38b 100644 --- a/crates/store/re_types/src/image.rs +++ b/crates/store/re_types/src/image.rs @@ -1,10 +1,11 @@ //! Image-related utilities. +use re_types_core::ArrowBuffer; use smallvec::{smallvec, SmallVec}; use crate::{ datatypes::ChannelDatatype, - datatypes::{Blob, TensorBuffer, TensorData, TensorDimension}, + datatypes::{Blob, TensorBuffer, TensorData}, }; // ---------------------------------------------------------------------------- @@ -95,7 +96,7 @@ where /// The tensor did not have the right shape for an image (e.g. had too many dimensions). #[error("Could not create Image from TensorData with shape {0:?}")] - BadImageShape(Vec), + BadImageShape(ArrowBuffer), /// Happens if you try to cast `NV12` or `YUY2` to a depth image or segmentation image. #[error("Chroma downsampling is not supported for this image type (e.g. DepthImage or SegmentationImage)")] @@ -181,7 +182,7 @@ impl ImageChannelType for f64 { /// /// For instance: `[1, 480, 640, 3, 1]` would return `[1, 2, 3]`, /// the indices of the `[480, 640, 3]` dimensions. 
-pub fn find_non_empty_dim_indices(shape: &[TensorDimension]) -> SmallVec<[usize; 4]> { +pub fn find_non_empty_dim_indices(shape: &[u64]) -> SmallVec<[usize; 4]> { match shape.len() { 0 => return smallvec![], 1 => return smallvec![0], @@ -197,7 +198,7 @@ pub fn find_non_empty_dim_indices(shape: &[TensorDimension]) -> SmallVec<[usize; shape .iter() .enumerate() - .filter_map(|(ind, dim)| if dim.size != 1 { Some(ind) } else { None }); + .filter_map(|(ind, &dim)| if dim != 1 { Some(ind) } else { None }); // 0 is always a valid index. let mut min = non_unit_indices.next().unwrap_or(0); @@ -216,7 +217,7 @@ pub fn find_non_empty_dim_indices(shape: &[TensorDimension]) -> SmallVec<[usize; // Grow up to 3 if the inner dimension is already 3 or 4 (Color Images) // Otherwise, only grow up to 2. // (1x1x3) -> 1x1x3 rgb rather than 1x3 mono - let target_len = match shape[max].size { + let target_len = match shape[max] { 3 | 4 => 3, _ => 2, }; @@ -231,14 +232,7 @@ pub fn find_non_empty_dim_indices(shape: &[TensorDimension]) -> SmallVec<[usize; #[test] fn test_find_non_empty_dim_indices() { fn expect(shape: &[u64], expected: &[usize]) { - let dim: Vec<_> = shape - .iter() - .map(|s| TensorDimension { - size: *s, - name: None, - }) - .collect(); - let got = find_non_empty_dim_indices(&dim); + let got = find_non_empty_dim_indices(shape); assert!( got.as_slice() == expected, "Input: {shape:?}, got {got:?}, expected {expected:?}" diff --git a/crates/store/re_types/tests/types/tensor.rs b/crates/store/re_types/tests/types/tensor.rs index 3474ae7b1d35..2fb8a132217a 100644 --- a/crates/store/re_types/tests/types/tensor.rs +++ b/crates/store/re_types/tests/types/tensor.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use re_types::{ archetypes::Tensor, - datatypes::{TensorBuffer, TensorData, TensorDimension}, + datatypes::{TensorBuffer, TensorData}, tensor_data::TensorCastError, Archetype as _, AsComponents as _, }; @@ -12,20 +12,7 @@ use crate::util; #[test] fn tensor_roundtrip() { let 
all_expected = [Tensor { - data: TensorData { - shape: vec![ - TensorDimension { - size: 2, - name: None, - }, - TensorDimension { - size: 3, - name: None, - }, - ], - buffer: TensorBuffer::U8(vec![1, 2, 3, 4, 5, 6].into()), - } - .into(), + data: TensorData::new(vec![2, 3], TensorBuffer::U8(vec![1, 2, 3, 4, 5, 6].into())).into(), value_range: None, }]; @@ -60,14 +47,7 @@ fn tensor_roundtrip() { #[test] fn convert_tensor_to_ndarray_u8() { - let t = TensorData::new( - vec![ - TensorDimension::unnamed(3), - TensorDimension::unnamed(4), - TensorDimension::unnamed(5), - ], - TensorBuffer::U8(vec![0; 60].into()), - ); + let t = TensorData::new(vec![3, 4, 5], TensorBuffer::U8(vec![0; 60].into())); let n = ndarray::ArrayViewD::::try_from(&t).unwrap(); @@ -76,14 +56,7 @@ fn convert_tensor_to_ndarray_u8() { #[test] fn convert_tensor_to_ndarray_u16() { - let t = TensorData::new( - vec![ - TensorDimension::unnamed(3), - TensorDimension::unnamed(4), - TensorDimension::unnamed(5), - ], - TensorBuffer::U16(vec![0_u16; 60].into()), - ); + let t = TensorData::new(vec![3, 4, 5], TensorBuffer::U16(vec![0_u16; 60].into())); let n = ndarray::ArrayViewD::::try_from(&t).unwrap(); @@ -92,14 +65,7 @@ fn convert_tensor_to_ndarray_u16() { #[test] fn convert_tensor_to_ndarray_f32() { - let t = TensorData::new( - vec![ - TensorDimension::unnamed(3), - TensorDimension::unnamed(4), - TensorDimension::unnamed(5), - ], - TensorBuffer::F32(vec![0_f32; 60].into()), - ); + let t = TensorData::new(vec![3, 4, 5], TensorBuffer::F32(vec![0_f32; 60].into())); let n = ndarray::ArrayViewD::::try_from(&t).unwrap(); @@ -111,10 +77,7 @@ fn convert_ndarray_f64_to_tensor() { let n = ndarray::array![[1., 2., 3.], [4., 5., 6.]]; let t = TensorData::try_from(n).unwrap(); - assert_eq!( - t.shape(), - &[TensorDimension::unnamed(2), TensorDimension::unnamed(3)] - ); + assert_eq!(t.shape(), &[2, 3]); } #[test] @@ -123,7 +86,7 @@ fn convert_ndarray_slice_to_tensor() { let n = &n.slice(ndarray::s![.., 1]); let t = 
TensorData::try_from(*n).unwrap(); - assert_eq!(t.shape(), &[TensorDimension::unnamed(2)]); + assert_eq!(t.shape(), &[2]); } #[test] @@ -220,11 +183,7 @@ fn convert_ndarray_to_tensor_both_layouts_nonzero_offset() { #[test] fn check_slices() { let t = TensorData::new( - vec![ - TensorDimension::unnamed(3), - TensorDimension::unnamed(4), - TensorDimension::unnamed(5), - ], + vec![3, 4, 5], TensorBuffer::U16((0_u16..60).collect::>().into()), ); @@ -258,14 +217,7 @@ fn check_slices() { #[test] fn check_tensor_shape_error() { - let t = TensorData::new( - vec![ - TensorDimension::unnamed(3), - TensorDimension::unnamed(4), - TensorDimension::unnamed(5), - ], - TensorBuffer::U8(vec![0; 59].into()), - ); + let t = TensorData::new(vec![3, 4, 5], TensorBuffer::U8(vec![0; 59].into())); let n = ndarray::ArrayViewD::::try_from(&t); @@ -279,14 +231,7 @@ fn check_tensor_shape_error() { #[test] fn check_tensor_type_error() { - let t = TensorData::new( - vec![ - TensorDimension::unnamed(3), - TensorDimension::unnamed(4), - TensorDimension::unnamed(5), - ], - TensorBuffer::U16(vec![0; 60].into()), - ); + let t = TensorData::new(vec![3, 4, 5], TensorBuffer::U16(vec![0; 60].into())); let n = ndarray::ArrayViewD::::try_from(&t); diff --git a/crates/store/re_types_core/src/arrow_buffer.rs b/crates/store/re_types_core/src/arrow_buffer.rs index a8ed54e38d8d..a07e47ca2e88 100644 --- a/crates/store/re_types_core/src/arrow_buffer.rs +++ b/crates/store/re_types_core/src/arrow_buffer.rs @@ -133,6 +133,16 @@ impl FromIterator for ArrowBuffer { } } +impl<'a, T: ArrowNativeType> IntoIterator for &'a ArrowBuffer { + type Item = &'a T; + type IntoIter = std::slice::Iter<'a, T>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.as_slice().iter() + } +} + impl std::ops::Deref for ArrowBuffer { type Target = [T]; diff --git a/crates/top/rerun/src/commands/rrd/compare.rs b/crates/top/rerun/src/commands/rrd/compare.rs index 3472eacd8f90..c9c7c4778e4d 100644 --- 
a/crates/top/rerun/src/commands/rrd/compare.rs +++ b/crates/top/rerun/src/commands/rrd/compare.rs @@ -30,6 +30,8 @@ impl CompareCommand { full_dump, } = self; + re_log::debug!("Comparing {path_to_rrd1:?} to {path_to_rrd2:?}…"); + let path_to_rrd1 = PathBuf::from(path_to_rrd1); let path_to_rrd2 = PathBuf::from(path_to_rrd2); @@ -75,6 +77,8 @@ impl CompareCommand { ); } + re_log::debug!("{path_to_rrd1:?} and {path_to_rrd2:?} are similar enough."); + Ok(()) } } diff --git a/crates/top/rerun/src/lib.rs b/crates/top/rerun/src/lib.rs index 486e96a4ae7a..839360a43188 100644 --- a/crates/top/rerun/src/lib.rs +++ b/crates/top/rerun/src/lib.rs @@ -168,10 +168,10 @@ pub const EXTERNAL_DATA_LOADER_INCOMPATIBLE_EXIT_CODE: i32 = 66; pub mod external { pub use anyhow; - pub use re_build_info; - pub use re_entity_db; - pub use re_entity_db::external::*; - pub use re_format; + pub use ::re_build_info; + pub use ::re_entity_db; + pub use ::re_entity_db::external::*; + pub use ::re_format; #[cfg(feature = "run")] pub use re_data_source; diff --git a/crates/top/rerun/src/sdk.rs b/crates/top/rerun/src/sdk.rs index 4d8c70a28931..3eab417ce267 100644 --- a/crates/top/rerun/src/sdk.rs +++ b/crates/top/rerun/src/sdk.rs @@ -31,7 +31,7 @@ mod prelude { pub use re_types::datatypes::{ Angle, AnnotationInfo, ChannelDatatype, ClassDescription, ColorModel, Float32, KeypointPair, Mat3x3, PixelFormat, Quaternion, Rgba32, RotationAxisAngle, TensorBuffer, - TensorData, TensorDimension, Vec2D, Vec3D, Vec4D, + TensorData, Vec2D, Vec3D, Vec4D, }; } pub use prelude::*; diff --git a/crates/viewer/re_data_ui/src/tensor.rs b/crates/viewer/re_data_ui/src/tensor.rs index afe680f47228..badbb7f541b9 100644 --- a/crates/viewer/re_data_ui/src/tensor.rs +++ b/crates/viewer/re_data_ui/src/tensor.rs @@ -2,33 +2,38 @@ use itertools::Itertools as _; use re_chunk_store::RowId; use re_log_types::EntityPath; -use re_types::datatypes::{TensorData, TensorDimension}; +use re_types::datatypes::TensorData; use re_ui::UiExt as 
_; use re_viewer_context::{TensorStats, TensorStatsCache, UiLayout, ViewerContext}; use super::EntityDataUi; -pub fn format_tensor_shape_single_line(shape: &[TensorDimension]) -> String { +fn format_tensor_shape_single_line(tensor: &TensorData) -> String { const MAX_SHOWN: usize = 4; // should be enough for width/height/depth and then some! - let iter = shape.iter().take(MAX_SHOWN); - let labelled = iter.clone().any(|dim| dim.name.is_some()); - let shapes = iter - .map(|dim| { + let short_shape = &tensor.shape[0..tensor.shape.len().min(MAX_SHOWN)]; + let has_names = short_shape + .iter() + .enumerate() + .any(|(dim_idx, _)| tensor.dim_name(dim_idx).is_some()); + let shapes = short_shape + .iter() + .enumerate() + .map(|(dim_idx, dim_len)| { format!( "{}{}", - dim.size, - if let Some(name) = &dim.name { + dim_len, + if let Some(name) = tensor.dim_name(dim_idx) { format!(" ({name})") } else { String::new() } ) }) - .join(if labelled { " × " } else { "×" }); + .join(if has_names { " × " } else { "×" }); format!( "{shapes}{}", - if shape.len() > MAX_SHOWN { - if labelled { + if MAX_SHOWN < tensor.shape.len() { + if has_names { " × …" } else { "×…" @@ -75,7 +80,7 @@ pub fn tensor_ui( let text = format!( "{}, {}", tensor.dtype(), - format_tensor_shape_single_line(&tensor.shape) + format_tensor_shape_single_line(tensor) ); ui_layout.label(ui, text).on_hover_ui(|ui| { tensor_summary_ui(ui, tensor, &tensor_stats); @@ -94,7 +99,7 @@ pub fn tensor_summary_ui_grid_contents( tensor: &TensorData, tensor_stats: &TensorStats, ) { - let TensorData { shape, buffer: _ } = tensor; + let TensorData { shape, names, .. } = tensor; ui.grid_left_hand_label("Data type") .on_hover_text("Data type used for all individual elements within the tensor"); @@ -107,12 +112,12 @@ pub fn tensor_summary_ui_grid_contents( // For unnamed tensor dimension more than a single line usually doesn't make sense! // But what if some are named and some are not? // -> If more than 1 is named, make it a column! 
- if shape.iter().filter(|d| d.name.is_some()).count() > 1 { - for dim in shape { - ui.label(dim.to_string()); + if let Some(names) = names { + for (name, size) in itertools::izip!(names, shape) { + ui.label(format!("{name}={size}")); } } else { - ui.label(format_tensor_shape_single_line(shape)); + ui.label(format_tensor_shape_single_line(tensor)); } }); ui.end_row(); diff --git a/crates/viewer/re_view_tensor/src/dimension_mapping.rs b/crates/viewer/re_view_tensor/src/dimension_mapping.rs index f3804733b1d8..43ccab9a812b 100644 --- a/crates/viewer/re_view_tensor/src/dimension_mapping.rs +++ b/crates/viewer/re_view_tensor/src/dimension_mapping.rs @@ -1,11 +1,14 @@ use egui::NumExt as _; + use re_types::{ blueprint::{archetypes::TensorSliceSelection, components::TensorDimensionIndexSlider}, components::{TensorDimensionIndexSelection, TensorHeightDimension, TensorWidthDimension}, - datatypes::{TensorDimension, TensorDimensionSelection}, + datatypes::TensorDimensionSelection, }; use re_viewport_blueprint::ViewProperty; +use crate::TensorDimension; + /// Loads slice selection from blueprint and makes modifications (without writing back) such that it is valid /// for the given tensor shape. /// @@ -250,7 +253,7 @@ fn longest_and_second_longest_dim_indices(shape: &[TensorDimension]) -> (usize, #[cfg(test)] mod tests { - use re_types::datatypes::TensorDimension; + use crate::TensorDimension; use re_types::{ blueprint::components::TensorDimensionIndexSlider, components::TensorDimensionIndexSelection, diff --git a/crates/viewer/re_view_tensor/src/lib.rs b/crates/viewer/re_view_tensor/src/lib.rs index 2dda9636b659..19492b29bf34 100644 --- a/crates/viewer/re_view_tensor/src/lib.rs +++ b/crates/viewer/re_view_tensor/src/lib.rs @@ -1,6 +1,6 @@ -//! Rerun tensor View. +//! Rerun tensor view. //! -//! A View dedicated to visualizing tensors with arbitrary dimensionality. +//! A view dedicated to visualizing tensors with arbitrary dimensionality. 
// TODO(#6330): remove unwrap() #![allow(clippy::unwrap_used)] @@ -12,3 +12,34 @@ mod view_class; mod visualizer_system; pub use view_class::TensorView; + +/// Information about a dimension of a tensor. +struct TensorDimension { + pub size: u64, + pub name: Option, +} + +impl TensorDimension { + pub fn from_tensor_data(tensor_data: &re_types::datatypes::TensorData) -> Vec { + tensor_data + .shape + .iter() + .enumerate() + .map(|(dim_idx, dim_len)| Self { + size: *dim_len, + name: tensor_data.dim_name(dim_idx).cloned(), + }) + .collect() + } + + pub fn unnamed(size: u64) -> Self { + Self { size, name: None } + } + + pub fn named(size: u64, name: impl Into) -> Self { + Self { + size, + name: Some(name.into()), + } + } +} diff --git a/crates/viewer/re_view_tensor/src/tensor_dimension_mapper.rs b/crates/viewer/re_view_tensor/src/tensor_dimension_mapper.rs index ac224320bc12..07e6bed75cb1 100644 --- a/crates/viewer/re_view_tensor/src/tensor_dimension_mapper.rs +++ b/crates/viewer/re_view_tensor/src/tensor_dimension_mapper.rs @@ -1,11 +1,12 @@ use re_types::{ - blueprint::archetypes::TensorSliceSelection, - datatypes::{TensorDimension, TensorDimensionIndexSelection}, + blueprint::archetypes::TensorSliceSelection, datatypes::TensorDimensionIndexSelection, }; use re_ui::UiExt as _; use re_viewer_context::ViewerContext; use re_viewport_blueprint::ViewProperty; +use crate::TensorDimension; + #[derive(Clone, Copy, PartialEq, Eq)] enum DragDropAddress { None, diff --git a/crates/viewer/re_view_tensor/src/tensor_tests.rs b/crates/viewer/re_view_tensor/src/tensor_tests.rs index e8e46712330e..0da7b00c5394 100644 --- a/crates/viewer/re_view_tensor/src/tensor_tests.rs +++ b/crates/viewer/re_view_tensor/src/tensor_tests.rs @@ -1,6 +1,6 @@ #![allow(clippy::unwrap_used)] -use re_types::datatypes::{TensorBuffer, TensorData, TensorDimension}; +use re_types::datatypes::{TensorBuffer, TensorData}; use re_types::tensor_data::TensorCastError; #[test] diff --git 
a/crates/viewer/re_view_tensor/src/view_class.rs b/crates/viewer/re_view_tensor/src/view_class.rs index 191c327de4fe..0e8165f0101a 100644 --- a/crates/viewer/re_view_tensor/src/view_class.rs +++ b/crates/viewer/re_view_tensor/src/view_class.rs @@ -1,6 +1,5 @@ use egui::{epaint::TextShape, Align2, NumExt as _, Vec2}; use ndarray::Axis; -use re_view::{suggest_view_for_each_entity, view_property_ui}; use re_data_ui::tensor_summary_ui_grid_contents; use re_log_types::EntityPath; @@ -10,10 +9,11 @@ use re_types::{ components::ViewFit, }, components::{Colormap, GammaCorrection, MagnificationFilter, TensorDimensionIndexSelection}, - datatypes::{TensorData, TensorDimension}, + datatypes::TensorData, View, ViewClassIdentifier, }; use re_ui::{list_item, UiExt as _}; +use re_view::{suggest_view_for_each_entity, view_property_ui}; use re_viewer_context::{ gpu_bridge, ApplicableEntities, ColormapWithRange, IdentifiedViewSystem as _, IndicatedEntities, PerVisualizer, TensorStatsCache, TypedComponentFallbackProvider, ViewClass, @@ -26,6 +26,7 @@ use crate::{ dimension_mapping::load_tensor_slice_selection_and_make_valid, tensor_dimension_mapper::dimension_mapping_ui, visualizer_system::{TensorSystem, TensorVisualization}, + TensorDimension, }; #[derive(Default)] @@ -151,12 +152,20 @@ Note: select the view to configure which dimensions are shown." 
ctx.blueprint_query, view_id, ); - let slice_selection = - load_tensor_slice_selection_and_make_valid(&slice_property, tensor.shape())?; + let slice_selection = load_tensor_slice_selection_and_make_valid( + &slice_property, + &TensorDimension::from_tensor_data(tensor), + )?; ui.separator(); ui.strong("Dimension Mapping"); - dimension_mapping_ui(ctx, ui, tensor.shape(), &slice_selection, &slice_property); + dimension_mapping_ui( + ctx, + ui, + &TensorDimension::from_tensor_data(tensor), + &slice_selection, + &slice_property, + ); // TODO(andreas): this is a bit too inconsistent with the other UIs - we don't offer the same reset/option buttons here if ui @@ -242,8 +251,10 @@ impl TensorView { ctx.blueprint_query, view_id, ); - let slice_selection = - load_tensor_slice_selection_and_make_valid(&slice_property, tensor.shape())?; + let slice_selection = load_tensor_slice_selection_and_make_valid( + &slice_property, + &TensorDimension::from_tensor_data(tensor), + )?; let default_item_spacing = ui.spacing_mut().item_spacing; ui.spacing_mut().item_spacing.y = 0.0; // No extra spacing between sliders and tensor @@ -259,17 +270,26 @@ impl TensorView { } .show(ui, |ui| { ui.spacing_mut().item_spacing = default_item_spacing; // keep the default spacing between sliders - selectors_ui(ctx, ui, tensor.shape(), &slice_selection, &slice_property); + selectors_ui( + ctx, + ui, + &TensorDimension::from_tensor_data(tensor), + &slice_selection, + &slice_property, + ); }); } let dimension_labels = [ - slice_selection - .width - .map(|width| (dimension_name(&tensor.shape, width.dimension), width.invert)), + slice_selection.width.map(|width| { + ( + dimension_name(&TensorDimension::from_tensor_data(tensor), width.dimension), + width.invert, + ) + }), slice_selection.height.map(|height| { ( - dimension_name(&tensor.shape, height.dimension), + dimension_name(&TensorDimension::from_tensor_data(tensor), height.dimension), height.invert, ) }), diff --git 
a/docs/content/reference/migration/migration-0-21.md b/docs/content/reference/migration/migration-0-21.md index 15c149d369f1..84a720a41338 100644 --- a/docs/content/reference/migration/migration-0-21.md +++ b/docs/content/reference/migration/migration-0-21.md @@ -3,6 +3,9 @@ title: Migrating from 0.20 to 0.21 order: 989 --- +### File compatibility +We've changed how tensors are encoded in .rrd files, so tensors will no longer load from older .rrd files ([#8376](https://github.com/rerun-io/rerun/pull/8376)). + ### Near clip plane for `Spatial2D` views now defaults to `0.1` in 3D scene units. Previously, the clip plane was set an arbitrary value that worked reasonably for diff --git a/docs/content/reference/types/components/tensor_data.md b/docs/content/reference/types/components/tensor_data.md index 212c6f45a755..3de2724c3242 100644 --- a/docs/content/reference/types/components/tensor_data.md +++ b/docs/content/reference/types/components/tensor_data.md @@ -19,10 +19,8 @@ which stores a contiguous array of typed values. ## Arrow datatype ``` Struct { - shape: List + shape: List + names: nullable List buffer: DenseUnion { 0 = "_null_markers": nullable null 1 = "U8": List diff --git a/docs/content/reference/types/datatypes.md b/docs/content/reference/types/datatypes.md index 6902ee39e313..b12bd42d39db 100644 --- a/docs/content/reference/types/datatypes.md +++ b/docs/content/reference/types/datatypes.md @@ -34,7 +34,6 @@ Data types are the lowest layer of the data model hierarchy. They are re-usable * [`RotationAxisAngle`](datatypes/rotation_axis_angle.md): 3D rotation represented by a rotation around a given axis. * [`TensorBuffer`](datatypes/tensor_buffer.md): The underlying storage for [`archetypes.Tensor`](https://rerun.io/docs/reference/types/archetypes/tensor). * [`TensorData`](datatypes/tensor_data.md): An N-dimensional array of numbers. -* [`TensorDimension`](datatypes/tensor_dimension.md): A single dimension within a multi-dimensional tensor. 
* [`TensorDimensionIndexSelection`](datatypes/tensor_dimension_index_selection.md): Indexing a specific tensor dimension. * [`TensorDimensionSelection`](datatypes/tensor_dimension_selection.md): Selection of a single tensor dimension. * [`TimeInt`](datatypes/time_int.md): A 64-bit number describing either nanoseconds OR sequence numbers. diff --git a/docs/content/reference/types/datatypes/.gitattributes b/docs/content/reference/types/datatypes/.gitattributes index 3454eebe12d8..771fe1303bcf 100644 --- a/docs/content/reference/types/datatypes/.gitattributes +++ b/docs/content/reference/types/datatypes/.gitattributes @@ -28,7 +28,6 @@ rgba32.md linguist-generated=true rotation_axis_angle.md linguist-generated=true tensor_buffer.md linguist-generated=true tensor_data.md linguist-generated=true -tensor_dimension.md linguist-generated=true tensor_dimension_index_selection.md linguist-generated=true tensor_dimension_selection.md linguist-generated=true time_int.md linguist-generated=true diff --git a/docs/content/reference/types/datatypes/tensor_data.md b/docs/content/reference/types/datatypes/tensor_data.md index d8af8ecbb674..abc09ba17823 100644 --- a/docs/content/reference/types/datatypes/tensor_data.md +++ b/docs/content/reference/types/datatypes/tensor_data.md @@ -14,9 +14,20 @@ which stores a contiguous array of typed values. ## Fields #### `shape` -Type: List of [`TensorDimension`](../datatypes/tensor_dimension.md) +Type: List of `uint64` -The shape of the tensor, including optional names for each dimension. +The shape of the tensor, i.e. the length of each dimension. + +#### `names` +Type: nullable List of `utf8` + +The names of the dimensions of the tensor (optional). + +If set, should be the same length as [`datatypes.TensorData#shape`](https://rerun.io/docs/reference/types/datatypes/tensor_data). +If it has a different length your names may show up improperly, +and some constructors may produce a warning or even an error. 
+ +Example: `["height", "width", "channel", "batch"]`. #### `buffer` Type: [`TensorBuffer`](../datatypes/tensor_buffer.md) @@ -27,10 +38,8 @@ The content/data. ## Arrow datatype ``` Struct { - shape: List + shape: List + names: nullable List buffer: DenseUnion { 0 = "_null_markers": nullable null 1 = "U8": List diff --git a/docs/content/reference/types/datatypes/tensor_dimension.md b/docs/content/reference/types/datatypes/tensor_dimension.md deleted file mode 100644 index 71d1446ed11e..000000000000 --- a/docs/content/reference/types/datatypes/tensor_dimension.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: "TensorDimension" ---- - - -A single dimension within a multi-dimensional tensor. - -## Fields -#### `size` -Type: `uint64` - -The length of this dimension. - -#### `name` -Type: nullable `utf8` - -The name of this dimension, e.g. "width", "height", "channel", "batch', …. - - -## Arrow datatype -``` -Struct { - size: uint64 - name: nullable utf8 -} -``` - -## API reference links - * 🌊 [C++ API docs for `TensorDimension`](https://ref.rerun.io/docs/cpp/stable/structrerun_1_1datatypes_1_1TensorDimension.html) - * 🐍 [Python API docs for `TensorDimension`](https://ref.rerun.io/docs/python/stable/common/datatypes#rerun.datatypes.TensorDimension) - * 🦀 [Rust API docs for `TensorDimension`](https://docs.rs/rerun/latest/rerun/datatypes/struct.TensorDimension.html) - - -## Used by - -* [`TensorData`](../datatypes/tensor_data.md) diff --git a/docs/snippets/README.md b/docs/snippets/README.md index a221615ae85a..21d51dcb2c10 100644 --- a/docs/snippets/README.md +++ b/docs/snippets/README.md @@ -11,7 +11,7 @@ You can run each example individually using the following: - **C++**: - `pixi run -e cpp cpp-build-snippets` to compile all examples - `./build/debug/docs/snippets/all/` to run, e.g. `./build/debug/docs/snippets/all/point3d_random` -- **Python**: `pixi run -e py python .py`, e.g. `pixi run -e py python point3d_random.py`. 
+- **Python**: `pixi run py-build && pixi run -e py python .py`, e.g. `pixi run -e py python point3d_random.py`. - **Rust**: `cargo run -p snippets -- [args]`, e.g. `cargo run -p snippets -- point3d_random`. ## Comparison test @@ -23,8 +23,8 @@ These tests check that A) all of our SDKs yield the exact same data when used th ### Usage -To run the comparison tests, check out `./docs/snippets/compare_snippet_output.py --help`. -`./docs/snippets/compare_snippet_output.py` is a valid invocation that will build all 3 SDKs and run all tests for all of them. +To run the comparison tests, check out `pixi run -e py docs/snippets/compare_snippet_output.py --help`. +`pixi run -e py docs/snippets/compare_snippet_output.py` is a valid invocation that will build all 3 SDKs and run all tests for all of them. ### Implementing new tests diff --git a/rerun_cpp/src/rerun.hpp b/rerun_cpp/src/rerun.hpp index 8f4546fc4c01..4b2d4933db05 100644 --- a/rerun_cpp/src/rerun.hpp +++ b/rerun_cpp/src/rerun.hpp @@ -66,7 +66,6 @@ namespace rerun { using datatypes::RotationAxisAngle; using datatypes::TensorBuffer; using datatypes::TensorData; - using datatypes::TensorDimension; using datatypes::Vec2D; using datatypes::Vec3D; using datatypes::Vec4D; diff --git a/rerun_cpp/src/rerun/archetypes/tensor.hpp b/rerun_cpp/src/rerun/archetypes/tensor.hpp index 881c6f34bb60..8f2b553ab2c9 100644 --- a/rerun_cpp/src/rerun/archetypes/tensor.hpp +++ b/rerun_cpp/src/rerun/archetypes/tensor.hpp @@ -77,7 +77,7 @@ namespace rerun::archetypes { public: // START of extensions from tensor_ext.cpp: /// New Tensor from dimensions and tensor buffer. - Tensor(Collection shape, datatypes::TensorBuffer buffer) + Tensor(Collection shape, datatypes::TensorBuffer buffer) : Tensor(datatypes::TensorData(std::move(shape), std::move(buffer))) {} /// New tensor from dimensions and pointer to tensor data. @@ -88,7 +88,7 @@ namespace rerun::archetypes { /// \param data_ /// Target of the pointer must outlive the archetype. 
template - explicit Tensor(Collection shape, const TElement* data_) + explicit Tensor(Collection shape, const TElement* data_) : Tensor(datatypes::TensorData(std::move(shape), data_)) {} /// Update the `names` of the contained `TensorData` dimensions. diff --git a/rerun_cpp/src/rerun/archetypes/tensor_ext.cpp b/rerun_cpp/src/rerun/archetypes/tensor_ext.cpp index 850416c0bed9..d684252c1822 100644 --- a/rerun_cpp/src/rerun/archetypes/tensor_ext.cpp +++ b/rerun_cpp/src/rerun/archetypes/tensor_ext.cpp @@ -13,7 +13,7 @@ namespace rerun::archetypes { // /// New Tensor from dimensions and tensor buffer. - Tensor(Collection shape, datatypes::TensorBuffer buffer) + Tensor(Collection shape, datatypes::TensorBuffer buffer) : Tensor(datatypes::TensorData(std::move(shape), std::move(buffer))) {} /// New tensor from dimensions and pointer to tensor data. @@ -24,7 +24,7 @@ namespace rerun::archetypes { /// \param data_ /// Target of the pointer must outlive the archetype. template - explicit Tensor(Collection shape, const TElement* data_) + explicit Tensor(Collection shape, const TElement* data_) : Tensor(datatypes::TensorData(std::move(shape), data_)) {} /// Update the `names` of the contained `TensorData` dimensions. @@ -51,11 +51,7 @@ namespace rerun::archetypes { .handle(); } - auto new_shape = shape.to_vector(); - for (size_t i = 0; i < std::min(shape.size(), names.size()); ++i) { - new_shape[i].name = std::move(names[i]); - } - shape = std::move(new_shape); + this->data.data.names = std::move(names); return std::move(*this); } diff --git a/rerun_cpp/src/rerun/components/tensor_data.hpp b/rerun_cpp/src/rerun/components/tensor_data.hpp index d85bb7960091..a7ececf996fd 100644 --- a/rerun_cpp/src/rerun/components/tensor_data.hpp +++ b/rerun_cpp/src/rerun/components/tensor_data.hpp @@ -28,10 +28,7 @@ namespace rerun::components { /// /// \param shape Shape of the tensor. /// \param buffer The tensor buffer containing the tensor's data. 
- TensorData( - rerun::Collection shape, - rerun::datatypes::TensorBuffer buffer - ) + TensorData(rerun::Collection shape, rerun::datatypes::TensorBuffer buffer) : data(rerun::datatypes::TensorData(std::move(shape), std::move(buffer))) {} /// New tensor data from dimensions and pointer to tensor data. @@ -40,7 +37,7 @@ namespace rerun::components { /// \param shape Shape of the tensor. Determines the number of elements expected to be in `data_`. /// \param data_ Target of the pointer must outlive the archetype. template - explicit TensorData(Collection shape, const TElement* data_) + explicit TensorData(Collection shape, const TElement* data_) : data(rerun::datatypes::TensorData(std::move(shape), data_)) {} // END of extensions from tensor_data_ext.cpp, start of generated code: diff --git a/rerun_cpp/src/rerun/components/tensor_data_ext.cpp b/rerun_cpp/src/rerun/components/tensor_data_ext.cpp index 3da2dde1f4cf..3d797a51f227 100644 --- a/rerun_cpp/src/rerun/components/tensor_data_ext.cpp +++ b/rerun_cpp/src/rerun/components/tensor_data_ext.cpp @@ -11,7 +11,7 @@ namespace rerun::components { /// \param shape Shape of the tensor. /// \param buffer The tensor buffer containing the tensor's data. TensorData( - rerun::Collection shape, + rerun::Collection shape, rerun::datatypes::TensorBuffer buffer ) : data(rerun::datatypes::TensorData(std::move(shape), std::move(buffer))) {} @@ -22,7 +22,7 @@ namespace rerun::components { /// \param shape Shape of the tensor. Determines the number of elements expected to be in `data_`. /// \param data_ Target of the pointer must outlive the archetype. 
template - explicit TensorData(Collection shape, const TElement* data_) + explicit TensorData(Collection shape, const TElement* data_) : data(rerun::datatypes::TensorData(std::move(shape), data_)) {} // diff --git a/rerun_cpp/src/rerun/datatypes.hpp b/rerun_cpp/src/rerun/datatypes.hpp index 0d41f3d62a23..30807019b8a6 100644 --- a/rerun_cpp/src/rerun/datatypes.hpp +++ b/rerun_cpp/src/rerun/datatypes.hpp @@ -29,7 +29,6 @@ #include "datatypes/rotation_axis_angle.hpp" #include "datatypes/tensor_buffer.hpp" #include "datatypes/tensor_data.hpp" -#include "datatypes/tensor_dimension.hpp" #include "datatypes/tensor_dimension_index_selection.hpp" #include "datatypes/tensor_dimension_selection.hpp" #include "datatypes/time_int.hpp" diff --git a/rerun_cpp/src/rerun/datatypes/.gitattributes b/rerun_cpp/src/rerun/datatypes/.gitattributes index 46c43435cc8b..a571592c1e65 100644 --- a/rerun_cpp/src/rerun/datatypes/.gitattributes +++ b/rerun_cpp/src/rerun/datatypes/.gitattributes @@ -55,8 +55,6 @@ tensor_buffer.cpp linguist-generated=true tensor_buffer.hpp linguist-generated=true tensor_data.cpp linguist-generated=true tensor_data.hpp linguist-generated=true -tensor_dimension.cpp linguist-generated=true -tensor_dimension.hpp linguist-generated=true tensor_dimension_index_selection.cpp linguist-generated=true tensor_dimension_index_selection.hpp linguist-generated=true tensor_dimension_selection.cpp linguist-generated=true diff --git a/rerun_cpp/src/rerun/datatypes/tensor_data.cpp b/rerun_cpp/src/rerun/datatypes/tensor_data.cpp index dfaa17f489c0..0a53b1bec03d 100644 --- a/rerun_cpp/src/rerun/datatypes/tensor_data.cpp +++ b/rerun_cpp/src/rerun/datatypes/tensor_data.cpp @@ -4,7 +4,6 @@ #include "tensor_data.hpp" #include "tensor_buffer.hpp" -#include "tensor_dimension.hpp" #include #include @@ -14,15 +13,8 @@ namespace rerun::datatypes {} namespace rerun { const std::shared_ptr& Loggable::arrow_datatype() { static const auto datatype = arrow::struct_({ - arrow::field( - "shape", - 
arrow::list(arrow::field( - "item", - Loggable::arrow_datatype(), - false - )), - false - ), + arrow::field("shape", arrow::list(arrow::field("item", arrow::uint64(), false)), false), + arrow::field("names", arrow::list(arrow::field("item", arrow::utf8(), false)), true), arrow::field( "buffer", Loggable::arrow_datatype(), @@ -67,26 +59,41 @@ namespace rerun { { auto field_builder = static_cast(builder->field_builder(0)); - auto value_builder = static_cast(field_builder->value_builder()); + auto value_builder = static_cast(field_builder->value_builder()); ARROW_RETURN_NOT_OK(field_builder->Reserve(static_cast(num_elements))); ARROW_RETURN_NOT_OK(value_builder->Reserve(static_cast(num_elements * 2))); for (size_t elem_idx = 0; elem_idx < num_elements; elem_idx += 1) { const auto& element = elements[elem_idx]; ARROW_RETURN_NOT_OK(field_builder->Append()); - if (element.shape.data()) { - RR_RETURN_NOT_OK( - Loggable::fill_arrow_array_builder( - value_builder, - element.shape.data(), - element.shape.size() - ) - ); + ARROW_RETURN_NOT_OK(value_builder->AppendValues( + element.shape.data(), + static_cast(element.shape.size()), + nullptr + )); + } + } + { + auto field_builder = static_cast(builder->field_builder(1)); + auto value_builder = static_cast(field_builder->value_builder()); + ARROW_RETURN_NOT_OK(field_builder->Reserve(static_cast(num_elements))); + ARROW_RETURN_NOT_OK(value_builder->Reserve(static_cast(num_elements * 1))); + + for (size_t elem_idx = 0; elem_idx < num_elements; elem_idx += 1) { + const auto& element = elements[elem_idx]; + if (element.names.has_value()) { + ARROW_RETURN_NOT_OK(field_builder->Append()); + for (size_t item_idx = 0; item_idx < element.names.value().size(); + item_idx += 1) { + ARROW_RETURN_NOT_OK(value_builder->Append(element.names.value()[item_idx])); + } + } else { + ARROW_RETURN_NOT_OK(field_builder->AppendNull()); } } } { - auto field_builder = static_cast(builder->field_builder(1)); + auto field_builder = 
static_cast(builder->field_builder(2)); ARROW_RETURN_NOT_OK(field_builder->Reserve(static_cast(num_elements))); for (size_t elem_idx = 0; elem_idx < num_elements; elem_idx += 1) { RR_RETURN_NOT_OK(Loggable::fill_arrow_array_builder( diff --git a/rerun_cpp/src/rerun/datatypes/tensor_data.hpp b/rerun_cpp/src/rerun/datatypes/tensor_data.hpp index b09aae93f06b..5822b81be239 100644 --- a/rerun_cpp/src/rerun/datatypes/tensor_data.hpp +++ b/rerun_cpp/src/rerun/datatypes/tensor_data.hpp @@ -7,10 +7,11 @@ #include "../component_descriptor.hpp" #include "../result.hpp" #include "tensor_buffer.hpp" -#include "tensor_dimension.hpp" #include #include +#include +#include namespace arrow { class Array; @@ -28,8 +29,17 @@ namespace rerun::datatypes { /// These dimensions are combined with an index to look up values from the `buffer` field, /// which stores a contiguous array of typed values. struct TensorData { - /// The shape of the tensor, including optional names for each dimension. - rerun::Collection shape; + /// The shape of the tensor, i.e. the length of each dimension. + rerun::Collection shape; + + /// The names of the dimensions of the tensor (optional). + /// + /// If set, should be the same length as `datatypes::TensorData::shape`. + /// If it has a different length your names may show up improperly, + /// and some constructors may produce a warning or even an error. + /// + /// Example: `["height", "width", "channel", "batch"]`. + std::optional> names; /// The content/data. rerun::datatypes::TensorBuffer buffer; @@ -39,9 +49,7 @@ namespace rerun::datatypes { /// /// \param shape_ Shape of the tensor. /// \param buffer_ The tensor buffer containing the tensor's data. - TensorData( - Collection shape_, datatypes::TensorBuffer buffer_ - ) + TensorData(Collection shape_, datatypes::TensorBuffer buffer_) : shape(std::move(shape_)), buffer(std::move(buffer_)) {} /// New tensor data from dimensions and pointer to tensor data. 
@@ -50,11 +58,11 @@ namespace rerun::datatypes { /// \param shape_ Shape of the tensor. Determines the number of elements expected to be in `data`. /// \param data Target of the pointer must outlive the archetype. template - explicit TensorData(Collection shape_, const TElement* data) + explicit TensorData(Collection shape_, const TElement* data) : shape(std::move(shape_)) { size_t num_elements = shape.empty() ? 0 : 1; for (const auto& dim : shape) { - num_elements *= dim.size; + num_elements *= dim; } buffer = rerun::Collection::borrow(data, num_elements); } diff --git a/rerun_cpp/src/rerun/datatypes/tensor_data_ext.cpp b/rerun_cpp/src/rerun/datatypes/tensor_data_ext.cpp index 2c0aa306117b..a7a6c0fda766 100644 --- a/rerun_cpp/src/rerun/datatypes/tensor_data_ext.cpp +++ b/rerun_cpp/src/rerun/datatypes/tensor_data_ext.cpp @@ -10,7 +10,7 @@ namespace rerun::datatypes { /// \param shape_ Shape of the tensor. /// \param buffer_ The tensor buffer containing the tensor's data. TensorData( - Collection shape_, datatypes::TensorBuffer buffer_ + Collection shape_, datatypes::TensorBuffer buffer_ ) : shape(std::move(shape_)), buffer(std::move(buffer_)) {} @@ -20,10 +20,10 @@ namespace rerun::datatypes { /// \param shape_ Shape of the tensor. Determines the number of elements expected to be in `data`. /// \param data Target of the pointer must outlive the archetype. template - explicit TensorData(Collection shape_, const TElement* data) : shape(std::move(shape_)) { + explicit TensorData(Collection shape_, const TElement* data) : shape(std::move(shape_)) { size_t num_elements = shape.empty() ? 
0 : 1; for (const auto& dim : shape) { - num_elements *= dim.size; + num_elements *= dim; } buffer = rerun::Collection::borrow(data, num_elements); } diff --git a/rerun_cpp/src/rerun/datatypes/tensor_dimension.cpp b/rerun_cpp/src/rerun/datatypes/tensor_dimension.cpp deleted file mode 100644 index c58c45b1264a..000000000000 --- a/rerun_cpp/src/rerun/datatypes/tensor_dimension.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// DO NOT EDIT! This file was auto-generated by crates/build/re_types_builder/src/codegen/cpp/mod.rs -// Based on "crates/store/re_types/definitions/rerun/datatypes/tensor_dimension.fbs". - -#include "tensor_dimension.hpp" - -#include -#include - -namespace rerun::datatypes {} - -namespace rerun { - const std::shared_ptr& Loggable::arrow_datatype() { - static const auto datatype = arrow::struct_({ - arrow::field("size", arrow::uint64(), false), - arrow::field("name", arrow::utf8(), true), - }); - return datatype; - } - - Result> Loggable::to_arrow( - const datatypes::TensorDimension* instances, size_t num_instances - ) { - // TODO(andreas): Allow configuring the memory pool. - arrow::MemoryPool* pool = arrow::default_memory_pool(); - auto datatype = arrow_datatype(); - - ARROW_ASSIGN_OR_RAISE(auto builder, arrow::MakeBuilder(datatype, pool)) - if (instances && num_instances > 0) { - RR_RETURN_NOT_OK(Loggable::fill_arrow_array_builder( - static_cast(builder.get()), - instances, - num_instances - )); - } - std::shared_ptr array; - ARROW_RETURN_NOT_OK(builder->Finish(&array)); - return array; - } - - rerun::Error Loggable::fill_arrow_array_builder( - arrow::StructBuilder* builder, const datatypes::TensorDimension* elements, - size_t num_elements - ) { - if (builder == nullptr) { - return rerun::Error(ErrorCode::UnexpectedNullArgument, "Passed array builder is null."); - } - if (elements == nullptr) { - return rerun::Error( - ErrorCode::UnexpectedNullArgument, - "Cannot serialize null pointer to arrow array." 
- ); - } - - { - auto field_builder = static_cast(builder->field_builder(0)); - ARROW_RETURN_NOT_OK(field_builder->Reserve(static_cast(num_elements))); - for (size_t elem_idx = 0; elem_idx < num_elements; elem_idx += 1) { - ARROW_RETURN_NOT_OK(field_builder->Append(elements[elem_idx].size)); - } - } - { - auto field_builder = static_cast(builder->field_builder(1)); - ARROW_RETURN_NOT_OK(field_builder->Reserve(static_cast(num_elements))); - for (size_t elem_idx = 0; elem_idx < num_elements; elem_idx += 1) { - const auto& element = elements[elem_idx]; - if (element.name.has_value()) { - ARROW_RETURN_NOT_OK(field_builder->Append(element.name.value())); - } else { - ARROW_RETURN_NOT_OK(field_builder->AppendNull()); - } - } - } - ARROW_RETURN_NOT_OK(builder->AppendValues(static_cast(num_elements), nullptr)); - - return Error::ok(); - } -} // namespace rerun diff --git a/rerun_cpp/src/rerun/datatypes/tensor_dimension.hpp b/rerun_cpp/src/rerun/datatypes/tensor_dimension.hpp deleted file mode 100644 index 052ff23efffd..000000000000 --- a/rerun_cpp/src/rerun/datatypes/tensor_dimension.hpp +++ /dev/null @@ -1,66 +0,0 @@ -// DO NOT EDIT! This file was auto-generated by crates/build/re_types_builder/src/codegen/cpp/mod.rs -// Based on "crates/store/re_types/definitions/rerun/datatypes/tensor_dimension.fbs". - -#pragma once - -#include "../component_descriptor.hpp" -#include "../result.hpp" - -#include -#include -#include -#include - -namespace arrow { - class Array; - class DataType; - class StructBuilder; -} // namespace arrow - -namespace rerun::datatypes { - /// **Datatype**: A single dimension within a multi-dimensional tensor. - struct TensorDimension { - /// The length of this dimension. - uint64_t size; - - /// The name of this dimension, e.g. "width", "height", "channel", "batch', …. - std::optional name; - - public: // START of extensions from tensor_dimension_ext.cpp: - /// Nameless dimension. 
- TensorDimension(size_t size_) : size(size_) {} - - /// Dimension with name. - TensorDimension(size_t size_, std::string name_) : size(size_), name(std::move(name_)) {} - - // END of extensions from tensor_dimension_ext.cpp, start of generated code: - - public: - TensorDimension() = default; - }; -} // namespace rerun::datatypes - -namespace rerun { - template - struct Loggable; - - /// \private - template <> - struct Loggable { - static constexpr ComponentDescriptor Descriptor = "rerun.datatypes.TensorDimension"; - - /// Returns the arrow data type this type corresponds to. - static const std::shared_ptr& arrow_datatype(); - - /// Serializes an array of `rerun::datatypes::TensorDimension` into an arrow array. - static Result> to_arrow( - const datatypes::TensorDimension* instances, size_t num_instances - ); - - /// Fills an arrow array builder with an array of this type. - static rerun::Error fill_arrow_array_builder( - arrow::StructBuilder* builder, const datatypes::TensorDimension* elements, - size_t num_elements - ); - }; -} // namespace rerun diff --git a/rerun_cpp/src/rerun/datatypes/tensor_dimension_ext.cpp b/rerun_cpp/src/rerun/datatypes/tensor_dimension_ext.cpp deleted file mode 100644 index 6699700c5ae9..000000000000 --- a/rerun_cpp/src/rerun/datatypes/tensor_dimension_ext.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#include -#include "tensor_dimension.hpp" - -// #define EDIT_EXTENSION - -namespace rerun { - namespace datatypes { - -#ifdef EDIT_EXTENSION - // - - /// Nameless dimension. - TensorDimension(size_t size_) : size(size_) {} - - /// Dimension with name. 
- TensorDimension(size_t size_, std::string name_) : size(size_), name(std::move(name_)) {} - - // -#endif - } // namespace datatypes -} // namespace rerun diff --git a/rerun_py/README.md b/rerun_py/README.md index 92678ac6b1dc..d86ad3f7c513 100644 --- a/rerun_py/README.md +++ b/rerun_py/README.md @@ -89,3 +89,8 @@ While we try to keep the main branch usable at all times, it may be unstable occ ```sh pixi run -e py py-build && pixi run -e py py-test ``` + +# Running specific Python unit tests +```sh +pixi run -e py py-build && pixi run -e py pytest rerun_py/tests/unit/test_tensor.py +``` diff --git a/rerun_py/rerun_sdk/rerun/_validators.py b/rerun_py/rerun_sdk/rerun/_validators.py index 324a0799e440..1ea9535dc2cd 100644 --- a/rerun_py/rerun_sdk/rerun/_validators.py +++ b/rerun_py/rerun_sdk/rerun/_validators.py @@ -5,7 +5,7 @@ import numpy as np import numpy.typing as npt -from ._converters import to_np_float32, to_np_float64, to_np_uint32 +from ._converters import to_np_float32, to_np_float64, to_np_uint32, to_np_uint64 # This code is a straight port from Rust. 
@@ -67,6 +67,13 @@ def flat_np_uint32_array_from_array_like(data: Any, dimension: int) -> npt.NDArr return flat_np_array_from_array_like(array, dimension) +def flat_np_uint64_array_from_array_like(data: Any, dimension: int) -> npt.NDArray[np.uint64]: + """Converts to a flat uint numpy array from an arbitrary vector, validating for an expected dimensionality.""" + + array = to_np_uint64(data) + return flat_np_array_from_array_like(array, dimension) + + def flat_np_array_from_array_like(array: npt.NDArray[Any], dimension: int) -> npt.NDArray[Any]: """Converts to a flat numpy array from an arbitrary vector, validating for an expected dimensionality.""" diff --git a/rerun_py/rerun_sdk/rerun/archetypes/bar_chart_ext.py b/rerun_py/rerun_sdk/rerun/archetypes/bar_chart_ext.py index 20d1198e8b98..f066f3098749 100644 --- a/rerun_py/rerun_sdk/rerun/archetypes/bar_chart_ext.py +++ b/rerun_py/rerun_sdk/rerun/archetypes/bar_chart_ext.py @@ -21,7 +21,7 @@ def values__field_converter_override(data: TensorDataArrayLike) -> TensorDataBat # TODO(jleibs): Doing this on raw arrow data is not great. Clean this up # once we coerce to a canonical non-arrow type. - shape_dims = tensor_data.as_arrow_array()[0][0].values.field(0).to_numpy() + shape_dims = tensor_data.as_arrow_array()[0][0].values.to_numpy() if len([d for d in shape_dims if d != 1]) != 1: _send_warning_or_raise( diff --git a/rerun_py/rerun_sdk/rerun/archetypes/tensor_ext.py b/rerun_py/rerun_sdk/rerun/archetypes/tensor_ext.py index 0ee17ce0edab..14531279ff6c 100644 --- a/rerun_py/rerun_sdk/rerun/archetypes/tensor_ext.py +++ b/rerun_py/rerun_sdk/rerun/archetypes/tensor_ext.py @@ -18,7 +18,7 @@ def __init__( self: Any, data: TensorDataLike | TensorLike | None = None, *, - dim_names: Sequence[str | None] | None = None, + dim_names: Sequence[str] | None = None, value_range: Range1DLike | None = None, ): """ @@ -36,11 +36,11 @@ def __init__( ---------- self: The TensorData object to construct. 
- data: TensorDataLike | None + data: A TensorData object, or type that can be converted to a numpy array. - dim_names: Sequence[str] | None + dim_names: The names of the tensor dimensions when generating the shape from an array. - value_range: Sequence[float] | None + value_range: The range of values to use for colormapping. If not specified, the range will be estimated from the data. diff --git a/rerun_py/rerun_sdk/rerun/datatypes/.gitattributes b/rerun_py/rerun_sdk/rerun/datatypes/.gitattributes index dcfe4dfeec99..c3a9b1f6cc22 100644 --- a/rerun_py/rerun_sdk/rerun/datatypes/.gitattributes +++ b/rerun_py/rerun_sdk/rerun/datatypes/.gitattributes @@ -29,7 +29,6 @@ rgba32.py linguist-generated=true rotation_axis_angle.py linguist-generated=true tensor_buffer.py linguist-generated=true tensor_data.py linguist-generated=true -tensor_dimension.py linguist-generated=true tensor_dimension_index_selection.py linguist-generated=true tensor_dimension_selection.py linguist-generated=true time_int.py linguist-generated=true diff --git a/rerun_py/rerun_sdk/rerun/datatypes/__init__.py b/rerun_py/rerun_sdk/rerun/datatypes/__init__.py index 1cf1156f07c8..49397b6713b3 100644 --- a/rerun_py/rerun_sdk/rerun/datatypes/__init__.py +++ b/rerun_py/rerun_sdk/rerun/datatypes/__init__.py @@ -39,7 +39,6 @@ ) from .tensor_buffer import TensorBuffer, TensorBufferArrayLike, TensorBufferBatch, TensorBufferLike from .tensor_data import TensorData, TensorDataArrayLike, TensorDataBatch, TensorDataLike -from .tensor_dimension import TensorDimension, TensorDimensionArrayLike, TensorDimensionBatch, TensorDimensionLike from .tensor_dimension_index_selection import ( TensorDimensionIndexSelection, TensorDimensionIndexSelectionArrayLike, @@ -185,14 +184,10 @@ "TensorDataArrayLike", "TensorDataBatch", "TensorDataLike", - "TensorDimension", - "TensorDimensionArrayLike", - "TensorDimensionBatch", "TensorDimensionIndexSelection", "TensorDimensionIndexSelectionArrayLike", 
"TensorDimensionIndexSelectionBatch", "TensorDimensionIndexSelectionLike", - "TensorDimensionLike", "TensorDimensionSelection", "TensorDimensionSelectionArrayLike", "TensorDimensionSelectionBatch", diff --git a/rerun_py/rerun_sdk/rerun/datatypes/tensor_data.py b/rerun_py/rerun_sdk/rerun/datatypes/tensor_data.py index fa8e1e8364a5..28e3f52c428f 100644 --- a/rerun_py/rerun_sdk/rerun/datatypes/tensor_data.py +++ b/rerun_py/rerun_sdk/rerun/datatypes/tensor_data.py @@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, Sequence, Union +import numpy as np import numpy.typing as npt import pyarrow as pa from attrs import define, field @@ -15,6 +16,9 @@ from .._baseclasses import ( BaseBatch, ) +from .._converters import ( + to_np_uint64, +) from .tensor_data_ext import TensorDataExt __all__ = ["TensorData", "TensorDataArrayLike", "TensorDataBatch", "TensorDataLike"] @@ -46,8 +50,8 @@ class TensorData(TensorDataExt): # __init__ can be found in tensor_data_ext.py - shape: list[datatypes.TensorDimension] = field() - # The shape of the tensor, including optional names for each dimension. + shape: npt.NDArray[np.uint64] = field(converter=to_np_uint64) + # The shape of the tensor, i.e. the length of each dimension. # # (Docstring intentionally commented out to hide this field from the docs) @@ -56,6 +60,17 @@ class TensorData(TensorDataExt): # # (Docstring intentionally commented out to hide this field from the docs) + names: list[str] | None = field(default=None) + # The names of the dimensions of the tensor (optional). + # + # If set, should be the same length as [`datatypes.TensorData.shape`][rerun.datatypes.TensorData.shape]. + # If it has a different length your names may show up improperly, + # and some constructors may produce a warning or even an error. + # + # Example: `["height", "width", "channel", "batch"]`. 
+ # + # (Docstring intentionally commented out to hide this field from the docs) + if TYPE_CHECKING: TensorDataLike = Union[TensorData, npt.ArrayLike] @@ -68,20 +83,10 @@ class TensorData(TensorDataExt): class TensorDataBatch(BaseBatch[TensorDataArrayLike]): _ARROW_DATATYPE = pa.struct([ pa.field( - "shape", - pa.list_( - pa.field( - "item", - pa.struct([ - pa.field("size", pa.uint64(), nullable=False, metadata={}), - pa.field("name", pa.utf8(), nullable=True, metadata={}), - ]), - nullable=False, - metadata={}, - ) - ), - nullable=False, - metadata={}, + "shape", pa.list_(pa.field("item", pa.uint64(), nullable=False, metadata={})), nullable=False, metadata={} + ), + pa.field( + "names", pa.list_(pa.field("item", pa.utf8(), nullable=False, metadata={})), nullable=True, metadata={} ), pa.field( "buffer", diff --git a/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py b/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py index 5d89c7f050fe..64dd756cf60e 100644 --- a/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py +++ b/rerun_py/rerun_sdk/rerun/datatypes/tensor_data_ext.py @@ -8,6 +8,7 @@ import numpy.typing as npt import pyarrow as pa +from rerun._validators import flat_np_uint64_array_from_array_like from rerun.error_utils import _send_warning_or_raise from .._unions import build_dense_union @@ -20,7 +21,7 @@ def numpy(self, force: bool) -> npt.NDArray[Any]: ... if TYPE_CHECKING: - from . import TensorBufferLike, TensorDataArrayLike, TensorDataLike, TensorDimension, TensorDimensionLike + from . 
import TensorBufferLike, TensorDataArrayLike, TensorDataLike TensorLike = Union[TensorDataLike, TorchTensorLike] """Type helper for a tensor-like object that can be logged to Rerun.""" @@ -48,10 +49,10 @@ class TensorDataExt: def __init__( self: Any, *, - shape: Sequence[TensorDimensionLike] | None = None, + shape: Sequence[int] | None = None, buffer: TensorBufferLike | None = None, array: TensorLike | None = None, - dim_names: Sequence[str | None] | None = None, + dim_names: Sequence[str] | None = None, ) -> None: """ Construct a `TensorData` object. @@ -64,18 +65,18 @@ def __init__( Parameters ---------- - self: TensorData + self: The TensorData object to construct. - shape: Sequence[TensorDimensionLike] | None + shape: The shape of the tensor. If None, and an array is provided, the shape will be inferred from the shape of the array. - buffer: TensorBufferLike | None + buffer: The buffer of the tensor. If None, and an array is provided, the buffer will be generated from the array. - array: Tensor | None + array: A numpy array (or The array of the tensor. If None, the array will be inferred from the buffer. - dim_names: Sequence[str] | None - The names of the tensor dimensions when generating the shape from an array. + dim_names: + The names of the tensor dimensions. """ if array is None and buffer is None: @@ -84,10 +85,8 @@ def __init__( raise ValueError("Can only provide one of 'array' or 'buffer'") if buffer is not None and shape is None: raise ValueError("If 'buffer' is provided, 'shape' is also required") - if shape is not None and dim_names is not None: - raise ValueError("Can only provide one of 'shape' or 'names'") - from . import TensorBuffer, TensorDimension + from . 
import TensorBuffer from .tensor_data import _tensor_data__buffer__special_field_converter_override if shape is not None: @@ -101,7 +100,7 @@ def __init__( # If a shape we provided, it must match the array if resolved_shape: - shape_tuple = tuple(d.size for d in resolved_shape) + shape_tuple = tuple(d for d in resolved_shape) if shape_tuple != array.shape: _send_warning_or_raise( ( @@ -113,21 +112,10 @@ def __init__( resolved_shape = None if resolved_shape is None: - if dim_names: - if len(array.shape) != len(dim_names): - _send_warning_or_raise( - ( - f"len(array.shape) = {len(array.shape)} != " - + f"len(dim_names) = {len(dim_names)}. Dropping tensor dimension names." - ), - 2, - ) - resolved_shape = [TensorDimension(size, name) for size, name in zip(array.shape, dim_names)] # type: ignore[arg-type] - else: - resolved_shape = [TensorDimension(size) for size in array.shape] + resolved_shape = [size for size in array.shape] if resolved_shape is not None: - self.shape = resolved_shape + self.shape: npt.NDArray[np.uint64] = resolved_shape else: # This shouldn't be possible but typing can't figure it out raise ValueError("No shape provided.") @@ -137,7 +125,20 @@ def __init__( elif array is not None: self.buffer = TensorBuffer(array.flatten()) - expected_buffer_size = prod(d.size for d in self.shape) + self.names: list[str] | None = None + if dim_names: + if len(self.shape) == len(dim_names): + self.names = dim_names + else: + _send_warning_or_raise( + ( + f"len(shape) = {len(self.shape)} != " + + f"len(dim_names) = {len(dim_names)}. Ignoring tensor dimension names." + ), + 2, + ) + + expected_buffer_size = prod(d for d in self.shape) if len(self.buffer.inner) != expected_buffer_size: raise ValueError( f"Shape and buffer size do not match. 
{len(self.buffer.inner)} {self.shape}->{expected_buffer_size}" @@ -166,20 +167,26 @@ def native_to_pa_array_override(data: TensorDataArrayLike, data_type: pa.DataTyp data = TensorData(array=array) # Now build the actual arrow fields - shape = _build_shape_array(data.shape).cast(data_type.field("shape").type) + shape = pa.array([flat_np_uint64_array_from_array_like(data.shape, 1)], type=data_type.field("shape").type) buffer = _build_buffer_array(data.buffer) + if data.names is None: + names = pa.array([None], type=data_type.field("names").type) + else: + names = pa.array([data.names], type=data_type.field("names").type) + return pa.StructArray.from_arrays( [ shape, + names, buffer, ], - fields=[data_type.field("shape"), data_type.field("buffer")], + fields=data_type.fields, ).cast(data_type) def numpy(self: Any, force: bool) -> npt.NDArray[Any]: """Convert the TensorData back to a numpy array.""" - dims = [d.size for d in self.shape] + dims = [d for d in self.shape] return self.buffer.inner.reshape(dims) # type: ignore[no-any-return] @@ -188,26 +195,6 @@ def numpy(self: Any, force: bool) -> npt.NDArray[Any]: ################################################################################ -def _build_shape_array(dims: list[TensorDimension]) -> pa.Array: - from . 
import TensorDimensionBatch - - data_type = TensorDimensionBatch._ARROW_DATATYPE - - array = np.asarray([d.size for d in dims], dtype=np.uint64).flatten() - names = pa.array([d.name for d in dims], mask=[d is None for d in dims], type=data_type.field("name").type) - - return pa.ListArray.from_arrays( - offsets=[0, len(array)], - values=pa.StructArray.from_arrays( - [ - array, - names, - ], - fields=[data_type.field("size"), data_type.field("name")], - ), - ) - - DTYPE_MAP: Final[dict[npt.DTypeLike, str]] = { np.uint8: "U8", np.uint16: "U16", diff --git a/rerun_py/rerun_sdk/rerun/datatypes/tensor_dimension.py b/rerun_py/rerun_sdk/rerun/datatypes/tensor_dimension.py deleted file mode 100644 index 99b1b0977ec3..000000000000 --- a/rerun_py/rerun_sdk/rerun/datatypes/tensor_dimension.py +++ /dev/null @@ -1,71 +0,0 @@ -# DO NOT EDIT! This file was auto-generated by crates/build/re_types_builder/src/codegen/python/mod.rs -# Based on "crates/store/re_types/definitions/rerun/datatypes/tensor_dimension.fbs". - -# You can extend this class by creating a "TensorDimensionExt" class in "tensor_dimension_ext.py". - -from __future__ import annotations - -from typing import Any, Sequence, Union - -import pyarrow as pa -from attrs import define, field - -from .._baseclasses import ( - BaseBatch, -) -from .._converters import ( - str_or_none, -) - -__all__ = ["TensorDimension", "TensorDimensionArrayLike", "TensorDimensionBatch", "TensorDimensionLike"] - - -@define(init=False) -class TensorDimension: - """**Datatype**: A single dimension within a multi-dimensional tensor.""" - - def __init__(self: Any, size: int, name: str | None = None): - """ - Create a new instance of the TensorDimension datatype. - - Parameters - ---------- - size: - The length of this dimension. - name: - The name of this dimension, e.g. "width", "height", "channel", "batch', …. 
- - """ - - # You can define your own __init__ function as a member of TensorDimensionExt in tensor_dimension_ext.py - self.__attrs_init__(size=size, name=name) - - size: int = field(converter=int) - # The length of this dimension. - # - # (Docstring intentionally commented out to hide this field from the docs) - - name: str | None = field(default=None, converter=str_or_none) - # The name of this dimension, e.g. "width", "height", "channel", "batch', …. - # - # (Docstring intentionally commented out to hide this field from the docs) - - -TensorDimensionLike = TensorDimension -TensorDimensionArrayLike = Union[ - TensorDimension, - Sequence[TensorDimensionLike], -] - - -class TensorDimensionBatch(BaseBatch[TensorDimensionArrayLike]): - _ARROW_DATATYPE = pa.struct([ - pa.field("size", pa.uint64(), nullable=False, metadata={}), - pa.field("name", pa.utf8(), nullable=True, metadata={}), - ]) - - @staticmethod - def _native_to_pa_array(data: TensorDimensionArrayLike, data_type: pa.DataType) -> pa.Array: - raise NotImplementedError( - "Arrow serialization of TensorDimension not implemented: We lack codegen for arrow-serialization of general structs" - ) # You need to implement native_to_pa_array_override in tensor_dimension_ext.py diff --git a/rerun_py/tests/unit/test_tensor.py b/rerun_py/tests/unit/test_tensor.py index 34dbf4ab6923..b55f1d0ec3b2 100644 --- a/rerun_py/tests/unit/test_tensor.py +++ b/rerun_py/tests/unit/test_tensor.py @@ -6,7 +6,7 @@ import pytest import rerun as rr from rerun.components import TensorData, TensorDataBatch -from rerun.datatypes import TensorBuffer, TensorDataLike, TensorDimension +from rerun.datatypes import TensorBuffer, TensorDataLike rng = np.random.default_rng(12345) RANDOM_TENSOR_SOURCE = rng.uniform(0.0, 1.0, (8, 6, 3, 5)) @@ -15,12 +15,8 @@ TENSOR_DATA_INPUTS: list[TensorDataLike] = [ # Full explicit construction TensorData( - shape=[ - TensorDimension(8, name="a"), - TensorDimension(6, name="b"), - TensorDimension(3, name="c"), - 
TensorDimension(5, name="d"), - ], + shape=[8, 6, 3, 5], + dim_names=["a", "b", "c", "d"], buffer=TensorBuffer(RANDOM_TENSOR_SOURCE), ), # Implicit construction from ndarray @@ -33,14 +29,15 @@ TensorData(array=RANDOM_TENSOR_SOURCE, dim_names=["a", "b", "c", "d"]), ] -# 0 = shape -# 1 = buffer +SHAPE = 0 # Based on datatypes/tensor_data.fbs +NAMES = 1 # Based on datatypes/tensor_data.fbs +BUFFER = 2 # Based on datatypes/tensor_data.fbs CHECK_FIELDS: list[list[int]] = [ - [0, 1], - [1], - [1], - [0, 1], - [0, 1], + [SHAPE, NAMES, BUFFER], + [BUFFER], + [BUFFER], + [SHAPE, NAMES, BUFFER], + [SHAPE, NAMES, BUFFER], ] @@ -87,12 +84,8 @@ def test_bad_tensors() -> None: # Wrong size buffer for dimensions with pytest.raises(ValueError): TensorData( - shape=[ - TensorDimension(8, name="a"), - TensorDimension(6, name="b"), - TensorDimension(3, name="c"), - TensorDimension(4, name="d"), - ], + shape=[1, 2, 3], + dim_names=["a", "b", "c", "d"], buffer=RANDOM_TENSOR_SOURCE, ) @@ -108,11 +101,7 @@ def test_bad_tensors() -> None: # Shape disagrees with array with pytest.raises(ValueError): TensorData( - shape=[ - TensorDimension(8, name="a"), - TensorDimension(6, name="b"), - TensorDimension(5, name="c"), - TensorDimension(3, name="d"), - ], + shape=[1, 2, 3], + dim_names=["a", "b", "c", "d"], array=RANDOM_TENSOR_SOURCE, ) diff --git a/tests/cpp/roundtrips/tensor/main.cpp b/tests/cpp/roundtrips/tensor/main.cpp index c1a218349a5f..98d367454b74 100644 --- a/tests/cpp/roundtrips/tensor/main.cpp +++ b/tests/cpp/roundtrips/tensor/main.cpp @@ -8,10 +8,10 @@ int main(int, char** argv) { const auto rec = rerun::RecordingStream("rerun_example_roundtrip_tensor"); rec.save(argv[1]).exit_on_failure(); - std::vector dimensions{{3, 4, 5, 6}}; + std::vector shape{{3, 4, 5, 6}}; std::vector data(360); std::generate(data.begin(), data.end(), [n = 0]() mutable { return n++; }); - rec.log("tensor", rerun::archetypes::Tensor(rerun::datatypes::TensorData{dimensions, data})); + rec.log("tensor", 
rerun::archetypes::Tensor(rerun::datatypes::TensorData{shape, data})); } diff --git a/tests/python/release_checklist/check_all_components_ui.py b/tests/python/release_checklist/check_all_components_ui.py index 0849f5983c8b..4847c56d3d61 100644 --- a/tests/python/release_checklist/check_all_components_ui.py +++ b/tests/python/release_checklist/check_all_components_ui.py @@ -218,7 +218,7 @@ def alternatives(self) -> list[Any] | None: rr.datatypes.TensorData(array=np.random.randint(0, 255, (5, 3, 6, 4), dtype=np.uint8)), rr.datatypes.TensorData( array=np.random.randint(0, 255, (5, 3, 6, 4), dtype=np.uint8), - dim_names=[None, "hello", None, "world"], + dim_names=["hello", "brave", "new", "world"], ), rr.datatypes.TensorData(array=np.random.randint(0, 255, (5, 3, 6, 4, 3), dtype=np.uint8)), ]