Skip to content

Commit

Permalink
Remove all legacy Chunk iteration APIs (#8556)
Browse files Browse the repository at this point in the history
* DNM: requires #8555
  • Loading branch information
teh-cmc authored Jan 6, 2025
1 parent c4ce0e7 commit 87902e6
Show file tree
Hide file tree
Showing 7 changed files with 4 additions and 534 deletions.
4 changes: 0 additions & 4 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -6580,12 +6580,10 @@ version = "0.22.0-alpha.1+dev"
dependencies = [
"ahash",
"arrow",
"bytemuck",
"egui",
"glam",
"itertools 0.13.0",
"nohash-hasher",
"re_arrow2",
"re_chunk_store",
"re_entity_db",
"re_log",
Expand Down Expand Up @@ -6697,7 +6695,6 @@ version = "0.22.0-alpha.1+dev"
dependencies = [
"ahash",
"anyhow",
"arrow",
"bitflags 2.6.0",
"bytemuck",
"criterion",
Expand All @@ -6710,7 +6707,6 @@ dependencies = [
"nohash-hasher",
"once_cell",
"ordered-float",
"re_arrow2",
"re_chunk_store",
"re_data_ui",
"re_entity_db",
Expand Down
344 changes: 0 additions & 344 deletions crates/store/re_chunk/src/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,350 +201,6 @@ impl Chunk {
}
}

/// Returns an iterator over the raw primitive values of a [`Chunk`], for a given component.
///
/// This is a very fast path: the entire column will be downcasted at once, and then every
/// component batch will be a slice reference into that global slice.
/// Use this when working with simple arrow datatypes and performance matters (e.g. scalars,
/// points, etc).
///
/// See also:
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component`].
#[inline]
pub fn iter_primitive<T: arrow2::types::NativeType>(
&self,
component_name: &ComponentName,
) -> impl Iterator<Item = &[T]> + '_ {
let Some(list_array) = self.get_first_component(component_name) else {
return Either::Left(std::iter::empty());
};

let Some(values) = list_array
.values()
.as_any()
.downcast_ref::<Arrow2PrimitiveArray<T>>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};
let values = values.values().as_slice();

// NOTE: No need for validity checks here, `iter_offsets` already takes care of that.
Either::Right(
self.iter_component_offsets(component_name)
.map(move |(idx, len)| &values[idx..idx + len]),
)
}

/// Returns an iterator over the raw boolean values of a [`Chunk`], for a given component.
///
/// This is a very fast path: the entire column will be downcasted at once, and then every
/// component batch will be a slice reference into that global slice.
/// Use this when working with simple arrow datatypes and performance matters.
///
/// See also:
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component`].
#[inline]
pub fn iter_bool(
&self,
component_name: &ComponentName,
) -> impl Iterator<Item = Arrow2Bitmap> + '_ {
let Some(list_array) = self.get_first_component(component_name) else {
return Either::Left(std::iter::empty());
};

let Some(values) = list_array
.values()
.as_any()
.downcast_ref::<Arrow2BooleanArray>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};
let values = values.values().clone();

// NOTE: No need for validity checks here, `iter_offsets` already takes care of that.
Either::Right(
self.iter_component_offsets(component_name)
.map(move |(idx, len)| values.clone().sliced(idx, len)),
)
}

/// Returns an iterator over the raw primitive arrays of a [`Chunk`], for a given component.
///
/// This is a very fast path: the entire column will be downcasted at once, and then every
/// component batch will be a slice reference into that global slice.
/// Use this when working with simple arrow datatypes and performance matters (e.g. scalars,
/// points, etc).
///
/// See also:
/// * [`Self::iter_primitive`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component`].
pub fn iter_primitive_array<const N: usize, T: arrow2::types::NativeType>(
&self,
component_name: &ComponentName,
) -> impl Iterator<Item = &[[T; N]]> + '_
where
[T; N]: bytemuck::Pod,
{
let Some(list_array) = self.get_first_component(component_name) else {
return Either::Left(std::iter::empty());
};

let Some(fixed_size_list_array) = list_array
.values()
.as_any()
.downcast_ref::<Arrow2FixedSizeListArray>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};

let Some(values) = fixed_size_list_array
.values()
.as_any()
.downcast_ref::<Arrow2PrimitiveArray<T>>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};

let size = fixed_size_list_array.size();
let values = values.values().as_slice();

// NOTE: No need for validity checks here, `iter_offsets` already takes care of that.
Either::Right(
self.iter_component_offsets(component_name)
.map(move |(idx, len)| {
bytemuck::cast_slice(&values[idx * size..idx * size + len * size])
}),
)
}

/// Returns an iterator over the raw list of primitive arrays of a [`Chunk`], for a given component.
///
/// This is a very fast path: the entire column will be downcasted at once, and then every
/// component batch will be a slice reference into that global slice.
/// Use this when working with simple arrow datatypes and performance matters (e.g. strips, etc).
///
/// See also:
/// * [`Self::iter_primitive`]
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component`].
pub fn iter_primitive_array_list<const N: usize, T: arrow2::types::NativeType>(
&self,
component_name: &ComponentName,
) -> impl Iterator<Item = Vec<&[[T; N]]>> + '_
where
[T; N]: bytemuck::Pod,
{
let Some(list_array) = self.get_first_component(component_name) else {
return Either::Left(std::iter::empty());
};

let Some(inner_list_array) = list_array
.values()
.as_any()
.downcast_ref::<Arrow2ListArray<i32>>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};

let inner_offsets = inner_list_array.offsets();
let inner_lengths = inner_list_array.offsets().lengths().collect_vec();

let Some(fixed_size_list_array) = inner_list_array
.values()
.as_any()
.downcast_ref::<Arrow2FixedSizeListArray>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};

let Some(values) = fixed_size_list_array
.values()
.as_any()
.downcast_ref::<Arrow2PrimitiveArray<T>>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};

let size = fixed_size_list_array.size();
let values = values.values();

// NOTE: No need for validity checks here, `iter_offsets` already takes care of that.
Either::Right(
self.iter_component_offsets(component_name)
.map(move |(idx, len)| {
let inner_offsets = &inner_offsets.as_slice()[idx..idx + len];
let inner_lengths = &inner_lengths.as_slice()[idx..idx + len];
izip!(inner_offsets, inner_lengths)
.map(|(&idx, &len)| {
let idx = idx as usize;
bytemuck::cast_slice(&values[idx * size..idx * size + len * size])
})
.collect_vec()
}),
)
}

/// Returns an iterator over the raw strings of a [`Chunk`], for a given component.
///
/// This is a very fast path: the entire column will be downcasted at once, and then every
/// component batch will be a slice reference into that global slice.
/// Use this when working with simple arrow datatypes and performance matters (e.g. labels, etc).
///
/// See also:
/// * [`Self::iter_primitive`]
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component`].
pub fn iter_string(
&self,
component_name: &ComponentName,
) -> impl Iterator<Item = Vec<ArrowString>> + '_ {
let Some(list_array) = self.get_first_component(component_name) else {
return Either::Left(std::iter::empty());
};

let Some(utf8_array) = list_array
.values()
.as_any()
.downcast_ref::<Arrow2Utf8Array<i32>>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};

let values = utf8_array.values();
let offsets = utf8_array.offsets();
let lengths = utf8_array.offsets().lengths().collect_vec();

// NOTE: No need for validity checks here, `iter_offsets` already takes care of that.
Either::Right(
self.iter_component_offsets(component_name)
.map(move |(idx, len)| {
let offsets = &offsets.as_slice()[idx..idx + len];
let lengths = &lengths.as_slice()[idx..idx + len];
izip!(offsets, lengths)
.map(|(&idx, &len)| ArrowString::from(values.clone().sliced(idx as _, len)))
.collect_vec()
}),
)
}

/// Returns an iterator over the raw buffers of a [`Chunk`], for a given component.
///
/// This is a very fast path: the entire column will be downcasted at once, and then every
/// component batch will be a slice reference into that global slice.
/// Use this when working with simple arrow datatypes and performance matters (e.g. blobs, etc).
///
/// See also:
/// * [`Self::iter_primitive`]
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_string`].
/// * [`Self::iter_component`].
pub fn iter_buffer<T: arrow::datatypes::ArrowNativeType + arrow2::types::NativeType>(
&self,
component_name: &ComponentName,
) -> impl Iterator<Item = Vec<ArrowBuffer<T>>> + '_ {
let Some(list_array) = self.get_first_component(component_name) else {
return Either::Left(std::iter::empty());
};

let Some(inner_list_array) = list_array
.values()
.as_any()
.downcast_ref::<Arrow2ListArray<i32>>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};

let Some(values) = inner_list_array
.values()
.as_any()
.downcast_ref::<Arrow2PrimitiveArray<T>>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};

let values = values.values();
let offsets = inner_list_array.offsets();
let lengths = inner_list_array.offsets().lengths().collect_vec();

// NOTE: No need for validity checks here, `iter_offsets` already takes care of that.
Either::Right(
self.iter_component_offsets(component_name)
.map(move |(idx, len)| {
let offsets = &offsets.as_slice()[idx..idx + len];
let lengths = &lengths.as_slice()[idx..idx + len];
izip!(offsets, lengths)
// NOTE: Not an actual clone, just a refbump of the underlying buffer.
.map(|(&idx, &len)| values.clone().sliced(idx as _, len).into())
.collect_vec()
}),
)
}

/// Returns an iterator over the all the sliced component batches in a [`Chunk`]'s column, for
/// a given component.
///
Expand Down
2 changes: 0 additions & 2 deletions crates/viewer/re_view/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ re_viewport_blueprint.workspace = true

ahash.workspace = true
arrow.workspace = true
arrow2.workspace = true
bytemuck.workspace = true
egui.workspace = true
glam.workspace = true
itertools.workspace = true
Expand Down
Loading

0 comments on commit 87902e6

Please sign in to comment.