Skip to content

Commit

Permalink
Prepare to port codegen deserialization from arrow2 (#8372)
Browse files Browse the repository at this point in the history
### Related
* Part of #3741
 
### What
Some preparatory work for migrating the codegen deserializer away from `re_arrow2`.
  • Loading branch information
emilk authored Dec 9, 2024
1 parent 29c1f83 commit 59a8440
Show file tree
Hide file tree
Showing 40 changed files with 418 additions and 349 deletions.
42 changes: 22 additions & 20 deletions crates/build/re_types_builder/src/codegen/rust/deserializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,11 +326,12 @@ pub fn quote_arrow_deserializer(
let quoted_field_deserializers = obj
.fields
.iter()
.filter(|obj_field|
// For unit fields we don't have to collect any data.
obj_field.typ != crate::Type::Unit)
.enumerate()
.map(|(i, obj_field)| {
.filter(|(_, obj_field)| {
// For unit fields we don't have to collect any data.
obj_field.typ != crate::Type::Unit
})
.map(|(type_id, obj_field)| {
let data_dst = format_ident!("{}", obj_field.snake_case_name());

let field_datatype = &arrow_registry.get(&obj_field.fqname);
Expand All @@ -344,29 +345,29 @@ pub fn quote_arrow_deserializer(
InnerRepr::NativeIterable,
);

let i = i + 1; // NOTE: +1 to account for `_null_markers` virtual arm
let type_id = Literal::usize_unsuffixed(type_id + 1); // NOTE: +1 to account for `_null_markers` virtual arm

quote! {
let #data_dst = {
// NOTE: `data_src_arrays` is a runtime collection of all of the
// input's payload's union arms, while `#i` is our comptime union
// input's payload's union arms, while `#type_id` is our comptime union
// arm counter… there's no guarantee it's actually there at
// runtime!
if #i >= #data_src_arrays.len() {
if #data_src_arrays.len() <= #type_id {
// By not returning an error but rather defaulting to an empty
// vector, we introduce some kind of light forwards compatibility:
// old clients that don't yet know about the new arms can still
// send data in.
return Ok(Vec::new());

// return Err(DeserializationError::missing_union_arm(
// #quoted_datatype, #obj_field_fqname, #i,
// #quoted_datatype, #obj_field_fqname, #type_id,
// )).with_context(#obj_fqname);
}

// NOTE: The array indexing is safe: checked above.
let #data_src = &*#data_src_arrays[#i];
#quoted_deserializer.collect::<Vec<_>>()
let #data_src = &*#data_src_arrays[#type_id];
#quoted_deserializer.collect::<Vec<_>>()
}
}
});
Expand Down Expand Up @@ -583,19 +584,20 @@ fn quote_arrow_field_deserializer(

let offsets = #data_src.offsets();
arrow2::bitmap::utils::ZipValidity::new_with_validity(
offsets.iter().zip(offsets.lengths()),
offsets.windows(2),
#data_src.validity(),
)
.map(|elem| elem.map(|(start, len)| {
.map(|elem| elem.map(|window| {
// NOTE: Do _not_ use `Buffer::sliced`, it panics on malformed inputs.

let start = *start as usize;
let end = start + len;
let start = window[0] as usize;
let end = window[1] as usize;
let len = end - start;

// NOTE: It is absolutely crucial we explicitly handle the
// boundchecks manually first, otherwise rustc completely chokes
// when slicing the data (as in: a 100x perf drop)!
if end > #data_src_buf.len() {
if #data_src_buf.len() < end {
// error context is appended below during final collection
return Err(DeserializationError::offset_slice_oob(
(start, end), #data_src_buf.len(),
Expand Down Expand Up @@ -812,19 +814,19 @@ fn quote_arrow_field_deserializer(

let offsets = #data_src.offsets();
arrow2::bitmap::utils::ZipValidity::new_with_validity(
offsets.iter().zip(offsets.lengths()),
offsets.windows(2),
#data_src.validity(),
)
.map(|elem| elem.map(|(start, len)| {
.map(|elem| elem.map(|window| {
// NOTE: Do _not_ use `Buffer::sliced`, it panics on malformed inputs.

let start = *start as usize;
let end = start + len;
let start = window[0] as usize;
let end = window[1] as usize;

// NOTE: It is absolutely crucial we explicitly handle the
// boundchecks manually first, otherwise rustc completely chokes
// when slicing the data (as in: a 100x perf drop)!
if end > #data_src_inner.len() {
if #data_src_inner.len() < end {
// error context is appended below during final collection
return Err(DeserializationError::offset_slice_oob(
(start, end), #data_src_inner.len(),
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 16 additions & 15 deletions crates/store/re_types/src/blueprint/datatypes/selected_columns.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions crates/store/re_types/src/components/annotation_context.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions crates/store/re_types/src/components/geo_line_string.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions crates/store/re_types/src/components/line_strip2d.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions crates/store/re_types/src/components/line_strip3d.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 6 additions & 5 deletions crates/store/re_types/src/datatypes/annotation_info.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions crates/store/re_types/src/datatypes/blob.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 59a8440

Please sign in to comment.