diff --git a/.github/workflows/arrow.yml b/.github/workflows/arrow.yml index daf38f2523fc..0b90a78577e5 100644 --- a/.github/workflows/arrow.yml +++ b/.github/workflows/arrow.yml @@ -146,11 +146,11 @@ jobs: - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: - target: wasm32-unknown-unknown,wasm32-wasi + target: wasm32-unknown-unknown,wasm32-wasip1 - name: Build wasm32-unknown-unknown run: cargo build -p arrow --no-default-features --features=json,csv,ipc,ffi --target wasm32-unknown-unknown - - name: Build wasm32-wasi - run: cargo build -p arrow --no-default-features --features=json,csv,ipc,ffi --target wasm32-wasi + - name: Build wasm32-wasip1 + run: cargo build -p arrow --no-default-features --features=json,csv,ipc,ffi --target wasm32-wasip1 clippy: name: Clippy diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 9b23b1b5ad2e..a47195d1becf 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -65,8 +65,7 @@ jobs: ARROW_INTEGRATION_JAVA: ON ARROW_INTEGRATION_JS: ON ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS: "rust" - # Disable nanoarrow integration, due to https://github.com/apache/arrow-rs/issues/5052 - ARCHERY_INTEGRATION_WITH_NANOARROW: "0" + ARCHERY_INTEGRATION_WITH_NANOARROW: "1" # https://github.com/apache/arrow/pull/38403/files#r1371281630 ARCHERY_INTEGRATION_WITH_RUST: "1" # These are necessary because the github runner overrides $HOME diff --git a/.github/workflows/object_store.yml b/.github/workflows/object_store.yml index 93f809aaabd4..1639b031ebfc 100644 --- a/.github/workflows/object_store.yml +++ b/.github/workflows/object_store.yml @@ -54,6 +54,10 @@ jobs: # targets. 
- name: Run clippy with default features run: cargo clippy -- -D warnings + - name: Run clippy without default features + run: cargo clippy --no-default-features -- -D warnings + - name: Run clippy with fs features + run: cargo clippy --no-default-features --features fs -- -D warnings - name: Run clippy with aws feature run: cargo clippy --features aws -- -D warnings - name: Run clippy with gcp feature @@ -200,11 +204,11 @@ jobs: - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: - target: wasm32-unknown-unknown,wasm32-wasi + target: wasm32-unknown-unknown,wasm32-wasip1 - name: Build wasm32-unknown-unknown run: cargo build --target wasm32-unknown-unknown - - name: Build wasm32-wasi - run: cargo build --target wasm32-wasi + - name: Build wasm32-wasip1 + run: cargo build --target wasm32-wasip1 windows: name: cargo test LocalFileSystem (win64) diff --git a/.github/workflows/parquet.yml b/.github/workflows/parquet.yml index 2269950fd235..4c46fde198bd 100644 --- a/.github/workflows/parquet.yml +++ b/.github/workflows/parquet.yml @@ -97,6 +97,8 @@ jobs: run: cargo check -p parquet --no-default-features - name: Check compilation --no-default-features --features arrow run: cargo check -p parquet --no-default-features --features arrow + - name: Check compilation --no-default-features --features simdutf8 + run: cargo check -p parquet --no-default-features --features simdutf8 - name: Check compilation --no-default-features --all-features run: cargo check -p parquet --all-features - name: Check compilation --all-targets @@ -123,13 +125,13 @@ jobs: - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: - target: wasm32-unknown-unknown,wasm32-wasi + target: wasm32-unknown-unknown,wasm32-wasip1 - name: Install clang # Needed for zlib compilation run: apt-get update && apt-get install -y clang gcc-multilib - name: Build wasm32-unknown-unknown run: cargo build -p parquet --target wasm32-unknown-unknown - - name: Build wasm32-wasi - run: cargo 
build -p parquet --target wasm32-wasi + - name: Build wasm32-wasip1 + run: cargo build -p parquet --target wasm32-wasip1 pyspark-integration-test: name: PySpark Integration Test diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 044250b70435..ca0d2441ceae 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -123,23 +123,6 @@ jobs: uses: ./.github/actions/setup-builder - name: Install cargo-msrv run: cargo install cargo-msrv - - name: Downgrade arrow dependencies - run: cargo update -p ahash --precise 0.8.7 - - name: Check arrow - working-directory: arrow - run: | - # run `cd arrow; cargo msrv verify` to see problematic dependencies - cargo msrv verify --output-format=json - - name: Check parquet - working-directory: parquet - run: | - # run `cd parquet; cargo msrv verify` to see problematic dependencies - cargo msrv verify --output-format=json - - name: Check arrow-flight - working-directory: arrow-flight - run: | - # run `cd arrow-flight; cargo msrv verify` to see problematic dependencies - cargo msrv verify --output-format=json - name: Downgrade object_store dependencies working-directory: object_store # Necessary because tokio 1.30.0 updates MSRV to 1.63 @@ -147,8 +130,11 @@ jobs: run: | cargo update -p tokio --precise 1.29.1 cargo update -p url --precise 2.5.0 - - name: Check object_store - working-directory: object_store + - name: Check all packages run: | - # run `cd object_store; cargo msrv verify` to see problematic dependencies - cargo msrv verify --output-format=json + # run `cargo msrv verify --manifest-path "path/to/Cargo.toml"` to see problematic dependencies + find . 
-mindepth 2 -name Cargo.toml | while read -r dir + do + echo "Checking package '$dir'" + cargo msrv verify --manifest-path "$dir" --output-format=json || exit 1 + done diff --git a/Cargo.toml b/Cargo.toml index 75ba410f12a6..39e3c0bca99a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,7 +74,7 @@ include = [ "Cargo.toml", ] edition = "2021" -rust-version = "1.62" +rust-version = "1.70" [workspace.dependencies] arrow = { version = "54.0.0", path = "./arrow", default-features = false } diff --git a/README.md b/README.md index ed42f630514b..7a3dc1dd2458 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,16 @@ Planned Release Schedule | Dec 2024 | `0.11.2` | Minor, NO breaking API changes | | Feb 2025 | `0.12.0` | Major, potentially breaking API changes | +### Guidelines for `panic` vs `Result` + +In general, use panics for bad states that are unreachable, unrecoverable or harmful. +For those caused by invalid user input, however, we prefer to report that invalidity +gracefully as an error result instead of panicking. In general, invalid input should result +in an `Error` as soon as possible. It _is_ ok for code paths after validation to assume +validation has already occurred and panic if not. See [this ticket] for more nuances. + +[this ticket]: https://github.com/apache/arrow-rs/issues/6737 + ### Deprecation Guidelines Minor releases may deprecate, but not remove APIs. Deprecating APIs allows diff --git a/arrow-array/src/builder/generic_list_view_builder.rs b/arrow-array/src/builder/generic_list_view_builder.rs new file mode 100644 index 000000000000..5aaf9efefe24 --- /dev/null +++ b/arrow-array/src/builder/generic_list_view_builder.rs @@ -0,0 +1,707 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::builder::ArrayBuilder; +use crate::{ArrayRef, GenericListViewArray, OffsetSizeTrait}; +use arrow_buffer::{Buffer, BufferBuilder, NullBufferBuilder, ScalarBuffer}; +use arrow_schema::{Field, FieldRef}; +use std::any::Any; +use std::sync::Arc; + +/// Builder for [`GenericListViewArray`] +#[derive(Debug)] +pub struct GenericListViewBuilder { + offsets_builder: BufferBuilder, + sizes_builder: BufferBuilder, + null_buffer_builder: NullBufferBuilder, + values_builder: T, + field: Option, + current_offset: OffsetSize, +} + +impl Default for GenericListViewBuilder { + fn default() -> Self { + Self::new(T::default()) + } +} + +impl ArrayBuilder + for GenericListViewBuilder +{ + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.null_buffer_builder.len() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } + + /// Builds the array without resetting the builder. 
+ fn finish_cloned(&self) -> ArrayRef { + Arc::new(self.finish_cloned()) + } +} + +impl GenericListViewBuilder { + /// Creates a new [`GenericListViewBuilder`] from a given values array builder + pub fn new(values_builder: T) -> Self { + let capacity = values_builder.len(); + Self::with_capacity(values_builder, capacity) + } + + /// Creates a new [`GenericListViewBuilder`] from a given values array builder + /// `capacity` is the number of items to pre-allocate space for in this builder + pub fn with_capacity(values_builder: T, capacity: usize) -> Self { + let offsets_builder = BufferBuilder::::new(capacity); + let sizes_builder = BufferBuilder::::new(capacity); + Self { + offsets_builder, + null_buffer_builder: NullBufferBuilder::new(capacity), + values_builder, + sizes_builder, + field: None, + current_offset: OffsetSize::zero(), + } + } + + /// Overrides the field passed to [`GenericListViewArray::new`]. + /// By default a nullable field is created with the name `item` + /// + /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the + /// field's data type does not match that of `T` + pub fn with_field(self, field: impl Into) -> Self { + Self { + field: Some(field.into()), + ..self + } + } +} + +impl GenericListViewBuilder +where + T: 'static, +{ + /// Returns the child array builder as a mutable reference. + /// + /// This mutable reference can be used to append values into the child array builder, + /// but you must call [`append`](#method.append) to delimit each distinct list value. 
+ pub fn values(&mut self) -> &mut T { + &mut self.values_builder + } + + /// Returns the child array builder as an immutable reference + pub fn values_ref(&self) -> &T { + &self.values_builder + } + + /// Finish the current variable-length list array slot + /// + /// # Panics + /// + /// Panics if the length of [`Self::values`] exceeds `OffsetSize::MAX` + #[inline] + pub fn append(&mut self, is_valid: bool) { + self.offsets_builder.append(self.current_offset); + self.sizes_builder.append( + OffsetSize::from_usize( + self.values_builder.len() - self.current_offset.to_usize().unwrap(), + ) + .unwrap(), + ); + self.null_buffer_builder.append(is_valid); + self.current_offset = OffsetSize::from_usize(self.values_builder.len()).unwrap(); + } + + /// Append value into this [`GenericListViewBuilder`] + #[inline] + pub fn append_value(&mut self, i: I) + where + T: Extend>, + I: IntoIterator>, + { + self.extend(std::iter::once(Some(i))) + } + + /// Append a null to this [`GenericListViewBuilder`] + /// + /// See [`Self::append_value`] for an example use. + #[inline] + pub fn append_null(&mut self) { + self.offsets_builder.append(self.current_offset); + self.sizes_builder + .append(OffsetSize::from_usize(0).unwrap()); + self.null_buffer_builder.append_null(); + } + + /// Appends an optional value into this [`GenericListViewBuilder`] + /// + /// If `Some` calls [`Self::append_value`] otherwise calls [`Self::append_null`] + #[inline] + pub fn append_option(&mut self, i: Option) + where + T: Extend>, + I: IntoIterator>, + { + match i { + Some(i) => self.append_value(i), + None => self.append_null(), + } + } + + /// Builds the [`GenericListViewArray`] and reset this builder. 
+ pub fn finish(&mut self) -> GenericListViewArray { + let values = self.values_builder.finish(); + let nulls = self.null_buffer_builder.finish(); + let offsets = self.offsets_builder.finish(); + self.current_offset = OffsetSize::zero(); + + // Safety: Safe by construction + let offsets = ScalarBuffer::from(offsets); + let sizes = self.sizes_builder.finish(); + let sizes = ScalarBuffer::from(sizes); + let field = match &self.field { + Some(f) => f.clone(), + None => Arc::new(Field::new("item", values.data_type().clone(), true)), + }; + GenericListViewArray::new(field, offsets, sizes, values, nulls) + } + + /// Builds the [`GenericListViewArray`] without resetting the builder. + pub fn finish_cloned(&self) -> GenericListViewArray { + let values = self.values_builder.finish_cloned(); + let nulls = self.null_buffer_builder.finish_cloned(); + + let offsets = Buffer::from_slice_ref(self.offsets_builder.as_slice()); + // Safety: safe by construction + let offsets = ScalarBuffer::from(offsets); + + let sizes = Buffer::from_slice_ref(self.sizes_builder.as_slice()); + let sizes = ScalarBuffer::from(sizes); + + let field = match &self.field { + Some(f) => f.clone(), + None => Arc::new(Field::new("item", values.data_type().clone(), true)), + }; + + GenericListViewArray::new(field, offsets, sizes, values, nulls) + } + + /// Returns the current offsets buffer as a slice + pub fn offsets_slice(&self) -> &[OffsetSize] { + self.offsets_builder.as_slice() + } +} + +impl Extend> for GenericListViewBuilder +where + O: OffsetSizeTrait, + B: ArrayBuilder + Extend, + V: IntoIterator, +{ + #[inline] + fn extend>>(&mut self, iter: T) { + for v in iter { + match v { + Some(elements) => { + self.values_builder.extend(elements); + self.append(true); + } + None => self.append(false), + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::builder::{make_builder, Int32Builder, ListViewBuilder}; + use crate::cast::AsArray; + use crate::types::Int32Type; + use crate::{Array, 
Int32Array}; + use arrow_schema::DataType; + + fn test_generic_list_view_array_builder_impl() { + let values_builder = Int32Builder::with_capacity(10); + let mut builder = GenericListViewBuilder::::new(values_builder); + + // [[0, 1, 2], [3, 4, 5], [6, 7]] + builder.values().append_value(0); + builder.values().append_value(1); + builder.values().append_value(2); + builder.append(true); + builder.values().append_value(3); + builder.values().append_value(4); + builder.values().append_value(5); + builder.append(true); + builder.values().append_value(6); + builder.values().append_value(7); + builder.append(true); + let list_array = builder.finish(); + + let list_values = list_array.values().as_primitive::(); + assert_eq!(list_values.values(), &[0, 1, 2, 3, 4, 5, 6, 7]); + assert_eq!(list_array.value_offsets(), [0, 3, 6].map(O::usize_as)); + assert_eq!(list_array.value_sizes(), [3, 3, 2].map(O::usize_as)); + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(3, list_array.len()); + assert_eq!(0, list_array.null_count()); + assert_eq!(O::from_usize(6).unwrap(), list_array.value_offsets()[2]); + assert_eq!(O::from_usize(2).unwrap(), list_array.value_sizes()[2]); + for i in 0..2 { + assert!(list_array.is_valid(i)); + assert!(!list_array.is_null(i)); + } + } + + #[test] + fn test_list_view_array_builder() { + test_generic_list_view_array_builder_impl::() + } + + #[test] + fn test_large_list_view_array_builder() { + test_generic_list_view_array_builder_impl::() + } + + fn test_generic_list_view_array_builder_nulls_impl() { + let values_builder = Int32Builder::with_capacity(10); + let mut builder = GenericListViewBuilder::::new(values_builder); + + // [[0, 1, 2], null, [3, null, 5], [6, 7]] + builder.values().append_value(0); + builder.values().append_value(1); + builder.values().append_value(2); + builder.append(true); + builder.append(false); + builder.values().append_value(3); + builder.values().append_null(); + builder.values().append_value(5); + 
builder.append(true); + builder.values().append_value(6); + builder.values().append_value(7); + builder.append(true); + + let list_array = builder.finish(); + + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(4, list_array.len()); + assert_eq!(1, list_array.null_count()); + assert_eq!(O::from_usize(3).unwrap(), list_array.value_offsets()[2]); + assert_eq!(O::from_usize(3).unwrap(), list_array.value_sizes()[2]); + } + + #[test] + fn test_list_view_array_builder_nulls() { + test_generic_list_view_array_builder_nulls_impl::() + } + + #[test] + fn test_large_list_view_array_builder_nulls() { + test_generic_list_view_array_builder_nulls_impl::() + } + + #[test] + fn test_list_view_array_builder_finish() { + let values_builder = Int32Array::builder(5); + let mut builder = ListViewBuilder::new(values_builder); + + builder.values().append_slice(&[1, 2, 3]); + builder.append(true); + builder.values().append_slice(&[4, 5, 6]); + builder.append(true); + + let mut arr = builder.finish(); + assert_eq!(2, arr.len()); + assert!(builder.is_empty()); + + builder.values().append_slice(&[7, 8, 9]); + builder.append(true); + arr = builder.finish(); + assert_eq!(1, arr.len()); + assert!(builder.is_empty()); + } + + #[test] + fn test_list_view_array_builder_finish_cloned() { + let values_builder = Int32Array::builder(5); + let mut builder = ListViewBuilder::new(values_builder); + + builder.values().append_slice(&[1, 2, 3]); + builder.append(true); + builder.values().append_slice(&[4, 5, 6]); + builder.append(true); + + let mut arr = builder.finish_cloned(); + assert_eq!(2, arr.len()); + assert!(!builder.is_empty()); + + builder.values().append_slice(&[7, 8, 9]); + builder.append(true); + arr = builder.finish(); + assert_eq!(3, arr.len()); + assert!(builder.is_empty()); + } + + #[test] + fn test_list_view_list_view_array_builder() { + let primitive_builder = Int32Builder::with_capacity(10); + let values_builder = ListViewBuilder::new(primitive_builder); + let mut 
builder = ListViewBuilder::new(values_builder); + + // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] + builder.values().values().append_value(1); + builder.values().values().append_value(2); + builder.values().append(true); + builder.values().values().append_value(3); + builder.values().values().append_value(4); + builder.values().append(true); + builder.append(true); + + builder.values().values().append_value(5); + builder.values().values().append_value(6); + builder.values().values().append_value(7); + builder.values().append(true); + builder.values().append(false); + builder.values().values().append_value(8); + builder.values().append(true); + builder.append(true); + + builder.append(false); + + builder.values().values().append_value(9); + builder.values().values().append_value(10); + builder.values().append(true); + builder.append(true); + + let l1 = builder.finish(); + + assert_eq!(4, l1.len()); + assert_eq!(1, l1.null_count()); + + assert_eq!(l1.value_offsets(), &[0, 2, 5, 5]); + assert_eq!(l1.value_sizes(), &[2, 3, 0, 1]); + + let l2 = l1.values().as_list_view::(); + + assert_eq!(6, l2.len()); + assert_eq!(1, l2.null_count()); + assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8]); + assert_eq!(l2.value_sizes(), &[2, 2, 3, 0, 1, 2]); + + let i1 = l2.values().as_primitive::(); + assert_eq!(10, i1.len()); + assert_eq!(0, i1.null_count()); + assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + } + + #[test] + fn test_extend() { + let mut builder = ListViewBuilder::new(Int32Builder::new()); + builder.extend([ + Some(vec![Some(1), Some(2), Some(7), None]), + Some(vec![]), + Some(vec![Some(4), Some(5)]), + None, + ]); + + let array = builder.finish(); + assert_eq!(array.value_offsets(), [0, 4, 4, 6]); + assert_eq!(array.value_sizes(), [4, 0, 2, 0]); + assert_eq!(array.null_count(), 1); + assert!(array.is_null(3)); + let elements = array.values().as_primitive::(); + assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]); + 
assert_eq!(elements.null_count(), 1); + assert!(elements.is_null(3)); + } + + #[test] + fn test_boxed_primitive_array_builder() { + let values_builder = make_builder(&DataType::Int32, 5); + let mut builder = ListViewBuilder::new(values_builder); + + builder + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_slice(&[1, 2, 3]); + builder.append(true); + + builder + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_slice(&[4, 5, 6]); + builder.append(true); + + let arr = builder.finish(); + assert_eq!(2, arr.len()); + + let elements = arr.values().as_primitive::(); + assert_eq!(elements.values(), &[1, 2, 3, 4, 5, 6]); + } + + #[test] + fn test_boxed_list_view_list_view_array_builder() { + // This test is the same as `test_list_view_list_view_array_builder` but uses boxed builders. + let values_builder = make_builder( + &DataType::ListView(Arc::new(Field::new("item", DataType::Int32, true))), + 10, + ); + test_boxed_generic_list_view_generic_list_view_array_builder::(values_builder); + } + + #[test] + fn test_boxed_large_list_view_large_list_view_array_builder() { + // This test is the same as `test_list_view_list_view_array_builder` but uses boxed builders and `LargeListView`. 
+ let values_builder = make_builder( + &DataType::LargeListView(Arc::new(Field::new("item", DataType::Int32, true))), + 10, + ); + test_boxed_generic_list_view_generic_list_view_array_builder::(values_builder); + } + + fn test_boxed_generic_list_view_generic_list_view_array_builder( + values_builder: Box, + ) where + O: OffsetSizeTrait + PartialEq, + { + let mut builder: GenericListViewBuilder> = + GenericListViewBuilder::>::new(values_builder); + + // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(1); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(2); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(3); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(4); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true); + builder.append(true); + + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(5); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + 
.as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(6); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an (Large)ListViewBuilder") + .append_value(7); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(false); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(8); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true); + builder.append(true); + + builder.append(false); + + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(9); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(10); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true); + builder.append(true); + + let l1 = builder.finish(); + assert_eq!(4, l1.len()); + assert_eq!(1, l1.null_count()); + assert_eq!(l1.value_offsets(), &[0, 2, 5, 5].map(O::usize_as)); + assert_eq!(l1.value_sizes(), &[2, 3, 0, 1].map(O::usize_as)); + + let l2 = l1.values().as_list_view::(); + assert_eq!(6, l2.len()); + assert_eq!(1, l2.null_count()); + assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8].map(O::usize_as)); + assert_eq!(l2.value_sizes(), &[2, 2, 3, 0, 1, 
2].map(O::usize_as)); + + let i1 = l2.values().as_primitive::(); + assert_eq!(10, i1.len()); + assert_eq!(0, i1.null_count()); + assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + } + + #[test] + fn test_with_field() { + let field = Arc::new(Field::new("bar", DataType::Int32, false)); + let mut builder = ListViewBuilder::new(Int32Builder::new()).with_field(field.clone()); + builder.append_value([Some(1), Some(2), Some(3)]); + builder.append_null(); // This is fine as nullability refers to nullability of values + builder.append_value([Some(4)]); + let array = builder.finish(); + assert_eq!(array.len(), 3); + assert_eq!(array.data_type(), &DataType::ListView(field.clone())); + + builder.append_value([Some(4), Some(5)]); + let array = builder.finish(); + assert_eq!(array.data_type(), &DataType::ListView(field)); + assert_eq!(array.len(), 1); + } + + #[test] + #[should_panic( + expected = r#"Non-nullable field of ListViewArray \"item\" cannot contain nulls"# + )] + // If a non-nullable type is declared but a null value is used, it will be intercepted by the null check. + fn test_checks_nullability() { + let field = Arc::new(Field::new("item", DataType::Int32, false)); + let mut builder = ListViewBuilder::new(Int32Builder::new()).with_field(field.clone()); + builder.append_value([Some(1), None]); + builder.finish(); + } + + #[test] + #[should_panic(expected = "ListViewArray expected data type Int64 got Int32")] + // If the declared type does not match the actual appended type, it will be intercepted by type checking in the finish function. 
+ fn test_checks_data_type() { + let field = Arc::new(Field::new("item", DataType::Int64, false)); + let mut builder = ListViewBuilder::new(Int32Builder::new()).with_field(field.clone()); + builder.append_value([Some(1)]); + builder.finish(); + } +} diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs index 89a96280eb87..29d75024ea72 100644 --- a/arrow-array/src/builder/mod.rs +++ b/arrow-array/src/builder/mod.rs @@ -78,6 +78,73 @@ //! )) //! ``` //! +//! # Using the [`Extend`] trait to append values from an iterable: +//! +//! ``` +//! # use arrow_array::{Array}; +//! # use arrow_array::builder::{ArrayBuilder, StringBuilder}; +//! +//! let mut builder = StringBuilder::new(); +//! builder.extend(vec![Some("🍐"), Some("🍎"), None]); +//! assert_eq!(builder.finish().len(), 3); +//! ``` +//! +//! # Using the [`Extend`] trait to write generic functions: +//! +//! ``` +//! # use arrow_array::{Array, ArrayRef, StringArray}; +//! # use arrow_array::builder::{ArrayBuilder, Int32Builder, ListBuilder, StringBuilder}; +//! +//! // For generic methods that fill a list of values for an [`ArrayBuilder`], use the [`Extend`] trait. +//! fn filter_and_fill>(builder: &mut impl Extend, values: I, filter: V) +//! where V: PartialEq +//! { +//! builder.extend(values.into_iter().filter(|v| *v == filter)); +//! } +//! let mut string_builder = StringBuilder::new(); +//! filter_and_fill( +//! &mut string_builder, +//! vec![Some("🍐"), Some("🍎"), None], +//! Some("🍎"), +//! ); +//! assert_eq!(string_builder.finish().len(), 1); +//! +//! let mut int_builder = Int32Builder::new(); +//! filter_and_fill( +//! &mut int_builder, +//! vec![Some(11), Some(42), None], +//! Some(42), +//! ); +//! assert_eq!(int_builder.finish().len(), 1); +//! +//! // For generic methods that fill lists-of-lists for an [`ArrayBuilder`], use the [`Extend`] trait. +//! fn filter_and_fill_if_contains>>( +//! list_builder: &mut impl Extend>, +//! values: I, +//! filter: Option, +//! 
) where +//! T: PartialEq, +//! for<'a> &'a V: IntoIterator>, +//! { +//! list_builder.extend(values.into_iter().filter(|string: &Option| { +//! string +//! .as_ref() +//! .map(|str: &V| str.into_iter().any(|ch: &Option| ch == &filter)) +//! .unwrap_or(false) +//! })); +//! } +//! let builder = StringBuilder::new(); +//! let mut list_builder = ListBuilder::new(builder); +//! let pear_pear = vec![Some("🍐"),Some("🍐")]; +//! let pear_app = vec![Some("🍐"),Some("🍎")]; +//! filter_and_fill_if_contains( +//! &mut list_builder, +//! vec![Some(pear_pear), Some(pear_app), None], +//! Some("🍎"), +//! ); +//! assert_eq!(list_builder.finish().len(), 1); +//! ``` +//! //! # Custom Builders //! //! It is common to have a collection of statically defined Rust types that @@ -134,6 +201,8 @@ //! } //! } //! +//! /// For building arrays in generic code, use Extend instead of the append_* methods +//! /// e.g. append_value, append_option, append_null //! impl<'a> Extend<&'a MyRow> for MyRowBuilder { //! fn extend>(&mut self, iter: T) { //! 
iter.into_iter().for_each(|row| self.append(row)); @@ -180,6 +249,8 @@ mod generic_byte_run_builder; pub use generic_byte_run_builder::*; mod generic_bytes_view_builder; pub use generic_bytes_view_builder::*; +mod generic_list_view_builder; +pub use generic_list_view_builder::*; mod union_builder; pub use union_builder::*; @@ -304,6 +375,12 @@ pub type ListBuilder = GenericListBuilder; /// Builder for [`LargeListArray`](crate::array::LargeListArray) pub type LargeListBuilder = GenericListBuilder; +/// Builder for [`ListViewArray`](crate::array::ListViewArray) +pub type ListViewBuilder = GenericListViewBuilder; + +/// Builder for [`LargeListViewArray`](crate::array::LargeListViewArray) +pub type LargeListViewBuilder = GenericListViewBuilder; + /// Builder for [`BinaryArray`](crate::array::BinaryArray) /// /// See examples on [`GenericBinaryBuilder`] diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 2b288445c74b..4a40c2201746 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -270,6 +270,16 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box { + let builder = make_builder(field.data_type(), capacity); + Box::new(ListViewBuilder::with_capacity(builder, capacity).with_field(field.clone())) + } + DataType::LargeListView(field) => { + let builder = make_builder(field.data_type(), capacity); + Box::new( + LargeListViewBuilder::with_capacity(builder, capacity).with_field(field.clone()), + ) + } DataType::Map(field, _) => match field.data_type() { DataType::Struct(fields) => { let map_field_names = MapFieldNames { diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs index fc657f94c6a6..d871431593b6 100644 --- a/arrow-array/src/cast.rs +++ b/arrow-array/src/cast.rs @@ -832,6 +832,14 @@ pub trait AsArray: private::Sealed { self.as_list_opt().expect("list array") } + /// Downcast this to a [`GenericListViewArray`] returning `None` if not 
possible + fn as_list_view_opt(&self) -> Option<&GenericListViewArray>; + + /// Downcast this to a [`GenericListViewArray`] panicking if not possible + fn as_list_view(&self) -> &GenericListViewArray { + self.as_list_view_opt().expect("list view array") + } + /// Downcast this to a [`FixedSizeBinaryArray`] returning `None` if not possible fn as_fixed_size_binary_opt(&self) -> Option<&FixedSizeBinaryArray>; @@ -905,6 +913,10 @@ impl AsArray for dyn Array + '_ { self.as_any().downcast_ref() } + fn as_list_view_opt(&self) -> Option<&GenericListViewArray> { + self.as_any().downcast_ref() + } + fn as_fixed_size_binary_opt(&self) -> Option<&FixedSizeBinaryArray> { self.as_any().downcast_ref() } @@ -960,6 +972,10 @@ impl AsArray for ArrayRef { self.as_ref().as_list_opt() } + fn as_list_view_opt(&self) -> Option<&GenericListViewArray> { + self.as_ref().as_list_view_opt() + } + fn as_fixed_size_binary_opt(&self) -> Option<&FixedSizeBinaryArray> { self.as_ref().as_fixed_size_binary_opt() } diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs index aaa86832f692..c8e5144c14cb 100644 --- a/arrow-buffer/src/buffer/boolean.rs +++ b/arrow-buffer/src/buffer/boolean.rs @@ -25,6 +25,14 @@ use crate::{ use std::ops::{BitAnd, BitOr, BitXor, Not}; /// A slice-able [`Buffer`] containing bit-packed booleans +/// +/// `BooleanBuffer`s can be created using [`BooleanBufferBuilder`] +/// +/// # See Also +/// +/// * [`NullBuffer`] for representing null values in Arrow arrays +/// +/// [`NullBuffer`]: crate::NullBuffer #[derive(Debug, Clone, Eq)] pub struct BooleanBuffer { buffer: Buffer, diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index d0c8ffa39783..fd145ce2306e 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -28,8 +28,43 @@ use crate::{bit_util, bytes::Bytes, native::ArrowNativeType}; use super::ops::bitwise_unary_op_helper; use super::{MutableBuffer, ScalarBuffer}; 
-/// Buffer represents a contiguous memory region that can be shared with other buffers and across -/// thread boundaries. +/// A contiguous memory region that can be shared with other buffers and across +/// thread boundaries that stores Arrow data. +/// +/// `Buffer`s can be sliced and cloned without copying the underlying data and can +/// be created from memory allocated by non-Rust sources such as C/C++. +/// +/// # Example: Create a `Buffer` from a `Vec` (without copying) +/// ``` +/// # use arrow_buffer::Buffer; +/// let vec: Vec = vec![1, 2, 3]; +/// let buffer = Buffer::from(vec); +/// ``` +/// +/// # Example: Convert a `Buffer` to a `Vec` (without copying) +/// +/// Use [`Self::into_vec`] to convert a `Buffer` back into a `Vec` if there are +/// no other references and the types are aligned correctly. +/// ``` +/// # use arrow_buffer::Buffer; +/// # let vec: Vec = vec![1, 2, 3]; +/// # let buffer = Buffer::from(vec); +/// // convert the buffer back into a Vec of u32 +/// // note this will fail if the buffer is shared or not aligned correctly +/// let vec: Vec = buffer.into_vec().unwrap(); +/// ``` +/// +/// # Example: Create a `Buffer` from a [`bytes::Bytes`] (without copying) +/// +/// [`bytes::Bytes`] is a common type in the Rust ecosystem for shared memory +/// regions. You can create a buffer from a `Bytes` instance using the `From` +/// implementation, also without copying. +/// +/// ``` +/// # use arrow_buffer::Buffer; +/// let bytes = bytes::Bytes::from("hello"); +/// let buffer = Buffer::from(bytes); +///``` #[derive(Clone, Debug)] pub struct Buffer { /// the internal byte buffer. @@ -59,16 +94,15 @@ unsafe impl Send for Buffer where Bytes: Send {} unsafe impl Sync for Buffer where Bytes: Sync {} impl Buffer { - /// Auxiliary method to create a new Buffer - #[inline] + /// Create a new Buffer from a (internal) `Bytes` + /// + /// NOTE despite the same name, `Bytes` is an internal struct in arrow-rs + /// and is different than [`bytes::Bytes`]. 
+ /// + /// See examples on [`Buffer`] for ways to create a buffer from a [`bytes::Bytes`]. + #[deprecated(since = "54.1.0", note = "Use Buffer::from instead")] pub fn from_bytes(bytes: Bytes) -> Self { - let length = bytes.len(); - let ptr = bytes.as_ptr(); - Buffer { - data: Arc::new(bytes), - ptr, - length, - } + Self::from(bytes) } /// Returns the offset, in bytes, of `Self::ptr` to `Self::data` @@ -99,8 +133,11 @@ impl Buffer { buffer.into() } - /// Creates a buffer from an existing memory region. Ownership of the memory is tracked via reference counting - /// and the memory will be freed using the `drop` method of [crate::alloc::Allocation] when the reference count reaches zero. + /// Creates a buffer from an existing memory region. + /// + /// Ownership of the memory is tracked via reference counting + /// and the memory will be freed using the `drop` method of + /// [crate::alloc::Allocation] when the reference count reaches zero. /// /// # Arguments /// @@ -147,7 +184,7 @@ impl Buffer { self.data.capacity() } - /// Tried to shrink the capacity of the buffer as much as possible, freeing unused memory. + /// Tries to shrink the capacity of the buffer as much as possible, freeing unused memory. /// /// If the buffer is shared, this is a no-op. /// @@ -182,7 +219,7 @@ impl Buffer { } } - /// Returns whether the buffer is empty. + /// Returns true if the buffer is empty. #[inline] pub fn is_empty(&self) -> bool { self.length == 0 @@ -198,7 +235,9 @@ impl Buffer { } /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`. - /// Doing so allows the same memory region to be shared between buffers. + /// + /// This function is `O(1)` and does not copy any data, allowing the + /// same memory region to be shared between buffers. /// /// # Panics /// @@ -232,7 +271,10 @@ impl Buffer { /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`, /// with `length` bytes. 
- /// Doing so allows the same memory region to be shared between buffers. + /// + /// This function is `O(1)` and does not copy any data, allowing the same + /// memory region to be shared between buffers. + /// /// # Panics /// Panics iff `(offset + length)` is larger than the existing length. pub fn slice_with_length(&self, offset: usize, length: usize) -> Self { @@ -320,10 +362,16 @@ impl Buffer { }) } - /// Returns `Vec` for mutating the buffer + /// Converts self into a `Vec`, if possible. + /// + /// This can be used to reuse / mutate the underlying data. /// - /// Returns `Err(self)` if this buffer does not have the same [`Layout`] as - /// the destination Vec or contains a non-zero offset + /// # Errors + /// + /// Returns `Err(self)` if + /// 1. this buffer does not have the same [`Layout`] as the destination Vec + /// 2. contains a non-zero offset + /// 3. The buffer is shared pub fn into_vec(self) -> Result, Self> { let layout = match self.data.deallocation() { Deallocation::Standard(l) => l, @@ -406,7 +454,29 @@ impl From> for Buffer { } } -/// Creating a `Buffer` instance by storing the boolean values into the buffer +/// Convert from internal `Bytes` (not [`bytes::Bytes`]) to `Buffer` +impl From for Buffer { + #[inline] + fn from(bytes: Bytes) -> Self { + let length = bytes.len(); + let ptr = bytes.as_ptr(); + Self { + data: Arc::new(bytes), + ptr, + length, + } + } +} + +/// Convert from [`bytes::Bytes`], not internal `Bytes` to `Buffer` +impl From for Buffer { + fn from(bytes: bytes::Bytes) -> Self { + let bytes: Bytes = bytes.into(); + Self::from(bytes) + } +} + +/// Create a `Buffer` instance by storing the boolean values into the buffer impl FromIterator for Buffer { fn from_iter(iter: I) -> Self where @@ -439,7 +509,9 @@ impl From> for Buffer { impl Buffer { /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length. + /// /// Prefer this to `collect` whenever possible, as it is ~60% faster. 
+ /// /// # Example /// ``` /// # use arrow_buffer::buffer::Buffer; diff --git a/arrow-buffer/src/buffer/mutable.rs b/arrow-buffer/src/buffer/mutable.rs index c4315a1d64cd..5ad55e306e2a 100644 --- a/arrow-buffer/src/buffer/mutable.rs +++ b/arrow-buffer/src/buffer/mutable.rs @@ -328,7 +328,7 @@ impl MutableBuffer { pub(super) fn into_buffer(self) -> Buffer { let bytes = unsafe { Bytes::new(self.data, self.len, Deallocation::Standard(self.layout)) }; std::mem::forget(self); - Buffer::from_bytes(bytes) + Buffer::from(bytes) } /// View this buffer as a mutable slice of a specific type. diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs index ec12b885eb5a..a178b3101b4e 100644 --- a/arrow-buffer/src/buffer/null.rs +++ b/arrow-buffer/src/buffer/null.rs @@ -19,13 +19,16 @@ use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator}; use crate::buffer::BooleanBuffer; use crate::{Buffer, MutableBuffer}; -/// A [`BooleanBuffer`] used to encode validity for arrow arrays +/// A [`BooleanBuffer`] used to encode validity for Arrow arrays /// -/// As per the [Arrow specification], array validity is encoded in a packed bitmask with a +/// In the [Arrow specification], array validity is encoded in a packed bitmask with a /// `true` value indicating the corresponding slot is not null, and `false` indicating /// that it is null. 
/// +/// `NullBuffer`s can be created using [`NullBufferBuilder`] +/// /// [Arrow specification]: https://arrow.apache.org/docs/format/Columnar.html#validity-bitmaps +/// [`NullBufferBuilder`]: crate::NullBufferBuilder #[derive(Debug, Clone, Eq, PartialEq)] pub struct NullBuffer { buffer: BooleanBuffer, @@ -49,7 +52,8 @@ impl NullBuffer { /// Create a new [`NullBuffer`] of length `len` where all values are valid /// - /// Note: it is more efficient to not set the null buffer if it is known to be all valid + /// Note: it is more efficient to not set the null buffer if it is known to + /// be all valid (aka all values are not null) pub fn new_valid(len: usize) -> Self { Self { buffer: BooleanBuffer::new_set(len), diff --git a/arrow-buffer/src/buffer/offset.rs b/arrow-buffer/src/buffer/offset.rs index a6be2b67af84..164af6f01d0e 100644 --- a/arrow-buffer/src/buffer/offset.rs +++ b/arrow-buffer/src/buffer/offset.rs @@ -133,6 +133,38 @@ impl OffsetBuffer { Self(out.into()) } + /// Get an Iterator over the lengths of this [`OffsetBuffer`] + /// + /// ``` + /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer}; + /// let offsets = OffsetBuffer::<_>::new(ScalarBuffer::::from(vec![0, 1, 4, 9])); + /// assert_eq!(offsets.lengths().collect::>(), vec![1, 3, 5]); + /// ``` + /// + /// Empty [`OffsetBuffer`] will return an empty iterator + /// ``` + /// # use arrow_buffer::OffsetBuffer; + /// let offsets = OffsetBuffer::::new_empty(); + /// assert_eq!(offsets.lengths().count(), 0); + /// ``` + /// + /// This can be used to merge multiple [`OffsetBuffer`]s to one + /// ``` + /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer}; + /// + /// let buffer1 = OffsetBuffer::::from_lengths([2, 6, 3, 7, 2]); + /// let buffer2 = OffsetBuffer::::from_lengths([1, 3, 5, 7, 9]); + /// + /// let merged = OffsetBuffer::::from_lengths( + /// vec![buffer1, buffer2].iter().flat_map(|x| x.lengths()) + /// ); + /// + /// assert_eq!(merged.lengths().collect::>(), &[2, 6, 3, 7, 2, 1, 3, 5, 7, 9]); + /// 
``` + pub fn lengths(&self) -> impl ExactSizeIterator + '_ { + self.0.windows(2).map(|x| x[1].as_usize() - x[0].as_usize()) + } + /// Free up unused memory. pub fn shrink_to_fit(&mut self) { self.0.shrink_to_fit(); @@ -244,4 +276,24 @@ mod tests { fn from_lengths_usize_overflow() { OffsetBuffer::::from_lengths([usize::MAX, 1]); } + + #[test] + fn get_lengths() { + let offsets = OffsetBuffer::::new(ScalarBuffer::::from(vec![0, 1, 4, 9])); + assert_eq!(offsets.lengths().collect::>(), vec![1, 3, 5]); + } + + #[test] + fn get_lengths_should_be_with_fixed_size() { + let offsets = OffsetBuffer::::new(ScalarBuffer::::from(vec![0, 1, 4, 9])); + let iter = offsets.lengths(); + assert_eq!(iter.size_hint(), (3, Some(3))); + assert_eq!(iter.len(), 3); + } + + #[test] + fn get_lengths_from_empty_offset_buffer_should_be_empty_iterator() { + let offsets = OffsetBuffer::::new_empty(); + assert_eq!(offsets.lengths().collect::>(), vec![]); + } } diff --git a/arrow-buffer/src/builder/boolean.rs b/arrow-buffer/src/builder/boolean.rs index ca178ae5ce4e..67b306dd8cea 100644 --- a/arrow-buffer/src/builder/boolean.rs +++ b/arrow-buffer/src/builder/boolean.rs @@ -19,6 +19,12 @@ use crate::{bit_mask, bit_util, BooleanBuffer, Buffer, MutableBuffer}; use std::ops::Range; /// Builder for [`BooleanBuffer`] +/// +/// # See Also +/// +/// * [`NullBuffer`] for building [`BooleanBuffer`]s for representing nulls +/// +/// [`NullBuffer`]: crate::NullBuffer #[derive(Debug)] pub struct BooleanBufferBuilder { buffer: MutableBuffer, diff --git a/arrow-buffer/src/builder/null.rs b/arrow-buffer/src/builder/null.rs index 298b479e87df..ddbca46f7182 100644 --- a/arrow-buffer/src/builder/null.rs +++ b/arrow-buffer/src/builder/null.rs @@ -17,12 +17,16 @@ use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer}; -/// Builder for creating the null bit buffer. +/// Builder for creating [`NullBuffer`] +/// +/// # Performance /// /// This builder only materializes the buffer when we append `false`. 
/// If you only append `true`s to the builder, what you get will be /// `None` when calling [`finish`](#method.finish). -/// This optimization is **very** important for the performance. +/// +/// This optimization is **very** important for the performance as it avoids +/// allocating memory for the null buffer when there are no nulls. #[derive(Debug)] pub struct NullBufferBuilder { bitmap_builder: Option, diff --git a/arrow-buffer/src/bytes.rs b/arrow-buffer/src/bytes.rs index 77724137aef7..b811bd2c6b40 100644 --- a/arrow-buffer/src/bytes.rs +++ b/arrow-buffer/src/bytes.rs @@ -28,14 +28,18 @@ use crate::buffer::dangling_ptr; /// A continuous, fixed-size, immutable memory region that knows how to de-allocate itself. /// -/// This structs' API is inspired by the `bytes::Bytes`, but it is not limited to using rust's -/// global allocator nor u8 alignment. +/// Note that this structure is an internal implementation detail of the +/// arrow-rs crate. While it has the same name and similar API as +/// [`bytes::Bytes`] it is not limited to rust's global allocator nor u8 +/// alignment. It is possible to create a `Bytes` from `bytes::Bytes` using the +/// `From` implementation. /// /// In the most common case, this buffer is allocated using [`alloc`](std::alloc::alloc) /// with an alignment of [`ALIGNMENT`](crate::alloc::ALIGNMENT) /// /// When the region is allocated by a different allocator, [Deallocation::Custom], this calls the /// custom deallocator to deallocate the region when it is no longer needed. 
+/// pub struct Bytes { /// The raw pointer to be beginning of the region ptr: NonNull, diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 7aff32b0beee..440d0a8becde 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -3767,7 +3767,6 @@ mod tests { Arc::new(StringViewArray::from(vec![Some("1.5"), Some("2.5"), None])); for array in inputs { - println!("type: {}", array.data_type()); assert!(can_cast_types(array.data_type(), &DataType::Utf8View)); let arr = cast(&array, &DataType::Utf8View).unwrap(); assert_eq!(expected.as_ref(), arr.as_ref()); @@ -9971,7 +9970,6 @@ mod tests { fn test_decimal_to_decimal_throw_error_on_precision_overflow_same_scale() { let array = vec![Some(123456789)]; let array = create_decimal_array(array, 24, 2).unwrap(); - println!("{:?}", array); let input_type = DataType::Decimal128(24, 2); let output_type = DataType::Decimal128(6, 2); assert!(can_cast_types(&input_type, &output_type)); @@ -9988,8 +9986,7 @@ mod tests { #[test] fn test_decimal_to_decimal_throw_error_on_precision_overflow_lower_scale() { let array = vec![Some(123456789)]; - let array = create_decimal_array(array, 24, 2).unwrap(); - println!("{:?}", array); + let array = create_decimal_array(array, 24, 4).unwrap(); let input_type = DataType::Decimal128(24, 4); let output_type = DataType::Decimal128(6, 2); assert!(can_cast_types(&input_type, &output_type)); @@ -10000,14 +9997,13 @@ mod tests { }; let result = cast_with_options(&array, &output_type, &options); assert_eq!(result.unwrap_err().to_string(), - "Invalid argument error: 123456790 is too large to store in a Decimal128 of precision 6. Max is 999999"); + "Invalid argument error: 1234568 is too large to store in a Decimal128 of precision 6. 
Max is 999999"); } #[test] fn test_decimal_to_decimal_throw_error_on_precision_overflow_greater_scale() { let array = vec![Some(123456789)]; let array = create_decimal_array(array, 24, 2).unwrap(); - println!("{:?}", array); let input_type = DataType::Decimal128(24, 2); let output_type = DataType::Decimal128(6, 3); assert!(can_cast_types(&input_type, &output_type)); @@ -10025,7 +10021,6 @@ mod tests { fn test_decimal_to_decimal_throw_error_on_precision_overflow_diff_type() { let array = vec![Some(123456789)]; let array = create_decimal_array(array, 24, 2).unwrap(); - println!("{:?}", array); let input_type = DataType::Decimal128(24, 2); let output_type = DataType::Decimal256(6, 2); assert!(can_cast_types(&input_type, &output_type)); diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs index f4c4639c1c08..4e93e9787cc8 100644 --- a/arrow-cast/src/parse.rs +++ b/arrow-cast/src/parse.rs @@ -881,7 +881,7 @@ pub fn parse_decimal( for (_, b) in bs.by_ref() { if !b.is_ascii_digit() { if *b == b'e' || *b == b'E' { - result = match parse_e_notation::( + result = parse_e_notation::( s, digits as u16, fractionals as i16, @@ -889,10 +889,7 @@ pub fn parse_decimal( point_index, precision as u16, scale as i16, - ) { - Err(e) => return Err(e), - Ok(v) => v, - }; + )?; is_e_notation = true; @@ -926,7 +923,7 @@ pub fn parse_decimal( } } b'e' | b'E' => { - result = match parse_e_notation::( + result = parse_e_notation::( s, digits as u16, fractionals as i16, @@ -934,10 +931,7 @@ pub fn parse_decimal( index, precision as u16, scale as i16, - ) { - Err(e) => return Err(e), - Ok(v) => v, - }; + )?; is_e_notation = true; diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index a35b5e8629e9..be7e5f86a04d 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -264,8 +264,12 @@ impl ArrayData { offset, buffers, child_data, + align_buffers: false, + // SAFETY: caller responsible for ensuring data is valid + skip_validation: true, } - .build_unchecked() + 
.build() + .unwrap() } /// Create a new ArrayData, validating that the provided buffers form a valid @@ -1775,7 +1779,7 @@ impl PartialEq for ArrayData { } } -/// Builder for `ArrayData` type +/// Builder for [`ArrayData`] type #[derive(Debug)] pub struct ArrayDataBuilder { data_type: DataType, @@ -1786,6 +1790,20 @@ pub struct ArrayDataBuilder { offset: usize, buffers: Vec, child_data: Vec, + /// Should buffers be realigned (copying if necessary)? + /// + /// Defaults to false. + align_buffers: bool, + /// Should data validation be skipped for this [`ArrayData`]? + /// + /// Defaults to false. + /// + /// # Safety + /// + /// This flag can only be set to true using `unsafe` APIs. However, once true + /// subsequent calls to `build()` may result in undefined behavior if the data + /// is not valid. + skip_validation: bool, } impl ArrayDataBuilder { @@ -1801,6 +1819,8 @@ impl ArrayDataBuilder { offset: 0, buffers: vec![], child_data: vec![], + align_buffers: false, + skip_validation: false, } } @@ -1877,51 +1897,79 @@ impl ArrayDataBuilder { /// Creates an array data, without any validation /// + /// Note: This is shorthand for `self.skip_validation(true).build().unwrap()` + /// /// # Safety /// /// The same caveats as [`ArrayData::new_unchecked`] /// apply. - #[allow(clippy::let_and_return)] pub unsafe fn build_unchecked(self) -> ArrayData { - let data = self.build_impl(); - // Provide a force_validate mode - #[cfg(feature = "force_validate")] - data.validate_data().unwrap(); - data + self.skip_validation(true).build().unwrap() } - /// Same as [`Self::build_unchecked`] but ignoring `force_validate` feature flag - unsafe fn build_impl(self) -> ArrayData { - let nulls = self - .nulls + /// Creates an `ArrayData`, consuming `self` + /// + /// # Safety + /// + /// By default the underlying buffers are checked to ensure they are valid + /// Arrow data. However, if the [`Self::skip_validation`] flag has been set + /// to true (by the `unsafe` API) this validation is skipped. 
If the data is + /// not valid, undefined behavior will result. + pub fn build(self) -> Result { + let Self { + data_type, + len, + null_count, + null_bit_buffer, + nulls, + offset, + buffers, + child_data, + align_buffers, + skip_validation, + } = self; + + let nulls = nulls .or_else(|| { - let buffer = self.null_bit_buffer?; - let buffer = BooleanBuffer::new(buffer, self.offset, self.len); - Some(match self.null_count { - Some(n) => NullBuffer::new_unchecked(buffer, n), + let buffer = null_bit_buffer?; + let buffer = BooleanBuffer::new(buffer, offset, len); + Some(match null_count { + Some(n) => { + // SAFETY: call to `data.validate_data()` below validates the null buffer is valid + unsafe { NullBuffer::new_unchecked(buffer, n) } + } None => NullBuffer::new(buffer), }) }) .filter(|b| b.null_count() != 0); - ArrayData { - data_type: self.data_type, - len: self.len, - offset: self.offset, - buffers: self.buffers, - child_data: self.child_data, + let mut data = ArrayData { + data_type, + len, + offset, + buffers, + child_data, nulls, + }; + + if align_buffers { + data.align_buffers(); } - } - /// Creates an array data, validating all inputs - pub fn build(self) -> Result { - let data = unsafe { self.build_impl() }; - data.validate_data()?; + // SAFETY: `skip_validation` is only set to true using `unsafe` APIs + if !skip_validation || cfg!(feature = "force_validate") { + data.validate_data()?; + } Ok(data) } /// Creates an array data, validating all inputs, and aligning any buffers + #[deprecated(since = "54.1.0", note = "Use ArrayData::align_buffers instead")] + pub fn build_aligned(self) -> Result { + self.align_buffers(true).build() + } + + /// Ensure that all buffers are aligned, copying data if necessary /// /// Rust requires that arrays are aligned to their corresponding primitive, /// see [`Layout::array`](std::alloc::Layout::array) and [`std::mem::align_of`]. @@ -1930,17 +1978,33 @@ impl ArrayDataBuilder { /// to allow for [slice](std::slice) based APIs. 
See [`BufferSpec::FixedWidth`]. /// /// As this alignment is architecture specific, and not guaranteed by all arrow implementations, - /// this method is provided to automatically copy buffers to a new correctly aligned allocation + /// this flag is provided to automatically copy buffers to a new correctly aligned allocation /// when necessary, making it useful when interacting with buffers produced by other systems, /// e.g. IPC or FFI. /// - /// This is unlike `[Self::build`] which will instead return an error on encountering + /// If this flag is not enabled, [`Self::build`] returns an error on encountering /// insufficiently aligned buffers. - pub fn build_aligned(self) -> Result { - let mut data = unsafe { self.build_impl() }; - data.align_buffers(); - data.validate_data()?; - Ok(data) + pub fn align_buffers(mut self, align_buffers: bool) -> Self { + self.align_buffers = align_buffers; + self + } + + /// Skips validation of the data. + /// + /// If this flag is enabled, [`Self::build`] will skip validation of the + /// data + /// + /// If this flag is not enabled, [`Self::build`] will validate that all + /// buffers are valid and will return an error if any data is invalid. + /// Validation can be expensive. 
+ /// + /// # Safety + /// + /// If validation is skipped, the buffers must form a valid Arrow array, + /// otherwise undefined behavior will result + pub unsafe fn skip_validation(mut self, skip_validation: bool) -> Self { + self.skip_validation = skip_validation; + self } } @@ -1955,6 +2019,8 @@ impl From for ArrayDataBuilder { nulls: d.nulls, null_bit_buffer: None, null_count: None, + align_buffers: false, + skip_validation: false, } } } diff --git a/arrow-flight/gen/Cargo.toml b/arrow-flight/gen/Cargo.toml index 6358227a8912..e52efbf67e21 100644 --- a/arrow-flight/gen/Cargo.toml +++ b/arrow-flight/gen/Cargo.toml @@ -20,7 +20,7 @@ name = "gen" description = "Code generation for arrow-flight" version = "0.1.0" edition = { workspace = true } -rust-version = { workspace = true } +rust-version = "1.71.1" authors = { workspace = true } homepage = { workspace = true } repository = { workspace = true } diff --git a/arrow-flight/src/decode.rs b/arrow-flight/src/decode.rs index 7bafc384306b..760fc926fca6 100644 --- a/arrow-flight/src/decode.rs +++ b/arrow-flight/src/decode.rs @@ -295,7 +295,7 @@ impl FlightDataDecoder { )); }; - let buffer = Buffer::from_bytes(data.data_body.into()); + let buffer = Buffer::from(data.data_body); let dictionary_batch = message.header_as_dictionary_batch().ok_or_else(|| { FlightError::protocol( "Could not get dictionary batch from DictionaryBatch message", diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs index 315b7b3cb6e5..57ac9f3173fe 100644 --- a/arrow-flight/src/encode.rs +++ b/arrow-flight/src/encode.rs @@ -1708,7 +1708,7 @@ mod tests { ]) .unwrap(); - verify_encoded_split(batch, 112).await; + verify_encoded_split(batch, 120).await; } #[tokio::test] @@ -1719,7 +1719,7 @@ mod tests { // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 4304).await; + verify_encoded_split(batch, 4312).await; } #[tokio::test] @@ -1755,7 +1755,7 @@ mod tests { // 5k over 
limit (which is 2x larger than limit of 5k) // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 5800).await; + verify_encoded_split(batch, 5808).await; } #[tokio::test] @@ -1771,7 +1771,7 @@ mod tests { let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap(); - verify_encoded_split(batch, 48).await; + verify_encoded_split(batch, 56).await; } #[tokio::test] @@ -1785,7 +1785,7 @@ mod tests { // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 3328).await; + verify_encoded_split(batch, 3336).await; } #[tokio::test] @@ -1799,7 +1799,7 @@ mod tests { // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 5280).await; + verify_encoded_split(batch, 5288).await; } #[tokio::test] @@ -1824,7 +1824,7 @@ mod tests { // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 4128).await; + verify_encoded_split(batch, 4136).await; } /// Return size, in memory of flight data @@ -1833,7 +1833,7 @@ mod tests { .flight_descriptor .as_ref() .map(|descriptor| { - let path_len: usize = descriptor.path.iter().map(|p| p.as_bytes().len()).sum(); + let path_len: usize = descriptor.path.iter().map(|p| p.len()).sum(); std::mem::size_of_val(descriptor) + descriptor.cmd.len() + path_len }) diff --git a/arrow-flight/src/sql/client.rs b/arrow-flight/src/sql/client.rs index a6e228737b3f..6d3ac3dbe610 100644 --- a/arrow-flight/src/sql/client.rs +++ b/arrow-flight/src/sql/client.rs @@ -721,7 +721,7 @@ pub fn arrow_data_from_flight_data( let dictionaries_by_field = HashMap::new(); let record_batch = read_record_batch( - &Buffer::from_bytes(flight_data.data_body.into()), + &Buffer::from(flight_data.data_body), ipc_record_batch, arrow_schema_ref.clone(), &dictionaries_by_field, diff --git a/arrow-integration-testing/Cargo.toml 
b/arrow-integration-testing/Cargo.toml index 8654b4b92734..26cb05fae1c2 100644 --- a/arrow-integration-testing/Cargo.toml +++ b/arrow-integration-testing/Cargo.toml @@ -25,7 +25,7 @@ authors = { workspace = true } license = { workspace = true } edition = { workspace = true } publish = false -rust-version = { workspace = true } +rust-version = "1.75.0" [lib] crate-type = ["lib", "cdylib"] diff --git a/arrow-ipc/Cargo.toml b/arrow-ipc/Cargo.toml index cf91b3a3415f..4988eed4a5ed 100644 --- a/arrow-ipc/Cargo.toml +++ b/arrow-ipc/Cargo.toml @@ -38,7 +38,7 @@ arrow-array = { workspace = true } arrow-buffer = { workspace = true } arrow-data = { workspace = true } arrow-schema = { workspace = true } -flatbuffers = { version = "24.3.25", default-features = false } +flatbuffers = { version = "24.12.23", default-features = false } lz4_flex = { version = "0.11", default-features = false, features = ["std", "frame"], optional = true } zstd = { version = "0.13.0", default-features = false, optional = true } diff --git a/arrow-ipc/regen.sh b/arrow-ipc/regen.sh index 8d8862ccc7f4..b368bd1bc7cc 100755 --- a/arrow-ipc/regen.sh +++ b/arrow-ipc/regen.sh @@ -21,33 +21,36 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" # Change to the toplevel `arrow-rs` directory pushd $DIR/../ -echo "Build flatc from source ..." - -FB_URL="https://github.com/google/flatbuffers" -FB_DIR="arrow/.flatbuffers" -FLATC="$FB_DIR/bazel-bin/flatc" - -if [ -z $(which bazel) ]; then - echo "bazel is required to build flatc" - exit 1 -fi - -echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')" - -if [ ! -e $FB_DIR ]; then - echo "git clone $FB_URL ..." - git clone -b master --no-tag --depth 1 $FB_URL $FB_DIR +if [ -z "$FLATC" ]; then + echo "Build flatc from source ..." 
+ + FB_URL="https://github.com/google/flatbuffers" + FB_DIR="arrow/.flatbuffers" + FLATC="$FB_DIR/bazel-bin/flatc" + + if [ -z $(which bazel) ]; then + echo "bazel is required to build flatc" + exit 1 + fi + + echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')" + + if [ ! -e $FB_DIR ]; then + echo "git clone $FB_URL ..." + git clone -b master --no-tag --depth 1 $FB_URL $FB_DIR + else + echo "git pull $FB_URL ..." + git -C $FB_DIR pull + fi + + pushd $FB_DIR + echo "run: bazel build :flatc ..." + bazel build :flatc + popd else - echo "git pull $FB_URL ..." - git -C $FB_DIR pull + echo "Using flatc $FLATC ..." fi -pushd $FB_DIR -echo "run: bazel build :flatc ..." -bazel build :flatc -popd - - # Execute the code generation: $FLATC --filename-suffix "" --rust -o arrow-ipc/src/gen/ format/*.fbs @@ -99,37 +102,38 @@ for f in `ls *.rs`; do fi echo "Modifying: $f" - sed -i '' '/extern crate flatbuffers;/d' $f - sed -i '' '/use self::flatbuffers::EndianScalar;/d' $f - sed -i '' '/\#\[allow(unused_imports, dead_code)\]/d' $f - sed -i '' '/pub mod org {/d' $f - sed -i '' '/pub mod apache {/d' $f - sed -i '' '/pub mod arrow {/d' $f - sed -i '' '/pub mod flatbuf {/d' $f - sed -i '' '/} \/\/ pub mod flatbuf/d' $f - sed -i '' '/} \/\/ pub mod arrow/d' $f - sed -i '' '/} \/\/ pub mod apache/d' $f - sed -i '' '/} \/\/ pub mod org/d' $f - sed -i '' '/use core::mem;/d' $f - sed -i '' '/use core::cmp::Ordering;/d' $f - sed -i '' '/use self::flatbuffers::{EndianScalar, Follow};/d' $f + sed --in-place='' '/extern crate flatbuffers;/d' $f + sed --in-place='' '/use self::flatbuffers::EndianScalar;/d' $f + sed --in-place='' '/\#\[allow(unused_imports, dead_code)\]/d' $f + sed --in-place='' '/pub mod org {/d' $f + sed --in-place='' '/pub mod apache {/d' $f + sed --in-place='' '/pub mod arrow {/d' $f + sed --in-place='' '/pub mod flatbuf {/d' $f + sed --in-place='' '/} \/\/ pub mod flatbuf/d' $f + sed --in-place='' '/} \/\/ pub mod arrow/d' $f + sed --in-place='' '/} 
\/\/ pub mod apache/d' $f + sed --in-place='' '/} \/\/ pub mod org/d' $f + sed --in-place='' '/use core::mem;/d' $f + sed --in-place='' '/use core::cmp::Ordering;/d' $f + sed --in-place='' '/use self::flatbuffers::{EndianScalar, Follow};/d' $f # required by flatc 1.12.0+ - sed -i '' "/\#\!\[allow(unused_imports, dead_code)\]/d" $f + sed --in-place='' "/\#\!\[allow(unused_imports, dead_code)\]/d" $f for name in ${names[@]}; do - sed -i '' "/use crate::${name}::\*;/d" $f - sed -i '' "s/use self::flatbuffers::Verifiable;/use flatbuffers::Verifiable;/g" $f + sed --in-place='' "/use crate::${name}::\*;/d" $f + sed --in-place='' "s/use self::flatbuffers::Verifiable;/use flatbuffers::Verifiable;/g" $f done # Replace all occurrences of "type__" with "type_", "TYPE__" with "TYPE_". - sed -i '' 's/type__/type_/g' $f - sed -i '' 's/TYPE__/TYPE_/g' $f + sed --in-place='' 's/type__/type_/g' $f + sed --in-place='' 's/TYPE__/TYPE_/g' $f # Some files need prefixes if [[ $f == "File.rs" ]]; then # Now prefix the file with the static contents echo -e "${PREFIX}" "${SCHEMA_IMPORT}" | cat - $f > temp && mv temp $f elif [[ $f == "Message.rs" ]]; then + sed --in-place='' 's/List/\`List\`/g' $f echo -e "${PREFIX}" "${SCHEMA_IMPORT}" "${SPARSE_TENSOR_IMPORT}" "${TENSOR_IMPORT}" | cat - $f > temp && mv temp $f elif [[ $f == "SparseTensor.rs" ]]; then echo -e "${PREFIX}" "${SCHEMA_IMPORT}" "${TENSOR_IMPORT}" | cat - $f > temp && mv temp $f diff --git a/arrow-ipc/src/gen/File.rs b/arrow-ipc/src/gen/File.rs index c0c2fb183237..427cf75de096 100644 --- a/arrow-ipc/src/gen/File.rs +++ b/arrow-ipc/src/gen/File.rs @@ -23,6 +23,8 @@ use flatbuffers::EndianScalar; use std::{cmp::Ordering, mem}; // automatically generated by the FlatBuffers compiler, do not modify +// @generated + // struct Block, aligned to 8 #[repr(transparent)] #[derive(Clone, Copy, PartialEq)] @@ -64,6 +66,10 @@ impl<'b> flatbuffers::Push for Block { let src = ::core::slice::from_raw_parts(self as *const Block as *const u8, 
Self::size()); dst.copy_from_slice(src); } + #[inline] + fn alignment() -> flatbuffers::PushAlignment { + flatbuffers::PushAlignment::new(8) + } } impl<'a> flatbuffers::Verifiable for Block { @@ -211,8 +217,8 @@ impl<'a> Footer<'a> { Footer { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, args: &'args FooterArgs<'args>, ) -> flatbuffers::WIPOffset> { let mut builder = FooterBuilder::new(_fbb); @@ -344,11 +350,11 @@ impl<'a> Default for FooterArgs<'a> { } } -pub struct FooterBuilder<'a: 'b, 'b> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, +pub struct FooterBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b> FooterBuilder<'a, 'b> { +impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> FooterBuilder<'a, 'b, A> { #[inline] pub fn add_version(&mut self, version: MetadataVersion) { self.fbb_ @@ -388,7 +394,7 @@ impl<'a: 'b, 'b> FooterBuilder<'a, 'b> { ); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> FooterBuilder<'a, 'b> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> FooterBuilder<'a, 'b, A> { let start = _fbb.start_table(); FooterBuilder { fbb_: _fbb, @@ -474,16 +480,16 @@ pub unsafe fn size_prefixed_root_as_footer_unchecked(buf: &[u8]) -> Footer { flatbuffers::size_prefixed_root_unchecked::