Skip to content

Commit

Permalink
feat: finish_cloned() without resetting
Browse files Browse the repository at this point in the history
  • Loading branch information
QuenKar committed Dec 12, 2023
1 parent 58183fe commit 0759bc9
Show file tree
Hide file tree
Showing 9 changed files with 127 additions and 2 deletions.
5 changes: 4 additions & 1 deletion src/datatypes/src/scalars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,11 @@ pub trait ScalarVectorBuilder: MutableVector {
/// Push an optional value into the builder.
///
/// The value is passed as the `RefItem` of the builder's vector type.
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>);

/// Finish the build, returning a new vector and resetting `self`.
fn finish(&mut self) -> Self::VectorType;

/// Finish the build and return a new vector without resetting `self`.
///
/// Takes `&self`, so the builder is left untouched by this call.
fn finish_cloned(&self) -> Self::VectorType;
}

macro_rules! impl_scalar_for_native {
Expand Down
33 changes: 33 additions & 0 deletions src/datatypes/src/vectors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ pub trait MutableVector: Send + Sync {
/// Convert `self` to an (immutable) [VectorRef], resetting `self` in the process.
fn to_vector(&mut self) -> VectorRef;

/// Convert `self` to an (immutable) [VectorRef] without resetting `self`.
fn to_vector_cloned(&self) -> VectorRef;

/// Try to push a [ValueRef] into this mutable vector.
///
/// Returns an error if the value cannot be pushed.
fn try_push_value_ref(&mut self, value: ValueRef) -> Result<()>;

Expand Down Expand Up @@ -423,4 +426,34 @@ pub mod tests {
// Panic with_capacity
let _ = ListVectorBuilder::with_capacity(1024);
}

#[test]
fn test_mutable_vector_finish_cloned() {
    // Primitive builder: fill it with three values.
    let mut int_builder = Int32VectorBuilder::with_capacity(1024);
    for value in 1..=3 {
        int_builder.push(Some(value));
    }

    // `finish_cloned` hands out a snapshot and keeps the builder contents.
    let snapshot = int_builder.finish_cloned();
    assert_eq!(snapshot.len(), 3);
    assert_eq!(int_builder.len(), 3);

    // The builder is still usable after taking the snapshot.
    int_builder.push(Some(4));
    assert_eq!(int_builder.len(), 4);

    // `finish` hands out the vector and leaves the builder empty.
    let finished = int_builder.finish();
    assert_eq!(finished.len(), 4);
    assert_eq!(int_builder.len(), 0);

    // Same check through the `MutableVector` trait: `to_vector_cloned`
    // must not reset the builder either.
    let mut string_builder = StringVectorBuilder::with_capacity(1024);
    for text in ["1", "2", "3"] {
        string_builder.push(Some(text));
    }
    let shared = string_builder.to_vector_cloned();
    assert_eq!(shared.len(), 3);
    assert_eq!(string_builder.len(), 3);
}
}
10 changes: 10 additions & 0 deletions src/datatypes/src/vectors/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,10 @@ impl MutableVector for BinaryVectorBuilder {
Arc::new(self.finish())
}

/// Builds a shared vector from the current contents via `finish_cloned`,
/// leaving this builder as it is.
fn to_vector_cloned(&self) -> VectorRef {
    let snapshot = self.finish_cloned();
    Arc::new(snapshot)
}

fn try_push_value_ref(&mut self, value: ValueRef) -> Result<()> {
match value.as_binary()? {
Some(v) => self.mutable_array.append_value(v),
Expand Down Expand Up @@ -197,6 +201,12 @@ impl ScalarVectorBuilder for BinaryVectorBuilder {
array: self.mutable_array.finish(),
}
}

/// Wraps the result of `mutable_array.finish_cloned()` in a `BinaryVector`.
/// Only borrows `self` immutably.
fn finish_cloned(&self) -> Self::VectorType {
    let array = self.mutable_array.finish_cloned();
    BinaryVector { array }
}
}

impl Serializable for BinaryVector {
Expand Down
10 changes: 10 additions & 0 deletions src/datatypes/src/vectors/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,10 @@ impl MutableVector for BooleanVectorBuilder {
Arc::new(self.finish())
}

/// Builds a shared vector from the current contents via `finish_cloned`,
/// leaving this builder as it is.
fn to_vector_cloned(&self) -> VectorRef {
    let snapshot = self.finish_cloned();
    Arc::new(snapshot)
}

fn try_push_value_ref(&mut self, value: ValueRef) -> Result<()> {
match value.as_boolean()? {
Some(v) => self.mutable_array.append_value(v),
Expand Down Expand Up @@ -213,6 +217,12 @@ impl ScalarVectorBuilder for BooleanVectorBuilder {
array: self.mutable_array.finish(),
}
}

/// Wraps the result of `mutable_array.finish_cloned()` in a `BooleanVector`.
/// Only borrows `self` immutably.
fn finish_cloned(&self) -> Self::VectorType {
    let array = self.mutable_array.finish_cloned();
    BooleanVector { array }
}
}

impl Serializable for BooleanVector {
Expand Down
10 changes: 10 additions & 0 deletions src/datatypes/src/vectors/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,10 @@ impl MutableVector for Decimal128VectorBuilder {
Arc::new(self.finish())
}

/// Builds a shared vector from the current contents via `finish_cloned`,
/// leaving this builder as it is.
fn to_vector_cloned(&self) -> VectorRef {
    let snapshot = self.finish_cloned();
    Arc::new(snapshot)
}

fn try_push_value_ref(&mut self, value: ValueRef) -> Result<()> {
let decimal_val = value.as_decimal128()?.map(|v| v.val());
self.mutable_array.append_option(decimal_val);
Expand Down Expand Up @@ -358,6 +362,12 @@ impl ScalarVectorBuilder for Decimal128VectorBuilder {
array: self.mutable_array.finish(),
}
}

/// Wraps the result of `mutable_array.finish_cloned()` in a `Decimal128Vector`.
/// Only borrows `self` immutably.
fn finish_cloned(&self) -> Self::VectorType {
    let array = self.mutable_array.finish_cloned();
    Decimal128Vector { array }
}
}

impl Decimal128VectorBuilder {
Expand Down
37 changes: 36 additions & 1 deletion src/datatypes/src/vectors/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use std::sync::Arc;
use arrow::array::{
Array, ArrayData, ArrayRef, BooleanBufferBuilder, Int32BufferBuilder, ListArray,
};
use arrow::buffer::Buffer;
use arrow::buffer::{Buffer, NullBuffer};
use arrow::datatypes::DataType as ArrowDataType;
use serde_json::Value as JsonValue;

Expand Down Expand Up @@ -281,6 +281,10 @@ impl MutableVector for ListVectorBuilder {
Arc::new(self.finish())
}

/// Builds a shared vector from the current contents via `finish_cloned`,
/// leaving this builder as it is.
fn to_vector_cloned(&self) -> VectorRef {
    let snapshot = self.finish_cloned();
    Arc::new(snapshot)
}

fn try_push_value_ref(&mut self, value: ValueRef) -> Result<()> {
if let Some(list_ref) = value.as_list()? {
match list_ref {
Expand Down Expand Up @@ -355,6 +359,31 @@ impl ScalarVectorBuilder for ListVectorBuilder {
item_type: self.item_type.clone(),
}
}

/// Assembles a `ListVector` from the builder's current state while only
/// borrowing `self` immutably.
///
/// The child values, the offsets and the null bitmap are each copied or
/// snapshotted from their respective builders and combined into a
/// `ListArray`.
fn finish_cloned(&self) -> Self::VectorType {
    // Snapshot the child values through the non-resetting conversion.
    let child_data = self
        .values_builder
        .to_vector_cloned()
        .to_arrow_array()
        .to_data();

    // Copy the offsets accumulated so far into a fresh buffer.
    let offsets = Buffer::from_slice_ref(self.offsets_builder.as_slice());
    let validity = self.null_buffer_builder.finish_cloned();

    let list_type = ConcreteDataType::list_datatype(self.item_type.clone()).as_arrow_type();
    let data_builder = ArrayData::builder(list_type)
        .len(self.len())
        .add_buffer(offsets)
        .add_child_data(child_data)
        .nulls(validity);

    // SAFETY: NOTE(review) - validation is skipped here; this presumably relies
    // on the builder keeping offsets, child values and null bitmap mutually
    // consistent. Confirm against the push paths.
    let list_data = unsafe { data_builder.build_unchecked() };

    ListVector {
        array: ListArray::from(list_data),
        item_type: self.item_type.clone(),
    }
}
}

// Ported from https://github.com/apache/arrow-rs/blob/94565bca99b5d9932a3e9a8e094aaf4e4384b1e5/arrow-array/src/builder/null_buffer_builder.rs
Expand Down Expand Up @@ -427,6 +456,12 @@ impl NullBufferBuilder {
buf
}

/// Builds the [NullBuffer] without resetting the builder.
///
/// Returns `None` when `bitmap_builder` is `None`.
fn finish_cloned(&self) -> Option<NullBuffer> {
    self.bitmap_builder
        .as_ref()
        .map(|bitmap| NullBuffer::new(bitmap.finish_cloned()))
}

#[inline]
fn materialize_if_needed(&mut self) {
if self.bitmap_builder.is_none() {
Expand Down
4 changes: 4 additions & 0 deletions src/datatypes/src/vectors/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@ impl MutableVector for NullVectorBuilder {
vector
}

/// Creates a new `NullVector` with this builder's current `length`,
/// without modifying the builder.
fn to_vector_cloned(&self) -> VectorRef {
    let null_vector = NullVector::new(self.length);
    Arc::new(null_vector)
}

fn try_push_value_ref(&mut self, value: ValueRef) -> Result<()> {
ensure!(
value.is_null(),
Expand Down
10 changes: 10 additions & 0 deletions src/datatypes/src/vectors/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,10 @@ impl<T: LogicalPrimitiveType> MutableVector for PrimitiveVectorBuilder<T> {
Arc::new(self.finish())
}

/// Builds a shared vector from the current contents via `finish_cloned`,
/// leaving this builder as it is.
fn to_vector_cloned(&self) -> VectorRef {
    let snapshot = self.finish_cloned();
    Arc::new(snapshot)
}

fn try_push_value_ref(&mut self, value: ValueRef) -> Result<()> {
let primitive = T::cast_value_ref(value)?;
match primitive {
Expand Down Expand Up @@ -352,6 +356,12 @@ where
array: self.mutable_array.finish(),
}
}

/// Wraps the result of `mutable_array.finish_cloned()` in a `PrimitiveVector`.
/// Only borrows `self` immutably.
fn finish_cloned(&self) -> Self::VectorType {
    let array = self.mutable_array.finish_cloned();
    PrimitiveVector { array }
}
}

pub(crate) fn replicate_primitive<T: LogicalPrimitiveType>(
Expand Down
10 changes: 10 additions & 0 deletions src/datatypes/src/vectors/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ impl MutableVector for StringVectorBuilder {
Arc::new(self.finish())
}

/// Builds a shared vector from the current contents via `finish_cloned`,
/// leaving this builder as it is.
fn to_vector_cloned(&self) -> VectorRef {
    let snapshot = self.finish_cloned();
    Arc::new(snapshot)
}

fn try_push_value_ref(&mut self, value: ValueRef) -> Result<()> {
match value.as_string()? {
Some(v) => self.mutable_array.append_value(v),
Expand Down Expand Up @@ -228,6 +232,12 @@ impl ScalarVectorBuilder for StringVectorBuilder {
array: self.mutable_array.finish(),
}
}

/// Wraps the result of `mutable_array.finish_cloned()` in a `StringVector`.
/// Only borrows `self` immutably.
fn finish_cloned(&self) -> Self::VectorType {
    let array = self.mutable_array.finish_cloned();
    StringVector { array }
}
}

impl Serializable for StringVector {
Expand Down

0 comments on commit 0759bc9

Please sign in to comment.