Skip to content

Commit

Permalink
feat: get internal value size of ValueRef (#2613)
Browse files Browse the repository at this point in the history
* feat: impl byte_size

* chore: clippy

* chore: cr comment
  • Loading branch information
QuenKar authored Oct 19, 2023
1 parent d57b144 commit ba15c14
Showing 1 changed file with 131 additions and 0 deletions.
131 changes: 131 additions & 0 deletions src/datatypes/src/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,17 @@ impl ListValue {
Arc::new(new_item_field(output_type.item_type().as_arrow_type())),
))
}

/// use 'the first item size' * 'length of items' to estimate the size.
/// it could be inaccurate.
fn estimated_size(&self) -> usize {
if let Some(items) = &self.items {
if let Some(item) = items.first() {
return item.as_value_ref().data_size() * items.len();
}
}
0
}
}

impl Default for ListValue {
Expand Down Expand Up @@ -1090,12 +1101,46 @@ impl<'a> PartialOrd for ListValueRef<'a> {
}
}

impl<'a> ValueRef<'a> {
/// Returns the size of the underlying data in bytes,
/// The size is estimated and only considers the data size.
pub fn data_size(&self) -> usize {
match *self {
ValueRef::Null => 0,
ValueRef::Boolean(_) => 1,
ValueRef::UInt8(_) => 1,
ValueRef::UInt16(_) => 2,
ValueRef::UInt32(_) => 4,
ValueRef::UInt64(_) => 8,
ValueRef::Int8(_) => 1,
ValueRef::Int16(_) => 2,
ValueRef::Int32(_) => 4,
ValueRef::Int64(_) => 8,
ValueRef::Float32(_) => 4,
ValueRef::Float64(_) => 8,
ValueRef::String(v) => std::mem::size_of_val(v),
ValueRef::Binary(v) => std::mem::size_of_val(v),
ValueRef::Date(_) => 4,
ValueRef::DateTime(_) => 8,
ValueRef::Timestamp(_) => 16,
ValueRef::Time(_) => 16,
ValueRef::Duration(_) => 16,
ValueRef::Interval(_) => 24,
ValueRef::List(v) => match v {
ListValueRef::Indexed { vector, .. } => vector.memory_size() / vector.len(),
ListValueRef::Ref { val } => val.estimated_size(),
},
}
}
}

#[cfg(test)]
mod tests {
use arrow::datatypes::DataType as ArrowDataType;
use num_traits::Float;

use super::*;
use crate::vectors::ListVectorBuilder;

#[test]
fn test_try_from_scalar_value() {
Expand Down Expand Up @@ -2158,4 +2203,90 @@ mod tests {
duration_to_scalar_value(TimeUnit::Nanosecond, Some(1))
);
}

fn check_value_ref_size_eq(value_ref: &ValueRef, size: usize) {
assert_eq!(value_ref.data_size(), size);
}

#[test]
fn test_value_ref_estimated_size() {
assert_eq!(std::mem::size_of::<ValueRef>(), 24);

check_value_ref_size_eq(&ValueRef::Boolean(true), 1);
check_value_ref_size_eq(&ValueRef::UInt8(1), 1);
check_value_ref_size_eq(&ValueRef::UInt16(1), 2);
check_value_ref_size_eq(&ValueRef::UInt32(1), 4);
check_value_ref_size_eq(&ValueRef::UInt64(1), 8);
check_value_ref_size_eq(&ValueRef::Int8(1), 1);
check_value_ref_size_eq(&ValueRef::Int16(1), 2);
check_value_ref_size_eq(&ValueRef::Int32(1), 4);
check_value_ref_size_eq(&ValueRef::Int64(1), 8);
check_value_ref_size_eq(&ValueRef::Float32(1.0.into()), 4);
check_value_ref_size_eq(&ValueRef::Float64(1.0.into()), 8);
check_value_ref_size_eq(&ValueRef::String("greptimedb"), 10);
check_value_ref_size_eq(&ValueRef::Binary(b"greptimedb"), 10);
check_value_ref_size_eq(&ValueRef::Date(Date::new(1)), 4);
check_value_ref_size_eq(&ValueRef::DateTime(DateTime::new(1)), 8);
check_value_ref_size_eq(&ValueRef::Timestamp(Timestamp::new_millisecond(1)), 16);
check_value_ref_size_eq(&ValueRef::Time(Time::new_millisecond(1)), 16);
check_value_ref_size_eq(
&ValueRef::Interval(Interval::from_month_day_nano(1, 2, 3)),
24,
);
check_value_ref_size_eq(&ValueRef::Duration(Duration::new_millisecond(1)), 16);
check_value_ref_size_eq(
&ValueRef::List(ListValueRef::Ref {
val: &ListValue {
items: Some(Box::new(vec![
Value::String("hello world".into()),
Value::String("greptimedb".into()),
])),
datatype: ConcreteDataType::string_datatype(),
},
}),
22,
);

let data = vec![
Some(vec![Some(1), Some(2), Some(3)]),
None,
Some(vec![Some(4), None, Some(6)]),
];
let mut builder =
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 8);
for vec_opt in &data {
if let Some(vec) = vec_opt {
let values = vec.iter().map(|v| Value::from(*v)).collect();
let values = Some(Box::new(values));
let list_value = ListValue::new(values, ConcreteDataType::int32_datatype());

builder.push(Some(ListValueRef::Ref { val: &list_value }));
} else {
builder.push(None);
}
}
let vector = builder.finish();

check_value_ref_size_eq(
&ValueRef::List(ListValueRef::Indexed {
vector: &vector,
idx: 0,
}),
85,
);
check_value_ref_size_eq(
&ValueRef::List(ListValueRef::Indexed {
vector: &vector,
idx: 1,
}),
85,
);
check_value_ref_size_eq(
&ValueRef::List(ListValueRef::Indexed {
vector: &vector,
idx: 2,
}),
85,
)
}
}

0 comments on commit ba15c14

Please sign in to comment.