From ba15c14103e3f484e3331669fac8c86d52680b60 Mon Sep 17 00:00:00 2001 From: Wei <47681251+QuenKar@users.noreply.github.com> Date: Thu, 19 Oct 2023 11:59:37 +0800 Subject: [PATCH] feat: get internal value size of ValueRef (#2613) * feat: impl byte_size * chore: clippy * chore: cr comment --- src/datatypes/src/value.rs | 131 +++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/src/datatypes/src/value.rs b/src/datatypes/src/value.rs index 45e8f1ce02b5..287a27cb2837 100644 --- a/src/datatypes/src/value.rs +++ b/src/datatypes/src/value.rs @@ -654,6 +654,17 @@ impl ListValue { Arc::new(new_item_field(output_type.item_type().as_arrow_type())), )) } + + /// use 'the first item size' * 'length of items' to estimate the size. + /// it could be inaccurate. + fn estimated_size(&self) -> usize { + if let Some(items) = &self.items { + if let Some(item) = items.first() { + return item.as_value_ref().data_size() * items.len(); + } + } + 0 + } } impl Default for ListValue { @@ -1090,12 +1101,46 @@ impl<'a> PartialOrd for ListValueRef<'a> { } } +impl<'a> ValueRef<'a> { + /// Returns the size of the underlying data in bytes, + /// The size is estimated and only considers the data size. + pub fn data_size(&self) -> usize { + match *self { + ValueRef::Null => 0, + ValueRef::Boolean(_) => 1, + ValueRef::UInt8(_) => 1, + ValueRef::UInt16(_) => 2, + ValueRef::UInt32(_) => 4, + ValueRef::UInt64(_) => 8, + ValueRef::Int8(_) => 1, + ValueRef::Int16(_) => 2, + ValueRef::Int32(_) => 4, + ValueRef::Int64(_) => 8, + ValueRef::Float32(_) => 4, + ValueRef::Float64(_) => 8, + ValueRef::String(v) => std::mem::size_of_val(v), + ValueRef::Binary(v) => std::mem::size_of_val(v), + ValueRef::Date(_) => 4, + ValueRef::DateTime(_) => 8, + ValueRef::Timestamp(_) => 16, + ValueRef::Time(_) => 16, + ValueRef::Duration(_) => 16, + ValueRef::Interval(_) => 24, + ValueRef::List(v) => match v { + ListValueRef::Indexed { vector, .. } => vector.memory_size() / vector.len(), + ListValueRef::Ref { val } => val.estimated_size(), + }, + } + } +} + #[cfg(test)] mod tests { use arrow::datatypes::DataType as ArrowDataType; use num_traits::Float; use super::*; + use crate::vectors::ListVectorBuilder; #[test] fn test_try_from_scalar_value() { @@ -2158,4 +2203,90 @@ mod tests { duration_to_scalar_value(TimeUnit::Nanosecond, Some(1)) ); } + + fn check_value_ref_size_eq(value_ref: &ValueRef, size: usize) { + assert_eq!(value_ref.data_size(), size); + } + + #[test] + fn test_value_ref_estimated_size() { + assert_eq!(std::mem::size_of::(), 24); + + check_value_ref_size_eq(&ValueRef::Boolean(true), 1); + check_value_ref_size_eq(&ValueRef::UInt8(1), 1); + check_value_ref_size_eq(&ValueRef::UInt16(1), 2); + check_value_ref_size_eq(&ValueRef::UInt32(1), 4); + check_value_ref_size_eq(&ValueRef::UInt64(1), 8); + check_value_ref_size_eq(&ValueRef::Int8(1), 1); + check_value_ref_size_eq(&ValueRef::Int16(1), 2); + check_value_ref_size_eq(&ValueRef::Int32(1), 4); + check_value_ref_size_eq(&ValueRef::Int64(1), 8); + check_value_ref_size_eq(&ValueRef::Float32(1.0.into()), 4); + check_value_ref_size_eq(&ValueRef::Float64(1.0.into()), 8); + check_value_ref_size_eq(&ValueRef::String("greptimedb"), 10); + check_value_ref_size_eq(&ValueRef::Binary(b"greptimedb"), 10); + check_value_ref_size_eq(&ValueRef::Date(Date::new(1)), 4); + check_value_ref_size_eq(&ValueRef::DateTime(DateTime::new(1)), 8); + check_value_ref_size_eq(&ValueRef::Timestamp(Timestamp::new_millisecond(1)), 16); + check_value_ref_size_eq(&ValueRef::Time(Time::new_millisecond(1)), 16); + check_value_ref_size_eq( + &ValueRef::Interval(Interval::from_month_day_nano(1, 2, 3)), + 24, + ); + check_value_ref_size_eq(&ValueRef::Duration(Duration::new_millisecond(1)), 16); + check_value_ref_size_eq( + &ValueRef::List(ListValueRef::Ref { + val: &ListValue { + items: Some(Box::new(vec![ + Value::String("hello world".into()), + Value::String("greptimedb".into()), + ])), + datatype: ConcreteDataType::string_datatype(), + }, + }), + 22, + ); + + let data = vec![ + Some(vec![Some(1), Some(2), Some(3)]), + None, + Some(vec![Some(4), None, Some(6)]), + ]; + let mut builder = + ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 8); + for vec_opt in &data { + if let Some(vec) = vec_opt { + let values = vec.iter().map(|v| Value::from(*v)).collect(); + let values = Some(Box::new(values)); + let list_value = ListValue::new(values, ConcreteDataType::int32_datatype()); + + builder.push(Some(ListValueRef::Ref { val: &list_value })); + } else { + builder.push(None); + } + } + let vector = builder.finish(); + + check_value_ref_size_eq( + &ValueRef::List(ListValueRef::Indexed { + vector: &vector, + idx: 0, + }), + 85, + ); + check_value_ref_size_eq( + &ValueRef::List(ListValueRef::Indexed { + vector: &vector, + idx: 1, + }), + 85, + ); + check_value_ref_size_eq( + &ValueRef::List(ListValueRef::Indexed { + vector: &vector, + idx: 2, + }), + 85, + ) + } }