From 1f48edca910de68db809ef358362a0308d56c386 Mon Sep 17 00:00:00 2001
From: Kould <2435992353@qq.com>
Date: Mon, 25 Sep 2023 21:52:38 +0800
Subject: [PATCH] perf: Optimized `DataValue` conversion to bitwise sequence

Key encoding now appends big-endian bytes to a caller-supplied buffer
instead of building zero-padded decimal strings:

- signed integers (and Date32/Date64 in index keys) have their sign bit
  flipped so that byte-wise comparison follows numeric order;
- unsigned integers are written unchanged;
- floats get the sign bit set when non-negative and every bit inverted
  when negative;
- Utf8 values are appended as raw bytes;
- booleans are appended as the single byte b'1' or b'0'.

Decimal index keys are not implemented yet (`todo!()`), so the decimal
ordering test is removed.

Review fixes folded into this version of the patch:

- The Utf8 arms use `extend_from_slice`. `copy_from_slice` panics unless
  both slices have the same length, and it overwrites the destination
  (the key prefix) instead of appending to it.
- `to_index_key` returns `TypeError::NotNull` for null values, as it did
  before this change, instead of panicking through `todo!()`.

---
 src/storage/table_codec.rs |  46 ++++----
 src/types/value.rs         | 225 ++++++++++++++++++++-----------------
 2 files changed, 146 insertions(+), 125 deletions(-)

diff --git a/src/storage/table_codec.rs b/src/storage/table_codec.rs
index 070d8b5d..49edc50d 100644
--- a/src/storage/table_codec.rs
+++ b/src/storage/table_codec.rs
@@ -90,13 +90,14 @@ impl TableCodec {
     }
 
     pub fn encode_tuple_key(&self, tuple_id: &TupleId) -> Result<Vec<u8>, TypeError> {
-        let string_key = format!(
-            "{}_Tuple_0_{}",
+        let mut string_key = format!(
+            "{}_Tuple_0_",
             self.table.name,
-            tuple_id.to_primary_key()?,
-        );
+        ).into_bytes();
+
+        tuple_id.to_primary_key(&mut string_key)?;
 
-        Ok(string_key.into_bytes())
+        Ok(string_key)
     }
 
     pub fn decode_tuple(&self, bytes: &[u8]) -> Tuple {
@@ -140,13 +141,13 @@ impl TableCodec {
             "{}_Index_0_{}_0",
             self.table.name,
             index.id
-        );
+        ).into_bytes();
 
         for col_v in &index.column_values {
-            string_key += &format!("_{}", col_v.to_index_key()?);
+            col_v.to_index_key(&mut string_key)?;
         }
 
-        Ok(string_key.into_bytes())
+        Ok(string_key)
     }
 
     pub fn decode_index(bytes: &[u8]) -> Result<Vec<TupleId>, TypeError> {
@@ -276,14 +277,10 @@ mod tests {
 
         let (key, bytes) = codec.encode_tuple(&tuple)?;
 
-        assert_eq!(
-            String::from_utf8(key.to_vec()).ok().unwrap(),
-            format!(
-                "{}_Tuple_0_{}",
-                table_catalog.name,
-                tuple.id.clone().unwrap().to_primary_key()?,
-            )
-        );
+        let mut test_key = format!("{}_Tuple_0_", table_catalog.name).into_bytes();
+        tuple.id.clone().unwrap().to_primary_key(&mut test_key)?;
+
+        assert_eq!(key.to_vec(), test_key);
         assert_eq!(codec.decode_tuple(&bytes), tuple);
 
         Ok(())
@@ -343,15 +340,14 @@ mod tests {
 
         let (key, bytes) = codec.encode_index(&index, &tuple_ids)?;
 
-        assert_eq!(
-            String::from_utf8(key.to_vec()).ok().unwrap(),
-            format!(
-                "{}_Index_0_{}_0_{}",
-                table_catalog.name,
-                index.id,
-                index.column_values[0].to_index_key()?
-            )
-        );
+        let mut test_key = format!(
+            "{}_Index_0_{}_0",
+            table_catalog.name,
+            index.id,
+        ).into_bytes();
+        index.column_values[0].to_index_key(&mut test_key)?;
+
+        assert_eq!(key.to_vec(), test_key);
         assert_eq!(TableCodec::decode_index(&bytes)?, tuple_ids);
 
         Ok(())
diff --git a/src/types/value.rs b/src/types/value.rs
index 4db15d49..a02d0183 100644
--- a/src/types/value.rs
+++ b/src/types/value.rs
@@ -146,14 +146,10 @@ impl PartialOrd for DataValue {
     }
 }
 
-macro_rules! signed_to_primary_key {
-    ($ty:ty, $EXPR:expr) => {{
-        if $EXPR.is_negative() {
-            $EXPR ^ (-1 ^ <$ty>::MIN)
-        } else {
-            $EXPR
-        }
-    }};
+macro_rules! encode_u {
+    ($b:ident, $u:expr) => {
+        $b.extend_from_slice(&$u.to_be_bytes())
+    };
 }
 
 impl Eq for DataValue {}
@@ -387,49 +383,78 @@ impl DataValue {
         }
     }
 
-    pub fn to_primary_key(&self) -> Result<String, TypeError> {
+    pub fn to_primary_key(&self, b: &mut Vec<u8>) -> Result<(), TypeError> {
         match self {
-            DataValue::Int8(option) => option.map(|v| format!("{:0width$}", signed_to_primary_key!(i8, v), width = 4)),
-            DataValue::Int16(option) => option.map(|v| format!("{:0width$}", signed_to_primary_key!(i16, v), width = 6)),
-            DataValue::Int32(option) => option.map(|v| format!("{:0width$}", signed_to_primary_key!(i32, v), width = 11)),
-            DataValue::Int64(option) => option.map(|v| format!("{:0width$}", signed_to_primary_key!(i64, v), width = 20)),
-            DataValue::UInt8(option) => option.map(|v| format!("{:0width$}", v, width = 3)),
-            DataValue::UInt16(option) => option.map(|v| format!("{:0width$}", v, width = 5)),
-            DataValue::UInt32(option) => option.map(|v| format!("{:0width$}", v, width = 10)),
-            DataValue::UInt64(option) => option.map(|v| format!("{:0width$}", v, width = 20)),
-            DataValue::Utf8(option) => option.clone(),
-            _ => return Err(TypeError::InvalidType),
-        }.ok_or(TypeError::NotNull)
+            DataValue::Int8(Some(v)) => encode_u!(b, *v as u8 ^ 0x80_u8),
+            DataValue::Int16(Some(v)) => encode_u!(b, *v as u16 ^ 0x8000_u16),
+            DataValue::Int32(Some(v)) => encode_u!(b, *v as u32 ^ 0x80000000_u32),
+            DataValue::Int64(Some(v)) => encode_u!(b, *v as u64 ^ 0x8000000000000000_u64),
+            DataValue::UInt8(Some(v)) => encode_u!(b, v),
+            DataValue::UInt16(Some(v)) => encode_u!(b, v),
+            DataValue::UInt32(Some(v)) => encode_u!(b, v),
+            DataValue::UInt64(Some(v)) => encode_u!(b, v),
+            DataValue::Utf8(Some(v)) => b.extend_from_slice(v.as_bytes()),
+            value => {
+                return if value.is_null() {
+                    Err(TypeError::NotNull)
+                } else {
+                    Err(TypeError::InvalidType)
+                }
+            }
+        }
+
+        Ok(())
     }
 
-    pub fn to_index_key(&self) -> Result<String, TypeError> {
+    pub fn to_index_key(&self, b: &mut Vec<u8>) -> Result<(), TypeError> {
         match self {
-            DataValue::Int8(option) => option.map(|v| format!("{:0width$}", signed_to_primary_key!(i8, v), width = 4)),
-            DataValue::Int16(option) => option.map(|v| format!("{:0width$}", signed_to_primary_key!(i16, v), width = 6)),
-            DataValue::Int32(option) => option.map(|v| format!("{:0width$}", signed_to_primary_key!(i32, v), width = 11)),
-            DataValue::Int64(option) => option.map(|v| format!("{:0width$}", signed_to_primary_key!(i64, v), width = 20)),
-            DataValue::UInt8(option) => option.map(|v| format!("{:0width$}", v, width = 3)),
-            DataValue::UInt16(option) => option.map(|v| format!("{:0width$}", v, width = 5)),
-            DataValue::UInt32(option) => option.map(|v| format!("{:0width$}", v, width = 10)),
-            DataValue::UInt64(option) => option.map(|v| format!("{:0width$}", v, width = 20)),
-            DataValue::Utf8(option) => option.clone(),
-            DataValue::Date32(option) => option.map(|v| format!("{:0width$}", signed_to_primary_key!(i32, v), width = 11)),
-            DataValue::Date64(option) => option.map(|v| format!("{:0width$}", signed_to_primary_key!(i64, v), width = 20)),
-            DataValue::Boolean(option) => option.map(|b| if b { "1" } else { "0" }.to_string()),
-            DataValue::Float32(option) => option.map(|v| format!("{:0width$}", (unsafe { mem::transmute::<u32, i32>(v.to_bits()) }), width = 11)),
-            DataValue::Float64(option) => option.map(|v| format!("{:0width$}", (unsafe { mem::transmute::<u64, i64>(v.to_bits()) }), width = 20)),
-            DataValue::Decimal(option) => option.map(|v| {
-                let i = signed_to_primary_key!(i128, v.mantissa());
-                let scale = v.scale();
-                let mut string = format!("{:0width$}", i, width = 40);
-
-                if scale != 0 {
-                    string.insert(40 - scale as usize, '.');
+            DataValue::Int8(Some(v)) => encode_u!(b, *v as u8 ^ 0x80_u8),
+            DataValue::Int16(Some(v)) => encode_u!(b, *v as u16 ^ 0x8000_u16),
+            DataValue::Int32(Some(v)) | DataValue::Date32(Some(v)) => {
+                encode_u!(b, *v as u32 ^ 0x80000000_u32)
+            },
+            DataValue::Int64(Some(v)) | DataValue::Date64(Some(v)) => {
+                encode_u!(b, *v as u64 ^ 0x8000000000000000_u64)
+            },
+            DataValue::UInt8(Some(v)) => encode_u!(b, v),
+            DataValue::UInt16(Some(v)) => encode_u!(b, v),
+            DataValue::UInt32(Some(v)) => encode_u!(b, v),
+            DataValue::UInt64(Some(v)) => encode_u!(b, v),
+            DataValue::Utf8(Some(v)) => b.extend_from_slice(v.as_bytes()),
+            DataValue::Boolean(Some(v)) => b.push(if *v { b'1' } else { b'0' }),
+            DataValue::Float32(Some(f)) => {
+                let mut u = f.to_bits();
+
+                if *f >= 0_f32 {
+                    u |= 0x80000000_u32;
+                } else {
+                    u = !u;
+                }
+
+                encode_u!(b, u);
+            },
+            DataValue::Float64(Some(f)) => {
+                let mut u = f.to_bits();
+
+                if *f >= 0_f64 {
+                    u |= 0x8000000000000000_u64;
+                } else {
+                    u = !u;
                 }
-                string
-            }),
-            _ => return Err(TypeError::InvalidType),
-        }.ok_or(TypeError::NotNull)
+
+                encode_u!(b, u);
+            },
+            DataValue::Decimal(Some(_v)) => todo!(),
+            value => {
+                return if value.is_null() {
+                    Err(TypeError::NotNull)
+                } else {
+                    Err(TypeError::InvalidType)
+                }
+            },
+        }
+
+        Ok(())
     }
 
     pub fn cast(self, to: &LogicalType) -> Result<DataValue, TypeError> {
@@ -900,45 +925,60 @@ impl fmt::Debug for DataValue {
 
 #[cfg(test)]
 mod test {
-    use rust_decimal::Decimal;
     use crate::types::errors::TypeError;
     use crate::types::value::DataValue;
 
     #[test]
     fn test_to_primary_key() -> Result<(), TypeError> {
-        let key_i8_1 = DataValue::Int8(Some(i8::MIN)).to_primary_key()?;
-        let key_i8_2 = DataValue::Int8(Some(-1_i8)).to_primary_key()?;
-        let key_i8_3 = DataValue::Int8(Some(i8::MAX)).to_primary_key()?;
+        let mut key_i8_1 = Vec::new();
+        let mut key_i8_2 = Vec::new();
+        let mut key_i8_3 = Vec::new();
 
-        println!("{} < {}", key_i8_1, key_i8_2);
-        println!("{} < {}", key_i8_2, key_i8_3);
+        DataValue::Int8(Some(i8::MIN)).to_primary_key(&mut key_i8_1)?;
+        DataValue::Int8(Some(-1_i8)).to_primary_key(&mut key_i8_2)?;
+        DataValue::Int8(Some(i8::MAX)).to_primary_key(&mut key_i8_3)?;
+
+        println!("{:?} < {:?}", key_i8_1, key_i8_2);
+        println!("{:?} < {:?}", key_i8_2, key_i8_3);
         assert!(key_i8_1 < key_i8_2);
         assert!(key_i8_2 < key_i8_3);
 
-        let key_i16_1 = DataValue::Int16(Some(i16::MIN)).to_primary_key()?;
-        let key_i16_2 = DataValue::Int16(Some(-1_i16)).to_primary_key()?;
-        let key_i16_3 = DataValue::Int16(Some(i16::MAX)).to_primary_key()?;
+        let mut key_i16_1 = Vec::new();
+        let mut key_i16_2 = Vec::new();
+        let mut key_i16_3 = Vec::new();
+
+        DataValue::Int16(Some(i16::MIN)).to_primary_key(&mut key_i16_1)?;
+        DataValue::Int16(Some(-1_i16)).to_primary_key(&mut key_i16_2)?;
+        DataValue::Int16(Some(i16::MAX)).to_primary_key(&mut key_i16_3)?;
 
-        println!("{} < {}", key_i16_1, key_i16_2);
-        println!("{} < {}", key_i16_2, key_i16_3);
+        println!("{:?} < {:?}", key_i16_1, key_i16_2);
+        println!("{:?} < {:?}", key_i16_2, key_i16_3);
         assert!(key_i16_1 < key_i16_2);
         assert!(key_i16_2 < key_i16_3);
 
-        let key_i32_1 = DataValue::Int32(Some(i32::MIN)).to_primary_key()?;
-        let key_i32_2 = DataValue::Int32(Some(-1_i32)).to_primary_key()?;
-        let key_i32_3 = DataValue::Int32(Some(i32::MAX)).to_primary_key()?;
+        let mut key_i32_1 = Vec::new();
+        let mut key_i32_2 = Vec::new();
+        let mut key_i32_3 = Vec::new();
 
-        println!("{} < {}", key_i32_1, key_i32_2);
-        println!("{} < {}", key_i32_2, key_i32_3);
+        DataValue::Int32(Some(i32::MIN)).to_primary_key(&mut key_i32_1)?;
+        DataValue::Int32(Some(-1_i32)).to_primary_key(&mut key_i32_2)?;
+        DataValue::Int32(Some(i32::MAX)).to_primary_key(&mut key_i32_3)?;
+
+        println!("{:?} < {:?}", key_i32_1, key_i32_2);
+        println!("{:?} < {:?}", key_i32_2, key_i32_3);
         assert!(key_i32_1 < key_i32_2);
         assert!(key_i32_2 < key_i32_3);
 
-        let key_i64_1 = DataValue::Int64(Some(i64::MIN)).to_primary_key()?;
-        let key_i64_2 = DataValue::Int64(Some(-1_i64)).to_primary_key()?;
-        let key_i64_3 = DataValue::Int64(Some(i64::MAX)).to_primary_key()?;
+        let mut key_i64_1 = Vec::new();
+        let mut key_i64_2 = Vec::new();
+        let mut key_i64_3 = Vec::new();
+
+        DataValue::Int64(Some(i64::MIN)).to_primary_key(&mut key_i64_1)?;
+        DataValue::Int64(Some(-1_i64)).to_primary_key(&mut key_i64_2)?;
+        DataValue::Int64(Some(i64::MAX)).to_primary_key(&mut key_i64_3)?;
 
-        println!("{} < {}", key_i64_1, key_i64_2);
-        println!("{} < {}", key_i64_2, key_i64_3);
+        println!("{:?} < {:?}", key_i64_1, key_i64_2);
+        println!("{:?} < {:?}", key_i64_2, key_i64_3);
         assert!(key_i64_1 < key_i64_2);
         assert!(key_i64_2 < key_i64_3);
 
@@ -947,47 +987,32 @@ mod test {
 
     #[test]
     fn test_to_index_key_f() -> Result<(), TypeError> {
-        let key_f32_1 = DataValue::Float32(Some(f32::MIN)).to_index_key()?;
-        let key_f32_2 = DataValue::Float32(Some(-1_f32)).to_index_key()?;
-        let key_f32_3 = DataValue::Float32(Some(f32::MAX)).to_index_key()?;
+        let mut key_f32_1 = Vec::new();
+        let mut key_f32_2 = Vec::new();
+        let mut key_f32_3 = Vec::new();
 
-        println!("{} < {}", key_f32_1, key_f32_2);
-        println!("{} < {}", key_f32_2, key_f32_3);
+        DataValue::Float32(Some(f32::MIN)).to_index_key(&mut key_f32_1)?;
+        DataValue::Float32(Some(-1_f32)).to_index_key(&mut key_f32_2)?;
+        DataValue::Float32(Some(f32::MAX)).to_index_key(&mut key_f32_3)?;
+
+        println!("{:?} < {:?}", key_f32_1, key_f32_2);
+        println!("{:?} < {:?}", key_f32_2, key_f32_3);
         assert!(key_f32_1 < key_f32_2);
         assert!(key_f32_2 < key_f32_3);
 
-        let key_f64_1 = DataValue::Float64(Some(f64::MIN)).to_index_key()?;
-        let key_f64_2 = DataValue::Float64(Some(-1_f64)).to_index_key()?;
-        let key_f64_3 = DataValue::Float64(Some(f64::MAX)).to_index_key()?;
+        let mut key_f64_1 = Vec::new();
+        let mut key_f64_2 = Vec::new();
+        let mut key_f64_3 = Vec::new();
+
+        DataValue::Float64(Some(f64::MIN)).to_index_key(&mut key_f64_1)?;
+        DataValue::Float64(Some(-1_f64)).to_index_key(&mut key_f64_2)?;
+        DataValue::Float64(Some(f64::MAX)).to_index_key(&mut key_f64_3)?;
 
-        println!("{} < {}", key_f64_1, key_f64_2);
-        println!("{} < {}", key_f64_2, key_f64_3);
+        println!("{:?} < {:?}", key_f64_1, key_f64_2);
+        println!("{:?} < {:?}", key_f64_2, key_f64_3);
         assert!(key_f64_1 < key_f64_2);
         assert!(key_f64_2 < key_f64_3);
 
         Ok(())
     }
-
-    #[test]
-    fn test_to_index_key_d() -> Result<(), TypeError> {
-        let key_scale_0_1 = DataValue::Decimal(Some(Decimal::new(i64::MIN, 0))).to_index_key()?;
-        let key_scale_0_2 = DataValue::Decimal(Some(Decimal::new(-1_i64, 0))).to_index_key()?;
-        let key_scale_0_3 = DataValue::Decimal(Some(Decimal::new(i64::MAX, 0))).to_index_key()?;
-
-        println!("{} < {}", key_scale_0_1, key_scale_0_2);
-        println!("{} < {}", key_scale_0_2, key_scale_0_3);
-        assert!(key_scale_0_1 < key_scale_0_2);
-        assert!(key_scale_0_2 < key_scale_0_3);
-
-        let key_scale_10_1 = DataValue::Decimal(Some(Decimal::new(i64::MIN, 10))).to_index_key()?;
-        let key_scale_10_2 = DataValue::Decimal(Some(Decimal::new(-1_i64, 10))).to_index_key()?;
-        let key_scale_10_3 = DataValue::Decimal(Some(Decimal::new(i64::MAX, 10))).to_index_key()?;
-
-        println!("{} < {}", key_scale_10_1, key_scale_10_2);
-        println!("{} < {}", key_scale_10_2, key_scale_10_3);
-        assert!(key_scale_10_1 < key_scale_10_2);
-        assert!(key_scale_10_2 < key_scale_10_3);
-
-        Ok(())
-    }
 }