From ba036dba6b5ac13632322e5738b81985b47e3618 Mon Sep 17 00:00:00 2001 From: Kould <2435992353@qq.com> Date: Wed, 27 Mar 2024 22:41:13 +0800 Subject: [PATCH 1/5] feat: support `Octets` for `Char/Varchar` --- Cargo.toml | 2 +- src/binder/create_table.rs | 3 +- src/binder/expr.rs | 13 +- src/catalog/column.rs | 3 +- src/execution/volcano/dml/analyze.rs | 4 +- src/execution/volcano/dml/copy_from_file.rs | 3 +- src/execution/volcano/dql/describe.rs | 25 +- src/execution/volcano/dql/explain.rs | 4 +- src/execution/volcano/dql/show_table.rs | 4 +- src/expression/evaluator.rs | 17 +- src/expression/mod.rs | 4 +- src/expression/value_compute.rs | 77 ++++-- src/marcos/mod.rs | 9 +- src/optimizer/core/histogram.rs | 2 +- .../rule/normalization/column_pruning.rs | 4 +- src/storage/table_codec.rs | 2 +- src/types/mod.rs | 69 +++--- src/types/tuple.rs | 47 +++- src/types/tuple_builder.rs | 7 +- src/types/value.rs | 225 ++++++++++++------ tests/slt/char.slt | 47 ++++ tests/slt/sql_2016/E021_01.slt | 10 +- tests/slt/sql_2016/E021_02.slt | 15 +- 23 files changed, 397 insertions(+), 199 deletions(-) create mode 100644 tests/slt/char.slt diff --git a/Cargo.toml b/Cargo.toml index 6d8125f0..5f854b1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,7 +58,7 @@ regex = { version = "1.10.3" } rust_decimal = { version = "1.34.3" } serde = { version = "1.0.197", features = ["derive", "rc"] } siphasher = { version = "1.0.0", features = ["serde"] } -sqlparser = { version = "0.34.0" } +sqlparser = { version = "0.34.0", features = ["serde"] } strum_macros = { version = "0.26.2" } thiserror = { version = "1.0.58" } tokio = { version = "1.36.0", features = ["full"] } diff --git a/src/binder/create_table.rs b/src/binder/create_table.rs index 70717fcc..96f87f11 100644 --- a/src/binder/create_table.rs +++ b/src/binder/create_table.rs @@ -147,6 +147,7 @@ mod tests { use crate::storage::Storage; use crate::types::LogicalType; use std::sync::atomic::AtomicUsize; + use sqlparser::ast::CharLengthUnits; 
use tempfile::TempDir; #[tokio::test] @@ -177,7 +178,7 @@ mod tests { assert_eq!(op.columns[1].nullable, true); assert_eq!( op.columns[1].desc, - ColumnDesc::new(LogicalType::Varchar(Some(10)), false, false, None) + ColumnDesc::new(LogicalType::Varchar(Some(10), CharLengthUnits::Characters), false, false, None) ); } _ => unreachable!(), diff --git a/src/binder/expr.rs b/src/binder/expr.rs index 3e8e0275..18530e29 100644 --- a/src/binder/expr.rs +++ b/src/binder/expr.rs @@ -3,10 +3,7 @@ use crate::errors::DatabaseError; use crate::expression; use crate::expression::agg::AggKind; use itertools::Itertools; -use sqlparser::ast::{ - BinaryOperator, DataType, Expr, Function, FunctionArg, FunctionArgExpr, Ident, Query, - UnaryOperator, -}; +use sqlparser::ast::{BinaryOperator, CharLengthUnits, DataType, Expr, Function, FunctionArg, FunctionArgExpr, Ident, Query, UnaryOperator}; use std::slice; use std::sync::Arc; @@ -69,7 +66,8 @@ impl<'a, T: Transaction> Binder<'a, T> { let logical_type = LogicalType::try_from(data_type.clone())?; let value = DataValue::Utf8 { value: Some(value.to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, } .cast(&logical_type)?; @@ -354,7 +352,7 @@ impl<'a, T: Transaction> Binder<'a, T> { | BinaryOperator::And | BinaryOperator::Or | BinaryOperator::Xor => LogicalType::Boolean, - BinaryOperator::StringConcat => LogicalType::Varchar(None), + BinaryOperator::StringConcat => LogicalType::Varchar(None, CharLengthUnits::Characters), _ => todo!(), }; @@ -603,7 +601,8 @@ impl<'a, T: Transaction> Binder<'a, T> { fn wildcard_expr() -> ScalarExpression { ScalarExpression::Constant(Arc::new(DataValue::Utf8 { value: Some("*".to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })) } } diff --git a/src/catalog/column.rs b/src/catalog/column.rs index 32d7653f..15f31908 100644 --- a/src/catalog/column.rs +++ b/src/catalog/column.rs @@ -4,6 +4,7 @@ use 
crate::expression::ScalarExpression; use serde::{Deserialize, Serialize}; use std::hash::Hash; use std::sync::Arc; +use sqlparser::ast::CharLengthUnits; use crate::types::tuple::EMPTY_TUPLE; use crate::types::value::ValueRef; @@ -50,7 +51,7 @@ impl ColumnCatalog { table_name: None, }, nullable: true, - desc: ColumnDesc::new(LogicalType::Varchar(None), false, false, None), + desc: ColumnDesc::new(LogicalType::Varchar(None, CharLengthUnits::Characters), false, false, None), } } diff --git a/src/execution/volcano/dml/analyze.rs b/src/execution/volcano/dml/analyze.rs index ae8b6a8f..be7b9a08 100644 --- a/src/execution/volcano/dml/analyze.rs +++ b/src/execution/volcano/dml/analyze.rs @@ -16,6 +16,7 @@ use std::fmt::Formatter; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; use std::{fmt, fs}; +use sqlparser::ast::CharLengthUnits; const DEFAULT_NUM_OF_BUCKETS: usize = 100; const DEFAULT_STATISTICS_META_PATH: &str = "fnck_sql_statistics_metas"; @@ -108,7 +109,8 @@ impl Analyze { meta.to_file(&path)?; values.push(Arc::new(DataValue::Utf8 { value: Some(path.clone()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters })); transaction.save_table_meta(&table_name, path, meta)?; } diff --git a/src/execution/volcano/dml/copy_from_file.rs b/src/execution/volcano/dml/copy_from_file.rs index 935ebc46..cb9a3d60 100644 --- a/src/execution/volcano/dml/copy_from_file.rs +++ b/src/execution/volcano/dml/copy_from_file.rs @@ -107,6 +107,7 @@ mod tests { use futures::StreamExt; use std::io::Write; use std::sync::Arc; + use sqlparser::ast::CharLengthUnits; use tempfile::TempDir; use super::*; @@ -148,7 +149,7 @@ mod tests { table_name: None, }, nullable: false, - desc: ColumnDesc::new(LogicalType::Varchar(Some(10)), false, false, None), + desc: ColumnDesc::new(LogicalType::Varchar(Some(10), CharLengthUnits::Characters), false, false, None), }), ]; diff --git a/src/execution/volcano/dql/describe.rs 
b/src/execution/volcano/dql/describe.rs index fb27ce64..75279a95 100644 --- a/src/execution/volcano/dql/describe.rs +++ b/src/execution/volcano/dql/describe.rs @@ -8,19 +8,23 @@ use crate::types::value::{DataValue, Utf8Type, ValueRef}; use futures_async_stream::try_stream; use lazy_static::lazy_static; use std::sync::Arc; +use sqlparser::ast::CharLengthUnits; lazy_static! { static ref PRIMARY_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 { value: Some(String::from("PRIMARY")), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters }); static ref UNIQUE_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 { value: Some(String::from("UNIQUE")), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters }); static ref EMPTY_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 { value: Some(String::from("EMPTY")), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters }); } @@ -69,24 +73,29 @@ impl Describe { let values = vec![ Arc::new(DataValue::Utf8 { value: Some(column.name().to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }), Arc::new(DataValue::Utf8 { value: Some(datatype.to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }), Arc::new(DataValue::Utf8 { value: datatype.raw_len().map(|len| len.to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }), Arc::new(DataValue::Utf8 { value: Some(column.nullable.to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }), key_fn(column), Arc::new(DataValue::Utf8 { value: Some(default), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }), ]; yield Tuple { id: None, values }; diff --git a/src/execution/volcano/dql/explain.rs b/src/execution/volcano/dql/explain.rs 
index f1dbb3ca..6be7fd3e 100644 --- a/src/execution/volcano/dql/explain.rs +++ b/src/execution/volcano/dql/explain.rs @@ -6,6 +6,7 @@ use crate::types::tuple::Tuple; use crate::types::value::{DataValue, Utf8Type}; use futures_async_stream::try_stream; use std::sync::Arc; +use sqlparser::ast::CharLengthUnits; pub struct Explain { plan: LogicalPlan, @@ -28,7 +29,8 @@ impl Explain { pub async fn _execute(self) { let values = vec![Arc::new(DataValue::Utf8 { value: Some(self.plan.explain(0)), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters })]; yield Tuple { id: None, values }; diff --git a/src/execution/volcano/dql/show_table.rs b/src/execution/volcano/dql/show_table.rs index f64b6dd1..3ba7af6f 100644 --- a/src/execution/volcano/dql/show_table.rs +++ b/src/execution/volcano/dql/show_table.rs @@ -6,6 +6,7 @@ use crate::types::tuple::Tuple; use crate::types::value::{DataValue, Utf8Type}; use futures_async_stream::try_stream; use std::sync::Arc; +use sqlparser::ast::CharLengthUnits; pub struct ShowTables; @@ -23,7 +24,8 @@ impl ShowTables { for TableMeta { table_name } in metas { let values = vec![Arc::new(DataValue::Utf8 { value: Some(table_name.to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters })]; yield Tuple { id: None, values }; diff --git a/src/expression/evaluator.rs b/src/expression/evaluator.rs index f97a1d4c..37437ee8 100644 --- a/src/expression/evaluator.rs +++ b/src/expression/evaluator.rs @@ -10,6 +10,7 @@ use lazy_static::lazy_static; use std::cmp; use std::cmp::Ordering; use std::sync::Arc; +use sqlparser::ast::CharLengthUnits; lazy_static! { static ref NULL_VALUE: ValueRef = Arc::new(DataValue::Null); @@ -25,7 +26,8 @@ macro_rules! 
eval_to_num { } else { return Ok(Arc::new(DataValue::Utf8 { value: None, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters })); } }; @@ -156,7 +158,7 @@ impl ScalarExpression { from_expr, } => { if let Some(mut string) = DataValue::clone(expr.eval(tuple, schema)?.as_ref()) - .cast(&LogicalType::Varchar(None))? + .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))? .utf8() { if let Some(from_expr) = from_expr { @@ -169,7 +171,8 @@ impl ScalarExpression { if from > len_i { return Ok(Arc::new(DataValue::Utf8 { value: None, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })); } string = string.split_off(from as usize); @@ -182,19 +185,21 @@ impl ScalarExpression { Ok(Arc::new(DataValue::Utf8 { value: Some(string), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })) } else { Ok(Arc::new(DataValue::Utf8 { value: None, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })) } } ScalarExpression::Position { expr, in_expr } => { let unpack = |expr: &ScalarExpression| -> Result { Ok(DataValue::clone(expr.eval(tuple, schema)?.as_ref()) - .cast(&LogicalType::Varchar(None))? + .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))? .utf8() .unwrap_or("".to_owned())) }; diff --git a/src/expression/mod.rs b/src/expression/mod.rs index ab97c43c..db5f749a 100644 --- a/src/expression/mod.rs +++ b/src/expression/mod.rs @@ -5,7 +5,7 @@ use std::hash::Hash; use std::sync::Arc; use std::{fmt, mem}; -use sqlparser::ast::{BinaryOperator as SqlBinaryOperator, UnaryOperator as SqlUnaryOperator}; +use sqlparser::ast::{BinaryOperator as SqlBinaryOperator, CharLengthUnits, UnaryOperator as SqlUnaryOperator}; use self::agg::AggKind; use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef}; @@ -394,7 +394,7 @@ impl ScalarExpression { ScalarExpression::IsNull { .. } | ScalarExpression::In { .. 
} | ScalarExpression::Between { .. } => LogicalType::Boolean, - ScalarExpression::SubString { .. } => LogicalType::Varchar(None), + ScalarExpression::SubString { .. } => LogicalType::Varchar(None, CharLengthUnits::Characters), ScalarExpression::Position { .. } => LogicalType::Integer, ScalarExpression::Alias { expr, .. } | ScalarExpression::Reference { expr, .. } => { expr.return_type() diff --git a/src/expression/value_compute.rs b/src/expression/value_compute.rs index 3ceb7031..11f7363b 100644 --- a/src/expression/value_compute.rs +++ b/src/expression/value_compute.rs @@ -4,6 +4,7 @@ use crate::types::value::{DataValue, Utf8Type, ValueRef}; use crate::types::LogicalType; use regex::Regex; use std::cmp::Ordering; +use sqlparser::ast::CharLengthUnits; fn unpack_bool(value: DataValue) -> Option { match value { @@ -193,8 +194,8 @@ impl DataValue { op: &BinaryOperator, ) -> Result { if let BinaryOperator::Like(escape_char) | BinaryOperator::NotLike(escape_char) = op { - let value_option = unpack_utf8(self.clone().cast(&LogicalType::Varchar(None))?); - let pattern_option = unpack_utf8(right.clone().cast(&LogicalType::Varchar(None))?); + let value_option = unpack_utf8(self.clone().cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?); + let pattern_option = unpack_utf8(right.clone().cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?); let mut is_match = if let (Some(value), Some(pattern)) = (value_option, pattern_option) { @@ -511,7 +512,7 @@ impl DataValue { _ => return Err(DatabaseError::UnsupportedBinaryOperator(unified_type, *op)), } } - LogicalType::Varchar(_) | LogicalType::Char(_) => { + LogicalType::Varchar(_, _) | LogicalType::Char(_, _) => { let left_value = unpack_utf8(self.clone().cast(&unified_type)?); let right_value = unpack_utf8(right.clone().cast(&unified_type)?); @@ -576,7 +577,8 @@ impl DataValue { DataValue::Utf8 { value, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, } } _ => 
return Err(DatabaseError::UnsupportedBinaryOperator(unified_type, *op)), @@ -649,6 +651,7 @@ impl DataValue { #[cfg(test)] mod test { + use sqlparser::ast::CharLengthUnits; use crate::errors::DatabaseError; use crate::expression::BinaryOperator; use crate::types::value::{DataValue, Utf8Type}; @@ -1544,11 +1547,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("b".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::Gt )?, @@ -1558,11 +1563,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("b".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::Lt )?, @@ -1572,11 +1579,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::GtEq )?, @@ -1586,11 +1595,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::LtEq )?, @@ -1600,11 +1611,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, 
&DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::NotEq )?, @@ -1614,11 +1627,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::Eq )?, @@ -1629,11 +1644,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: None, - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::Gt )?, @@ -1643,11 +1660,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: None, - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::Lt )?, @@ -1657,11 +1676,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: None, - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::GtEq )?, @@ -1671,11 +1692,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: None, - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::LtEq )?, @@ -1685,11 +1708,13 @@ 
mod test { DataValue::binary_op( &DataValue::Utf8 { value: None, - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::NotEq )?, diff --git a/src/marcos/mod.rs b/src/marcos/mod.rs index 39332a1d..f82ddd67 100644 --- a/src/marcos/mod.rs +++ b/src/marcos/mod.rs @@ -138,6 +138,7 @@ mod test { use serde::Deserialize; use serde::Serialize; use std::sync::Arc; + use sqlparser::ast::CharLengthUnits; fn build_tuple() -> (Tuple, SchemaRef) { let schema_ref = Arc::new(vec![ @@ -149,14 +150,15 @@ mod test { Arc::new(ColumnCatalog::new( "c2".to_string(), false, - ColumnDesc::new(LogicalType::Varchar(None), false, false, None), + ColumnDesc::new(LogicalType::Varchar(None, CharLengthUnits::Characters), false, false, None), )), ]); let values = vec![ Arc::new(DataValue::Int32(Some(9))), Arc::new(DataValue::Utf8 { value: Some("LOL".to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }), ]; @@ -207,7 +209,8 @@ mod test { ScalarExpression::Constant(Arc::new(DataValue::Int8(Some(1)))), ScalarExpression::Constant(Arc::new(DataValue::Utf8 { value: Some("1".to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })), ], &Tuple { diff --git a/src/optimizer/core/histogram.rs b/src/optimizer/core/histogram.rs index 17cdf193..a103565b 100644 --- a/src/optimizer/core/histogram.rs +++ b/src/optimizer/core/histogram.rs @@ -255,7 +255,7 @@ impl Histogram { ) -> Result { let float_value = |value: &DataValue, prefix_len: usize| { let value = match value.logical_type() { - LogicalType::Varchar(_) | LogicalType::Char(_) => match value { + LogicalType::Varchar(_, ..) | LogicalType::Char(_, ..) => match value { DataValue::Utf8 { value, .. 
} => value.as_ref().map(|string| { if prefix_len > string.len() { return 0.0; diff --git a/src/optimizer/rule/normalization/column_pruning.rs b/src/optimizer/rule/normalization/column_pruning.rs index e48e98e1..1322a669 100644 --- a/src/optimizer/rule/normalization/column_pruning.rs +++ b/src/optimizer/rule/normalization/column_pruning.rs @@ -12,6 +12,7 @@ use itertools::Itertools; use lazy_static::lazy_static; use std::collections::HashSet; use std::sync::Arc; +use sqlparser::ast::CharLengthUnits; lazy_static! { static ref COLUMN_PRUNING_RULE: Pattern = { @@ -63,7 +64,8 @@ impl ColumnPruning { if op.agg_calls.is_empty() && op.groupby_exprs.is_empty() { let value = Arc::new(DataValue::Utf8 { value: Some("*".to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }); // only single COUNT(*) is not depend on any column // removed all expressions from the aggregate: push a COUNT(*) diff --git a/src/storage/table_codec.rs b/src/storage/table_codec.rs index 3994e489..fc05b7d5 100644 --- a/src/storage/table_codec.rs +++ b/src/storage/table_codec.rs @@ -172,7 +172,7 @@ impl TableCodec { | LogicalType::USmallint | LogicalType::UInteger | LogicalType::UBigint - | LogicalType::Varchar(_) + | LogicalType::Varchar(_, ..) 
) { return Err(DatabaseError::InvalidType); } diff --git a/src/types/mod.rs b/src/types/mod.rs index 209dcda6..8e9e05f7 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -10,7 +10,7 @@ use std::any::TypeId; use std::cmp; use crate::errors::DatabaseError; -use sqlparser::ast::{CharLengthUnits, CharacterLength, ExactNumberInfo, TimezoneInfo}; +use sqlparser::ast::{CharLengthUnits, ExactNumberInfo, TimezoneInfo}; use strum_macros::AsRefStr; pub type ColumnId = u32; @@ -34,8 +34,8 @@ pub enum LogicalType { UBigint, Float, Double, - Char(u32), - Varchar(Option), + Char(u32, CharLengthUnits), + Varchar(Option, CharLengthUnits), Date, DateTime, Time, @@ -75,7 +75,7 @@ impl LogicalType { } else if type_id == TypeId::of::() { Some(LogicalType::Decimal(None, None)) } else if type_id == TypeId::of::() { - Some(LogicalType::Varchar(None)) + Some(LogicalType::Varchar(None, CharLengthUnits::Characters)) } else { None } @@ -96,8 +96,11 @@ impl LogicalType { LogicalType::Float => Some(4), LogicalType::Double => Some(8), /// Note: The non-fixed length type's raw_len is None e.g. 
Varchar - LogicalType::Varchar(_) => None, - LogicalType::Char(len) => Some(*len as usize), + LogicalType::Varchar(_, _) => None, + LogicalType::Char(len, unit) => match unit { + CharLengthUnits::Characters => None, + CharLengthUnits::Octets => Some(*len as usize) + }, LogicalType::Decimal(_, _) => Some(16), LogicalType::Date => Some(4), LogicalType::DateTime => Some(8), @@ -179,8 +182,8 @@ impl LogicalType { } if matches!( (left, right), - (LogicalType::Date, LogicalType::Varchar(_)) - | (LogicalType::Varchar(_), LogicalType::Date) + (LogicalType::Date, LogicalType::Varchar(_, ..)) + | (LogicalType::Varchar(_, ..), LogicalType::Date) ) { return Ok(LogicalType::Date); } @@ -192,15 +195,15 @@ impl LogicalType { } if matches!( (left, right), - (LogicalType::DateTime, LogicalType::Varchar(_)) - | (LogicalType::Varchar(_), LogicalType::DateTime) + (LogicalType::DateTime, LogicalType::Varchar(_, ..)) + | (LogicalType::Varchar(_, ..), LogicalType::DateTime) ) { return Ok(LogicalType::DateTime); } - if let (LogicalType::Char(_), LogicalType::Varchar(len)) - | (LogicalType::Varchar(len), LogicalType::Char(_)) = (left, right) + if let (LogicalType::Char(_, ..), LogicalType::Varchar(len, ..)) + | (LogicalType::Varchar(len, ..), LogicalType::Char(_, ..)) = (left, right) { - return Ok(LogicalType::Varchar(*len)); + return Ok(LogicalType::Varchar(*len, CharLengthUnits::Characters)); } Err(DatabaseError::Incomparable(*left, *right)) } @@ -296,20 +299,20 @@ impl LogicalType { LogicalType::UBigint => matches!(to, LogicalType::Float | LogicalType::Double), LogicalType::Float => matches!(to, LogicalType::Double), LogicalType::Double => false, - LogicalType::Char(_) => false, - LogicalType::Varchar(_) => false, + LogicalType::Char(_, ..) => false, + LogicalType::Varchar(_, ..) => false, LogicalType::Date => matches!( to, - LogicalType::DateTime | LogicalType::Varchar(_) | LogicalType::Char(_) + LogicalType::DateTime | LogicalType::Varchar(_, ..) | LogicalType::Char(_, ..) 
), LogicalType::DateTime => matches!( to, LogicalType::Date | LogicalType::Time - | LogicalType::Varchar(_) - | LogicalType::Char(_) + | LogicalType::Varchar(_, ..) + | LogicalType::Char(_, ..) ), - LogicalType::Time => matches!(to, LogicalType::Varchar(_) | LogicalType::Char(_)), + LogicalType::Time => matches!(to, LogicalType::Varchar(_, ..) | LogicalType::Char(_, ..)), LogicalType::Decimal(_, _) | LogicalType::Tuple => false, } } @@ -324,31 +327,23 @@ impl TryFrom for LogicalType { sqlparser::ast::DataType::Char(char_len) | sqlparser::ast::DataType::Character(char_len) => { let mut len = 1; - if let Some(CharacterLength { length, unit }) = char_len { - if matches!(unit, Some(CharLengthUnits::Octets)) { - return Err(DatabaseError::UnsupportedStmt(format!( - "char unit: {:?}", - unit - ))); - } - len = cmp::max(len, length) + let mut char_unit = None; + if let Some(sqlparser::ast::CharacterLength { length, unit }) = char_len { + len = cmp::max(len, length); + char_unit = unit; } - Ok(LogicalType::Char(len as u32)) + Ok(LogicalType::Char(len as u32, char_unit.unwrap_or(CharLengthUnits::Characters))) } sqlparser::ast::DataType::CharVarying(varchar_len) | sqlparser::ast::DataType::CharacterVarying(varchar_len) | sqlparser::ast::DataType::Varchar(varchar_len) => { let mut len = None; - if let Some(CharacterLength { length, unit }) = varchar_len { - if matches!(unit, Some(CharLengthUnits::Octets)) { - return Err(DatabaseError::UnsupportedStmt(format!( - "char unit: {:?}", - unit - ))); - } - len = Some(length as u32) + let mut char_unit = None; + if let Some(sqlparser::ast::CharacterLength { length, unit }) = varchar_len { + len = Some(length as u32); + char_unit = unit; } - Ok(LogicalType::Varchar(len)) + Ok(LogicalType::Varchar(len, char_unit.unwrap_or(CharLengthUnits::Characters))) } sqlparser::ast::DataType::Float(_) => Ok(LogicalType::Float), sqlparser::ast::DataType::Double | sqlparser::ast::DataType::DoublePrecision => { diff --git a/src/types/tuple.rs 
b/src/types/tuple.rs index dff31ffb..97d5cd5d 100644 --- a/src/types/tuple.rs +++ b/src/types/tuple.rs @@ -176,6 +176,7 @@ mod tests { use itertools::Itertools; use rust_decimal::Decimal; use std::sync::Arc; + use sqlparser::ast::CharLengthUnits; #[test] fn test_tuple_serialize_to_and_deserialize_from() { @@ -193,7 +194,7 @@ mod tests { Arc::new(ColumnCatalog::new( "c3".to_string(), false, - ColumnDesc::new(LogicalType::Varchar(Some(2)), false, false, None), + ColumnDesc::new(LogicalType::Varchar(Some(2), CharLengthUnits::Characters), false, false, None), )), Arc::new(ColumnCatalog::new( "c4".to_string(), @@ -248,7 +249,17 @@ mod tests { Arc::new(ColumnCatalog::new( "c14".to_string(), false, - ColumnDesc::new(LogicalType::Char(1), false, false, None), + ColumnDesc::new(LogicalType::Char(1, CharLengthUnits::Characters), false, false, None), + )), + Arc::new(ColumnCatalog::new( + "c15".to_string(), + false, + ColumnDesc::new(LogicalType::Varchar(Some(2), CharLengthUnits::Octets), false, false, None), + )), + Arc::new(ColumnCatalog::new( + "c16".to_string(), + false, + ColumnDesc::new(LogicalType::Char(1, CharLengthUnits::Octets), false, false, None), )), ]); @@ -260,7 +271,8 @@ mod tests { Arc::new(DataValue::UInt32(Some(1))), Arc::new(DataValue::Utf8 { value: Some("LOL".to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(Some(2)), + unit: CharLengthUnits::Characters, }), Arc::new(DataValue::Int16(Some(1))), Arc::new(DataValue::UInt16(Some(1))), @@ -275,6 +287,17 @@ mod tests { Arc::new(DataValue::Utf8 { value: Some("K".to_string()), ty: Utf8Type::Fixed(1), + unit: CharLengthUnits::Characters, + }), + Arc::new(DataValue::Utf8 { + value: Some("LOL".to_string()), + ty: Utf8Type::Variable(Some(2)), + unit: CharLengthUnits::Octets, + }), + Arc::new(DataValue::Utf8 { + value: Some("K".to_string()), + ty: Utf8Type::Fixed(1), + unit: CharLengthUnits::Octets, }), ], }, @@ -285,7 +308,8 @@ mod tests { Arc::new(DataValue::UInt32(None)), Arc::new(DataValue::Utf8 { 
value: None, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(Some(2)), + unit: CharLengthUnits::Characters, }), Arc::new(DataValue::Int16(None)), Arc::new(DataValue::UInt16(None)), @@ -300,6 +324,17 @@ mod tests { Arc::new(DataValue::Utf8 { value: None, ty: Utf8Type::Fixed(1), + unit: CharLengthUnits::Characters, + }), + Arc::new(DataValue::Utf8 { + value: None, + ty: Utf8Type::Variable(Some(2)), + unit: CharLengthUnits::Octets, + }), + Arc::new(DataValue::Utf8 { + value: None, + ty: Utf8Type::Fixed(1), + unit: CharLengthUnits::Octets, }), ], }, @@ -312,13 +347,13 @@ mod tests { let tuple_0 = Tuple::deserialize_from( &types, - &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], &columns, &tuples[0].serialize_to(&types).unwrap(), ); let tuple_1 = Tuple::deserialize_from( &types, - &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], &columns, &tuples[1].serialize_to(&types).unwrap(), ); diff --git a/src/types/tuple_builder.rs b/src/types/tuple_builder.rs index d9763a87..95c16d33 100644 --- a/src/types/tuple_builder.rs +++ b/src/types/tuple_builder.rs @@ -2,6 +2,7 @@ use crate::errors::DatabaseError; use crate::types::tuple::{Schema, Tuple}; use crate::types::value::{DataValue, Utf8Type}; use std::sync::Arc; +use sqlparser::ast::CharLengthUnits; pub struct TupleBuilder<'a> { schema: &'a Schema, @@ -15,7 +16,8 @@ impl<'a> TupleBuilder<'a> { pub fn build_result(message: String) -> Tuple { let values = vec![Arc::new(DataValue::Utf8 { value: Some(message), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })]; Tuple { id: None, values } @@ -32,7 +34,8 @@ impl<'a> TupleBuilder<'a> { let data_value = Arc::new( DataValue::Utf8 { value: Some(value.to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, } .cast(self.schema[i].datatype())?, ); diff --git 
a/src/types/value.rs b/src/types/value.rs index 9e4300d0..9dac3b66 100644 --- a/src/types/value.rs +++ b/src/types/value.rs @@ -14,6 +14,7 @@ use crate::errors::DatabaseError; use ordered_float::OrderedFloat; use rust_decimal::prelude::{FromPrimitive, ToPrimitive}; use serde::{Deserialize, Serialize}; +use sqlparser::ast::{CharLengthUnits}; use super::LogicalType; @@ -34,7 +35,7 @@ pub type ValueRef = Arc; #[derive(Clone, Serialize, Deserialize)] pub enum Utf8Type { - Variable, + Variable(Option), Fixed(u32), } @@ -55,6 +56,7 @@ pub enum DataValue { Utf8 { value: Option, ty: Utf8Type, + unit: CharLengthUnits, }, /// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01 Date32(Option), @@ -248,23 +250,25 @@ impl Hash for DataValue { } } macro_rules! varchar_cast { - ($value:expr, $len:expr, $ty:expr) => { + ($value:expr, $len:expr, $ty:expr, $unit:expr) => { $value .map(|v| { let string_value = format!("{}", v); if let Some(len) = $len { - if string_value.len() > *len as usize { + if Self::check_string_len(&string_value, *len as usize, $unit) { return Err(DatabaseError::TooLong); } } Ok(DataValue::Utf8 { value: Some(string_value), ty: $ty, + unit: $unit, }) }) .unwrap_or(Ok(DataValue::Utf8 { value: None, ty: $ty, + unit: $unit, })) }; } @@ -315,22 +319,48 @@ impl DataValue { } } + pub(crate) fn check_string_len(string: &str, len: usize, unit: CharLengthUnits) -> bool { + match unit { + CharLengthUnits::Characters => string.chars().count() > len, + CharLengthUnits::Octets => string.len() > len, + } + } + pub(crate) fn check_len(&self, logic_type: &LogicalType) -> Result<(), DatabaseError> { let is_over_len = match (logic_type, self) { + (LogicalType::Varchar(None, _), _) => false, + ( + LogicalType::Varchar(Some(len), CharLengthUnits::Characters), + DataValue::Utf8 { + value: Some(val), + ty: Utf8Type::Variable(_), + unit: CharLengthUnits::Characters + }, + ) + | ( + LogicalType::Char(len, CharLengthUnits::Characters), + DataValue::Utf8 { + value: 
Some(val), + ty: Utf8Type::Fixed(_), + unit: CharLengthUnits::Characters + }, + ) => Self::check_string_len(val, *len as usize, CharLengthUnits::Characters), ( - LogicalType::Varchar(Some(len)), + LogicalType::Varchar(Some(len), CharLengthUnits::Octets), DataValue::Utf8 { value: Some(val), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(_), + unit: CharLengthUnits::Octets }, ) | ( - LogicalType::Char(len), + LogicalType::Char(len, CharLengthUnits::Octets), DataValue::Utf8 { value: Some(val), ty: Utf8Type::Fixed(_), + unit: CharLengthUnits::Octets }, - ) => val.len() > *len as usize, + ) => Self::check_string_len(val, *len as usize, CharLengthUnits::Octets), (LogicalType::Decimal(full_len, scale_len), DataValue::Decimal(Some(val))) => { if let Some(len) = full_len { if val.mantissa().ilog10() + 1 > *len as u32 { @@ -404,13 +434,17 @@ impl DataValue { LogicalType::UBigint => DataValue::UInt64(None), LogicalType::Float => DataValue::Float32(None), LogicalType::Double => DataValue::Float64(None), - LogicalType::Char(len) => DataValue::Utf8 { - value: None, - ty: Utf8Type::Fixed(*len), + LogicalType::Char(len, unit) => { + DataValue::Utf8 { + value: None, + ty: Utf8Type::Fixed(*len), + unit: *unit, + } }, - LogicalType::Varchar(_) => DataValue::Utf8 { + LogicalType::Varchar(len, unit) => DataValue::Utf8 { value: None, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(*len), + unit: *unit, }, LogicalType::Date => DataValue::Date32(None), LogicalType::DateTime => DataValue::Date64(None), @@ -435,13 +469,17 @@ impl DataValue { LogicalType::UBigint => DataValue::UInt64(Some(0)), LogicalType::Float => DataValue::Float32(Some(0.0)), LogicalType::Double => DataValue::Float64(Some(0.0)), - LogicalType::Char(len) => DataValue::Utf8 { - value: Some(String::new()), - ty: Utf8Type::Fixed(*len), + LogicalType::Char(len, unit) => { + DataValue::Utf8 { + value: Some(String::new()), + ty: Utf8Type::Fixed(*len), + unit: *unit, + } }, - LogicalType::Varchar(_) => DataValue::Utf8 { + 
LogicalType::Varchar(len, unit) => DataValue::Utf8 { value: Some(String::new()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(*len), + unit: *unit, }, LogicalType::Date => DataValue::Date32(Some(UNIX_DATETIME.num_days_from_ce())), LogicalType::DateTime => DataValue::Date64(Some(UNIX_DATETIME.timestamp())), @@ -511,10 +549,10 @@ impl DataValue { return Ok(bytes.write_fixedint(*v)?); } } - DataValue::Utf8 { value: v, ty } => { + DataValue::Utf8 { value: v, ty, unit } => { if let Some(v) = v { match ty { - Utf8Type::Variable => { + Utf8Type::Variable(_) => { let string_bytes = v.as_bytes(); let len = string_bytes.len(); @@ -522,12 +560,30 @@ impl DataValue { return Ok(len); } Utf8Type::Fixed(len) => { - let mut string_bytes = - format!("{:len$}", v, len = *len as usize).into_bytes(); - let len = string_bytes.len(); - - bytes.append(&mut string_bytes); - return Ok(len); + match unit { + CharLengthUnits::Characters => { + let chars_len = *len as usize; + let mut string_bytes = + format!("{:len$}", v, len = chars_len).into_bytes(); + let octets_len = string_bytes.len(); + + bytes.append(&mut string_bytes); + return Ok(octets_len); + } + CharLengthUnits::Octets => { + let octets_len = *len as usize; + let mut string = v.clone(); + + for _ in 0..(octets_len - string.len()) { + string.push(' ') + } + let mut string_bytes = string.into_bytes(); + + assert_eq!(octets_len, string_bytes.len()); + bytes.append(&mut string_bytes); + return Ok(octets_len); + } + } } } } @@ -597,7 +653,7 @@ impl DataValue { buf.copy_from_slice(bytes); f64::from_ne_bytes(buf) })), - LogicalType::Char(len) => { + LogicalType::Char(len, unit) => { // 
https://dev.mysql.com/doc/refman/8.0/en/char.html#:~:text=If%20a%20given%20value%20is%20stored%20into%20the%20CHAR(4)%20and%20VARCHAR(4)%20columns%2C%20the%20values%20retrieved%20from%20the%20columns%20are%20not%20always%20the%20same%20because%20trailing%20spaces%20are%20removed%20from%20CHAR%20columns%20upon%20retrieval.%20The%20following%20example%20illustrates%20this%20difference%3A let value = (!bytes.is_empty()).then(|| { let last_non_zero_index = match bytes.iter().rposition(|&x| x != b' ') { @@ -609,14 +665,16 @@ impl DataValue { DataValue::Utf8 { value, ty: Utf8Type::Fixed(*len), + unit: *unit, } } - LogicalType::Varchar(_) => { + LogicalType::Varchar(len, unit) => { let value = (!bytes.is_empty()).then(|| String::from_utf8(bytes.to_owned()).unwrap()); DataValue::Utf8 { value, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(*len), + unit: *unit, } } LogicalType::Date => { @@ -651,13 +709,19 @@ impl DataValue { DataValue::UInt32(_) => LogicalType::UInteger, DataValue::UInt64(_) => LogicalType::UBigint, DataValue::Utf8 { - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(len), + unit, .. - } => LogicalType::Varchar(None), + } => { + LogicalType::Varchar(*len, *unit) + }, DataValue::Utf8 { ty: Utf8Type::Fixed(len), + unit, .. 
- } => LogicalType::Char(*len), + } => { + LogicalType::Char(*len, *unit) + }, DataValue::Date32(_) => LogicalType::Date, DataValue::Date64(_) => LogicalType::DateTime, DataValue::Time(_) => LogicalType::Time, @@ -796,13 +860,17 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(None)), LogicalType::Float => Ok(DataValue::Float32(None)), LogicalType::Double => Ok(DataValue::Float64(None)), - LogicalType::Char(len) => Ok(DataValue::Utf8 { - value: None, - ty: Utf8Type::Fixed(*len), - }), - LogicalType::Varchar(_) => Ok(DataValue::Utf8 { + LogicalType::Char(len, unit) => { + Ok(DataValue::Utf8 { + value: None, + ty: Utf8Type::Fixed(*len), + unit: *unit, + }) + }, + LogicalType::Varchar(len, unit) => Ok(DataValue::Utf8 { value: None, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(*len), + unit: *unit, }), LogicalType::Date => Ok(DataValue::Date32(None)), LogicalType::DateTime => Ok(DataValue::Date64(None)), @@ -823,16 +891,16 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), + LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), _ => Err(DatabaseError::CastFail), }, DataValue::Float32(value) => match to { LogicalType::SqlNull => Ok(DataValue::Null), LogicalType::Float => Ok(DataValue::Float32(value)), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => 
varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), + LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), LogicalType::Decimal(_, option) => Ok(DataValue::Decimal( value .map(|v| { @@ -850,8 +918,8 @@ impl DataValue { LogicalType::SqlNull => Ok(DataValue::Null), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value)), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), + LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), LogicalType::Decimal(_, option) => Ok(DataValue::Decimal( value .map(|v| { @@ -883,8 +951,8 @@ impl DataValue { LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), + LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -911,8 +979,8 @@ impl DataValue { LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => 
varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), + LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -938,8 +1006,8 @@ impl DataValue { LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), + LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -964,8 +1032,8 @@ impl DataValue { LogicalType::Bigint => Ok(DataValue::Int64(value)), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v as f64))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), + LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -986,8 +1054,8 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| 
v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), + LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1006,8 +1074,8 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), + LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1024,8 +1092,8 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), + LogicalType::Varchar(len, unit) => 
varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1040,8 +1108,8 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value)), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v as f64))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), + LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1087,8 +1155,8 @@ impl DataValue { LogicalType::Double => Ok(DataValue::Float64( value.map(|v| f64::from_str(&v)).transpose()?, )), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), + LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), LogicalType::Date => { let option = value .map(|v| { @@ -1130,11 +1198,11 @@ impl DataValue { }, DataValue::Date32(value) => match to { LogicalType::SqlNull => Ok(DataValue::Null), - LogicalType::Char(len) => { - varchar_cast!(Self::format_date(value), Some(len), Utf8Type::Fixed(*len)) + LogicalType::Char(len, unit) => { + varchar_cast!(Self::format_date(value), Some(len), Utf8Type::Fixed(*len), *unit) } - LogicalType::Varchar(len) => { - varchar_cast!(Self::format_date(value), len, Utf8Type::Variable) + LogicalType::Varchar(len, unit) => { 
+ varchar_cast!(Self::format_date(value), len, Utf8Type::Variable(*len), *unit) } LogicalType::Date => Ok(DataValue::Date32(value)), LogicalType::DateTime => { @@ -1150,15 +1218,16 @@ impl DataValue { }, DataValue::Date64(value) => match to { LogicalType::SqlNull => Ok(DataValue::Null), - LogicalType::Char(len) => { + LogicalType::Char(len, unit) => { varchar_cast!( Self::format_datetime(value), Some(len), - Utf8Type::Fixed(*len) + Utf8Type::Fixed(*len), + *unit ) } - LogicalType::Varchar(len) => { - varchar_cast!(Self::format_datetime(value), len, Utf8Type::Variable) + LogicalType::Varchar(len, unit) => { + varchar_cast!(Self::format_datetime(value), len, Utf8Type::Variable(*len), *unit) } LogicalType::Date => { let option = value.and_then(|v| { @@ -1181,11 +1250,11 @@ impl DataValue { }, DataValue::Time(value) => match to { LogicalType::SqlNull => Ok(DataValue::Null), - LogicalType::Char(len) => { - varchar_cast!(Self::format_time(value), Some(len), Utf8Type::Fixed(*len)) + LogicalType::Char(len, unit) => { + varchar_cast!(Self::format_time(value), Some(len), Utf8Type::Fixed(*len), *unit) } - LogicalType::Varchar(len) => { - varchar_cast!(Self::format_time(value), len, Utf8Type::Variable) + LogicalType::Varchar(len, unit) => { + varchar_cast!(Self::format_time(value), len, Utf8Type::Variable(*len), *unit) } _ => Err(DatabaseError::CastFail), }, @@ -1194,8 +1263,8 @@ impl DataValue { LogicalType::Float => Ok(DataValue::Float32(value.and_then(|v| v.to_f32()))), LogicalType::Double => Ok(DataValue::Float64(value.and_then(|v| v.to_f64()))), LogicalType::Decimal(_, _) => Ok(DataValue::Decimal(value)), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), + LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), _ => 
Err(DatabaseError::CastFail), }, DataValue::Tuple(values) => match to { @@ -1306,7 +1375,8 @@ impl From for DataValue { fn from(value: String) -> Self { DataValue::Utf8 { value: Some(value), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, } } } @@ -1315,7 +1385,8 @@ impl From> for DataValue { fn from(value: Option) -> Self { DataValue::Utf8 { value, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, } } } diff --git a/tests/slt/char.slt b/tests/slt/char.slt new file mode 100644 index 00000000..16a98891 --- /dev/null +++ b/tests/slt/char.slt @@ -0,0 +1,47 @@ +statement ok +create table t1(id int primary key, v1 char, v2 char(5 characters), v3 char(5 octets)) + +statement ok +insert into t1 values(0, '🖕', '🖕🖕🖕🖕🖕', '🖕'); + +statement ok +insert into t1 values(1, null, null, null); + +statement error +insert into t1 values(1, '🖕', '🖕🖕🖕🖕🖕🖕', '🖕'); + +statement error +insert into t1 values(1, '🖕', '🖕🖕🖕🖕🖕', '🖕🖕'); + +query ITT +select * from t1; +---- +0 🖕 🖕🖕🖕🖕🖕 🖕 +1 null null null + +statement ok +create table t2(id int primary key, v1 varchar, v2 varchar(5 characters), v3 varchar(5 octets)) + +statement ok +insert into t2 values(0, '🖕', '🖕🖕🖕🖕🖕', '🖕'); + +statement ok +insert into t2 values(1, null, null, null); + +statement error +insert into t2 values(1, '🖕', '🖕🖕🖕🖕🖕🖕', '🖕'); + +statement error +insert into t2 values(1, '🖕', '🖕🖕🖕🖕🖕', '🖕🖕'); + +query ITT +select * from t2; +---- +0 🖕 🖕🖕🖕🖕🖕 🖕 +1 null null null + +statement ok +drop table t1; + +statement ok +drop table t2; \ No newline at end of file diff --git a/tests/slt/sql_2016/E021_01.slt b/tests/slt/sql_2016/E021_01.slt index ffe33d48..1c0cea04 100644 --- a/tests/slt/sql_2016/E021_01.slt +++ b/tests/slt/sql_2016/E021_01.slt @@ -6,9 +6,8 @@ CREATE TABLE TABLE_E021_01_01_01 ( ID INT PRIMARY KEY, A CHAR ( 8 ) ) statement ok CREATE TABLE TABLE_E021_01_01_02 ( ID INT PRIMARY KEY, A CHAR ( 8 CHARACTERS ) ) -# TODO: char unit: OCTETS 
-# statement ok -# CREATE TABLE TABLE_E021_01_01_03 ( ID INT PRIMARY KEY, A CHAR ( 8 OCTETS ) ) +statement ok +CREATE TABLE TABLE_E021_01_01_03 ( ID INT PRIMARY KEY, A CHAR ( 8 OCTETS ) ) statement ok CREATE TABLE TABLE_E021_01_01_04 ( ID INT PRIMARY KEY, A CHAR ) @@ -19,9 +18,8 @@ CREATE TABLE TABLE_E021_01_01_05 ( ID INT PRIMARY KEY, A CHARACTER ( 8 ) ) statement ok CREATE TABLE TABLE_E021_01_01_06 ( ID INT PRIMARY KEY, A CHARACTER ( 8 CHARACTERS ) ) -# TODO: char unit: OCTETS -# statement ok -# CREATE TABLE TABLE_E021_01_01_07 ( ID INT PRIMARY KEY, A CHARACTER ( 8 OCTETS ) ) +statement ok +CREATE TABLE TABLE_E021_01_01_07 ( ID INT PRIMARY KEY, A CHARACTER ( 8 OCTETS ) ) statement ok CREATE TABLE TABLE_E021_01_01_08 ( ID INT PRIMARY KEY, A CHARACTER ) diff --git a/tests/slt/sql_2016/E021_02.slt b/tests/slt/sql_2016/E021_02.slt index 45045a94..b4cc607c 100644 --- a/tests/slt/sql_2016/E021_02.slt +++ b/tests/slt/sql_2016/E021_02.slt @@ -6,9 +6,8 @@ CREATE TABLE TABLE_E021_02_01_01 ( ID INT PRIMARY KEY, A CHAR VARYING ( 8 ) ) statement ok CREATE TABLE TABLE_E021_02_01_02 ( ID INT PRIMARY KEY, A CHAR VARYING ( 8 CHARACTERS ) ) -# TODO: char unit: OCTETS -# statement ok -# CREATE TABLE TABLE_E021_02_01_03 ( ID INT PRIMARY KEY, A CHAR VARYING ( 8 OCTETS ) ) +statement ok +CREATE TABLE TABLE_E021_02_01_03 ( ID INT PRIMARY KEY, A CHAR VARYING ( 8 OCTETS ) ) statement ok CREATE TABLE TABLE_E021_02_01_04 ( ID INT PRIMARY KEY, A CHAR VARYING ) @@ -19,9 +18,8 @@ CREATE TABLE TABLE_E021_02_01_05 ( ID INT PRIMARY KEY, A CHARACTER VARYING ( 8 ) statement ok CREATE TABLE TABLE_E021_02_01_06 ( ID INT PRIMARY KEY, A CHARACTER VARYING ( 8 CHARACTERS ) ) -# TODO: char unit: OCTETS -# statement ok -# CREATE TABLE TABLE_E021_02_01_07 ( ID INT PRIMARY KEY, A CHARACTER VARYING ( 8 OCTETS ) ) +statement ok +CREATE TABLE TABLE_E021_02_01_07 ( ID INT PRIMARY KEY, A CHARACTER VARYING ( 8 OCTETS ) ) statement ok CREATE TABLE TABLE_E021_02_01_08 ( ID INT PRIMARY KEY, A CHARACTER VARYING ) @@ 
-32,9 +30,8 @@ CREATE TABLE TABLE_E021_02_01_09 ( ID INT PRIMARY KEY, A VARCHAR ( 8 ) ) statement ok CREATE TABLE TABLE_E021_02_01_10 ( ID INT PRIMARY KEY, A VARCHAR ( 8 CHARACTERS ) ) -# TODO: char unit: OCTETS -# statement ok -# CREATE TABLE TABLE_E021_02_01_11 ( ID INT PRIMARY KEY, A VARCHAR ( 8 OCTETS ) ) +statement ok +CREATE TABLE TABLE_E021_02_01_11 ( ID INT PRIMARY KEY, A VARCHAR ( 8 OCTETS ) ) statement ok CREATE TABLE TABLE_E021_02_01_12 ( ID INT PRIMARY KEY, A VARCHAR ) From 379080e74c0274d522b50ca790144d7d502cab47 Mon Sep 17 00:00:00 2001 From: Kould <2435992353@qq.com> Date: Wed, 27 Mar 2024 22:51:13 +0800 Subject: [PATCH 2/5] style: code fmt & fix `Char/Varchar` on Server --- src/bin/server.rs | 6 +- src/binder/create_table.rs | 9 +- src/binder/expr.rs | 5 +- src/catalog/column.rs | 9 +- src/execution/volcano/dml/analyze.rs | 4 +- src/execution/volcano/dml/copy_from_file.rs | 9 +- src/execution/volcano/dql/describe.rs | 2 +- src/execution/volcano/dql/explain.rs | 4 +- src/execution/volcano/dql/show_table.rs | 4 +- src/expression/evaluator.rs | 4 +- src/expression/mod.rs | 8 +- src/expression/value_compute.rs | 15 +- src/marcos/mod.rs | 9 +- src/optimizer/core/histogram.rs | 2 +- .../rule/normalization/column_pruning.rs | 2 +- src/storage/table_codec.rs | 2 +- src/types/mod.rs | 38 +-- src/types/tuple.rs | 30 ++- src/types/tuple_builder.rs | 2 +- src/types/value.rs | 235 +++++++++++------- 20 files changed, 263 insertions(+), 136 deletions(-) diff --git a/src/bin/server.rs b/src/bin/server.rs index 70747931..4c8450b8 100644 --- a/src/bin/server.rs +++ b/src/bin/server.rs @@ -198,7 +198,7 @@ fn encode_tuples<'a>(schema: &Schema, tuples: Vec) -> PgWireResult encoder.encode_field(&value.u64().map(|v| v as i64)), LogicalType::Float => encoder.encode_field(&value.float()), LogicalType::Double => encoder.encode_field(&value.double()), - LogicalType::Char(_) | LogicalType::Varchar(_) => { + LogicalType::Char(..) | LogicalType::Varchar(..) 
=> { encoder.encode_field(&value.utf8()) } LogicalType::Date => encoder.encode_field(&value.date()), @@ -225,9 +225,9 @@ fn into_pg_type(data_type: &LogicalType) -> PgWireResult { LogicalType::Bigint | LogicalType::UBigint => Type::INT8, LogicalType::Float => Type::FLOAT4, LogicalType::Double => Type::FLOAT8, - LogicalType::Varchar(_) => Type::VARCHAR, + LogicalType::Varchar(..) => Type::VARCHAR, LogicalType::Date | LogicalType::DateTime => Type::DATE, - LogicalType::Char(_) => Type::CHAR, + LogicalType::Char(..) => Type::CHAR, LogicalType::Time => Type::TIME, LogicalType::Decimal(_, _) => todo!(), _ => { diff --git a/src/binder/create_table.rs b/src/binder/create_table.rs index 96f87f11..9af59f0f 100644 --- a/src/binder/create_table.rs +++ b/src/binder/create_table.rs @@ -146,8 +146,8 @@ mod tests { use crate::storage::kip::KipStorage; use crate::storage::Storage; use crate::types::LogicalType; - use std::sync::atomic::AtomicUsize; use sqlparser::ast::CharLengthUnits; + use std::sync::atomic::AtomicUsize; use tempfile::TempDir; #[tokio::test] @@ -178,7 +178,12 @@ mod tests { assert_eq!(op.columns[1].nullable, true); assert_eq!( op.columns[1].desc, - ColumnDesc::new(LogicalType::Varchar(Some(10), CharLengthUnits::Characters), false, false, None) + ColumnDesc::new( + LogicalType::Varchar(Some(10), CharLengthUnits::Characters), + false, + false, + None + ) ); } _ => unreachable!(), diff --git a/src/binder/expr.rs b/src/binder/expr.rs index 18530e29..71228dc3 100644 --- a/src/binder/expr.rs +++ b/src/binder/expr.rs @@ -3,7 +3,10 @@ use crate::errors::DatabaseError; use crate::expression; use crate::expression::agg::AggKind; use itertools::Itertools; -use sqlparser::ast::{BinaryOperator, CharLengthUnits, DataType, Expr, Function, FunctionArg, FunctionArgExpr, Ident, Query, UnaryOperator}; +use sqlparser::ast::{ + BinaryOperator, CharLengthUnits, DataType, Expr, Function, FunctionArg, FunctionArgExpr, Ident, + Query, UnaryOperator, +}; use std::slice; use 
std::sync::Arc; diff --git a/src/catalog/column.rs b/src/catalog/column.rs index 15f31908..258536fd 100644 --- a/src/catalog/column.rs +++ b/src/catalog/column.rs @@ -2,9 +2,9 @@ use crate::catalog::TableName; use crate::errors::DatabaseError; use crate::expression::ScalarExpression; use serde::{Deserialize, Serialize}; +use sqlparser::ast::CharLengthUnits; use std::hash::Hash; use std::sync::Arc; -use sqlparser::ast::CharLengthUnits; use crate::types::tuple::EMPTY_TUPLE; use crate::types::value::ValueRef; @@ -51,7 +51,12 @@ impl ColumnCatalog { table_name: None, }, nullable: true, - desc: ColumnDesc::new(LogicalType::Varchar(None, CharLengthUnits::Characters), false, false, None), + desc: ColumnDesc::new( + LogicalType::Varchar(None, CharLengthUnits::Characters), + false, + false, + None, + ), } } diff --git a/src/execution/volcano/dml/analyze.rs b/src/execution/volcano/dml/analyze.rs index be7b9a08..8e1a6178 100644 --- a/src/execution/volcano/dml/analyze.rs +++ b/src/execution/volcano/dml/analyze.rs @@ -12,11 +12,11 @@ use crate::types::tuple::Tuple; use crate::types::value::{DataValue, Utf8Type}; use futures_async_stream::try_stream; use itertools::Itertools; +use sqlparser::ast::CharLengthUnits; use std::fmt::Formatter; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; use std::{fmt, fs}; -use sqlparser::ast::CharLengthUnits; const DEFAULT_NUM_OF_BUCKETS: usize = 100; const DEFAULT_STATISTICS_META_PATH: &str = "fnck_sql_statistics_metas"; @@ -110,7 +110,7 @@ impl Analyze { values.push(Arc::new(DataValue::Utf8 { value: Some(path.clone()), ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters + unit: CharLengthUnits::Characters, })); transaction.save_table_meta(&table_name, path, meta)?; } diff --git a/src/execution/volcano/dml/copy_from_file.rs b/src/execution/volcano/dml/copy_from_file.rs index cb9a3d60..3a8e7d4e 100644 --- a/src/execution/volcano/dml/copy_from_file.rs +++ b/src/execution/volcano/dml/copy_from_file.rs @@ -105,9 +105,9 @@ 
mod tests { use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnSummary}; use crate::db::DataBaseBuilder; use futures::StreamExt; + use sqlparser::ast::CharLengthUnits; use std::io::Write; use std::sync::Arc; - use sqlparser::ast::CharLengthUnits; use tempfile::TempDir; use super::*; @@ -149,7 +149,12 @@ mod tests { table_name: None, }, nullable: false, - desc: ColumnDesc::new(LogicalType::Varchar(Some(10), CharLengthUnits::Characters), false, false, None), + desc: ColumnDesc::new( + LogicalType::Varchar(Some(10), CharLengthUnits::Characters), + false, + false, + None, + ), }), ]; diff --git a/src/execution/volcano/dql/describe.rs b/src/execution/volcano/dql/describe.rs index 75279a95..814a7145 100644 --- a/src/execution/volcano/dql/describe.rs +++ b/src/execution/volcano/dql/describe.rs @@ -7,8 +7,8 @@ use crate::types::tuple::Tuple; use crate::types::value::{DataValue, Utf8Type, ValueRef}; use futures_async_stream::try_stream; use lazy_static::lazy_static; -use std::sync::Arc; use sqlparser::ast::CharLengthUnits; +use std::sync::Arc; lazy_static! 
{ static ref PRIMARY_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 { diff --git a/src/execution/volcano/dql/explain.rs b/src/execution/volcano/dql/explain.rs index 6be7fd3e..162657ef 100644 --- a/src/execution/volcano/dql/explain.rs +++ b/src/execution/volcano/dql/explain.rs @@ -5,8 +5,8 @@ use crate::storage::Transaction; use crate::types::tuple::Tuple; use crate::types::value::{DataValue, Utf8Type}; use futures_async_stream::try_stream; -use std::sync::Arc; use sqlparser::ast::CharLengthUnits; +use std::sync::Arc; pub struct Explain { plan: LogicalPlan, @@ -30,7 +30,7 @@ impl Explain { let values = vec![Arc::new(DataValue::Utf8 { value: Some(self.plan.explain(0)), ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters + unit: CharLengthUnits::Characters, })]; yield Tuple { id: None, values }; diff --git a/src/execution/volcano/dql/show_table.rs b/src/execution/volcano/dql/show_table.rs index 3ba7af6f..b9862f08 100644 --- a/src/execution/volcano/dql/show_table.rs +++ b/src/execution/volcano/dql/show_table.rs @@ -5,8 +5,8 @@ use crate::storage::Transaction; use crate::types::tuple::Tuple; use crate::types::value::{DataValue, Utf8Type}; use futures_async_stream::try_stream; -use std::sync::Arc; use sqlparser::ast::CharLengthUnits; +use std::sync::Arc; pub struct ShowTables; @@ -25,7 +25,7 @@ impl ShowTables { let values = vec![Arc::new(DataValue::Utf8 { value: Some(table_name.to_string()), ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters + unit: CharLengthUnits::Characters, })]; yield Tuple { id: None, values }; diff --git a/src/expression/evaluator.rs b/src/expression/evaluator.rs index 37437ee8..6eea7aee 100644 --- a/src/expression/evaluator.rs +++ b/src/expression/evaluator.rs @@ -7,10 +7,10 @@ use crate::types::value::{DataValue, Utf8Type, ValueRef}; use crate::types::LogicalType; use itertools::Itertools; use lazy_static::lazy_static; +use sqlparser::ast::CharLengthUnits; use std::cmp; use std::cmp::Ordering; use std::sync::Arc; -use 
sqlparser::ast::CharLengthUnits; lazy_static! { static ref NULL_VALUE: ValueRef = Arc::new(DataValue::Null); @@ -27,7 +27,7 @@ macro_rules! eval_to_num { return Ok(Arc::new(DataValue::Utf8 { value: None, ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters + unit: CharLengthUnits::Characters, })); } }; diff --git a/src/expression/mod.rs b/src/expression/mod.rs index db5f749a..3a7dc0c9 100644 --- a/src/expression/mod.rs +++ b/src/expression/mod.rs @@ -5,7 +5,9 @@ use std::hash::Hash; use std::sync::Arc; use std::{fmt, mem}; -use sqlparser::ast::{BinaryOperator as SqlBinaryOperator, CharLengthUnits, UnaryOperator as SqlUnaryOperator}; +use sqlparser::ast::{ + BinaryOperator as SqlBinaryOperator, CharLengthUnits, UnaryOperator as SqlUnaryOperator, +}; use self::agg::AggKind; use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef}; @@ -394,7 +396,9 @@ impl ScalarExpression { ScalarExpression::IsNull { .. } | ScalarExpression::In { .. } | ScalarExpression::Between { .. } => LogicalType::Boolean, - ScalarExpression::SubString { .. } => LogicalType::Varchar(None, CharLengthUnits::Characters), + ScalarExpression::SubString { .. } => { + LogicalType::Varchar(None, CharLengthUnits::Characters) + } ScalarExpression::Position { .. } => LogicalType::Integer, ScalarExpression::Alias { expr, .. } | ScalarExpression::Reference { expr, .. 
} => { expr.return_type() diff --git a/src/expression/value_compute.rs b/src/expression/value_compute.rs index 11f7363b..297f2905 100644 --- a/src/expression/value_compute.rs +++ b/src/expression/value_compute.rs @@ -3,8 +3,8 @@ use crate::expression::{BinaryOperator, UnaryOperator}; use crate::types::value::{DataValue, Utf8Type, ValueRef}; use crate::types::LogicalType; use regex::Regex; -use std::cmp::Ordering; use sqlparser::ast::CharLengthUnits; +use std::cmp::Ordering; fn unpack_bool(value: DataValue) -> Option { match value { @@ -194,8 +194,15 @@ impl DataValue { op: &BinaryOperator, ) -> Result { if let BinaryOperator::Like(escape_char) | BinaryOperator::NotLike(escape_char) = op { - let value_option = unpack_utf8(self.clone().cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?); - let pattern_option = unpack_utf8(right.clone().cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?); + let value_option = unpack_utf8( + self.clone() + .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?, + ); + let pattern_option = unpack_utf8( + right + .clone() + .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?, + ); let mut is_match = if let (Some(value), Some(pattern)) = (value_option, pattern_option) { @@ -651,10 +658,10 @@ impl DataValue { #[cfg(test)] mod test { - use sqlparser::ast::CharLengthUnits; use crate::errors::DatabaseError; use crate::expression::BinaryOperator; use crate::types::value::{DataValue, Utf8Type}; + use sqlparser::ast::CharLengthUnits; #[test] fn test_binary_op_arithmetic_plus() -> Result<(), DatabaseError> { diff --git a/src/marcos/mod.rs b/src/marcos/mod.rs index f82ddd67..8042d41d 100644 --- a/src/marcos/mod.rs +++ b/src/marcos/mod.rs @@ -137,8 +137,8 @@ mod test { use crate::types::LogicalType; use serde::Deserialize; use serde::Serialize; - use std::sync::Arc; use sqlparser::ast::CharLengthUnits; + use std::sync::Arc; fn build_tuple() -> (Tuple, SchemaRef) { let schema_ref = Arc::new(vec![ @@ 
-150,7 +150,12 @@ mod test { Arc::new(ColumnCatalog::new( "c2".to_string(), false, - ColumnDesc::new(LogicalType::Varchar(None, CharLengthUnits::Characters), false, false, None), + ColumnDesc::new( + LogicalType::Varchar(None, CharLengthUnits::Characters), + false, + false, + None, + ), )), ]); let values = vec![ diff --git a/src/optimizer/core/histogram.rs b/src/optimizer/core/histogram.rs index a103565b..6f1b9954 100644 --- a/src/optimizer/core/histogram.rs +++ b/src/optimizer/core/histogram.rs @@ -255,7 +255,7 @@ impl Histogram { ) -> Result { let float_value = |value: &DataValue, prefix_len: usize| { let value = match value.logical_type() { - LogicalType::Varchar(_, ..) | LogicalType::Char(_, ..) => match value { + LogicalType::Varchar(..) | LogicalType::Char(..) => match value { DataValue::Utf8 { value, .. } => value.as_ref().map(|string| { if prefix_len > string.len() { return 0.0; diff --git a/src/optimizer/rule/normalization/column_pruning.rs b/src/optimizer/rule/normalization/column_pruning.rs index 1322a669..d86d16ee 100644 --- a/src/optimizer/rule/normalization/column_pruning.rs +++ b/src/optimizer/rule/normalization/column_pruning.rs @@ -10,9 +10,9 @@ use crate::types::value::{DataValue, Utf8Type}; use crate::types::LogicalType; use itertools::Itertools; use lazy_static::lazy_static; +use sqlparser::ast::CharLengthUnits; use std::collections::HashSet; use std::sync::Arc; -use sqlparser::ast::CharLengthUnits; lazy_static! { static ref COLUMN_PRUNING_RULE: Pattern = { diff --git a/src/storage/table_codec.rs b/src/storage/table_codec.rs index fc05b7d5..c74aba96 100644 --- a/src/storage/table_codec.rs +++ b/src/storage/table_codec.rs @@ -172,7 +172,7 @@ impl TableCodec { | LogicalType::USmallint | LogicalType::UInteger | LogicalType::UBigint - | LogicalType::Varchar(_, ..) + | LogicalType::Varchar(..) 
) { return Err(DatabaseError::InvalidType); } diff --git a/src/types/mod.rs b/src/types/mod.rs index 8e9e05f7..53aec131 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -99,7 +99,7 @@ impl LogicalType { LogicalType::Varchar(_, _) => None, LogicalType::Char(len, unit) => match unit { CharLengthUnits::Characters => None, - CharLengthUnits::Octets => Some(*len as usize) + CharLengthUnits::Octets => Some(*len as usize), }, LogicalType::Decimal(_, _) => Some(16), LogicalType::Date => Some(4), @@ -182,8 +182,8 @@ impl LogicalType { } if matches!( (left, right), - (LogicalType::Date, LogicalType::Varchar(_, ..)) - | (LogicalType::Varchar(_, ..), LogicalType::Date) + (LogicalType::Date, LogicalType::Varchar(..)) + | (LogicalType::Varchar(..), LogicalType::Date) ) { return Ok(LogicalType::Date); } @@ -195,13 +195,13 @@ impl LogicalType { } if matches!( (left, right), - (LogicalType::DateTime, LogicalType::Varchar(_, ..)) - | (LogicalType::Varchar(_, ..), LogicalType::DateTime) + (LogicalType::DateTime, LogicalType::Varchar(..)) + | (LogicalType::Varchar(..), LogicalType::DateTime) ) { return Ok(LogicalType::DateTime); } - if let (LogicalType::Char(_, ..), LogicalType::Varchar(len, ..)) - | (LogicalType::Varchar(len, ..), LogicalType::Char(_, ..)) = (left, right) + if let (LogicalType::Char(..), LogicalType::Varchar(len, ..)) + | (LogicalType::Varchar(len, ..), LogicalType::Char(..)) = (left, right) { return Ok(LogicalType::Varchar(*len, CharLengthUnits::Characters)); } @@ -299,20 +299,22 @@ impl LogicalType { LogicalType::UBigint => matches!(to, LogicalType::Float | LogicalType::Double), LogicalType::Float => matches!(to, LogicalType::Double), LogicalType::Double => false, - LogicalType::Char(_, ..) => false, - LogicalType::Varchar(_, ..) => false, + LogicalType::Char(..) => false, + LogicalType::Varchar(..) => false, LogicalType::Date => matches!( to, - LogicalType::DateTime | LogicalType::Varchar(_, ..) | LogicalType::Char(_, ..) 
+ LogicalType::DateTime | LogicalType::Varchar(..) | LogicalType::Char(..) ), LogicalType::DateTime => matches!( to, LogicalType::Date | LogicalType::Time - | LogicalType::Varchar(_, ..) - | LogicalType::Char(_, ..) + | LogicalType::Varchar(..) + | LogicalType::Char(..) ), - LogicalType::Time => matches!(to, LogicalType::Varchar(_, ..) | LogicalType::Char(_, ..)), + LogicalType::Time => { + matches!(to, LogicalType::Varchar(..) | LogicalType::Char(..)) + } LogicalType::Decimal(_, _) | LogicalType::Tuple => false, } } @@ -332,7 +334,10 @@ impl TryFrom for LogicalType { len = cmp::max(len, length); char_unit = unit; } - Ok(LogicalType::Char(len as u32, char_unit.unwrap_or(CharLengthUnits::Characters))) + Ok(LogicalType::Char( + len as u32, + char_unit.unwrap_or(CharLengthUnits::Characters), + )) } sqlparser::ast::DataType::CharVarying(varchar_len) | sqlparser::ast::DataType::CharacterVarying(varchar_len) @@ -343,7 +348,10 @@ impl TryFrom for LogicalType { len = Some(length as u32); char_unit = unit; } - Ok(LogicalType::Varchar(len, char_unit.unwrap_or(CharLengthUnits::Characters))) + Ok(LogicalType::Varchar( + len, + char_unit.unwrap_or(CharLengthUnits::Characters), + )) } sqlparser::ast::DataType::Float(_) => Ok(LogicalType::Float), sqlparser::ast::DataType::Double | sqlparser::ast::DataType::DoublePrecision => { diff --git a/src/types/tuple.rs b/src/types/tuple.rs index 97d5cd5d..5b852b96 100644 --- a/src/types/tuple.rs +++ b/src/types/tuple.rs @@ -175,8 +175,8 @@ mod tests { use crate::types::LogicalType; use itertools::Itertools; use rust_decimal::Decimal; - use std::sync::Arc; use sqlparser::ast::CharLengthUnits; + use std::sync::Arc; #[test] fn test_tuple_serialize_to_and_deserialize_from() { @@ -194,7 +194,12 @@ mod tests { Arc::new(ColumnCatalog::new( "c3".to_string(), false, - ColumnDesc::new(LogicalType::Varchar(Some(2), CharLengthUnits::Characters), false, false, None), + ColumnDesc::new( + LogicalType::Varchar(Some(2), CharLengthUnits::Characters), + 
false, + false, + None, + ), )), Arc::new(ColumnCatalog::new( "c4".to_string(), @@ -249,17 +254,32 @@ mod tests { Arc::new(ColumnCatalog::new( "c14".to_string(), false, - ColumnDesc::new(LogicalType::Char(1, CharLengthUnits::Characters), false, false, None), + ColumnDesc::new( + LogicalType::Char(1, CharLengthUnits::Characters), + false, + false, + None, + ), )), Arc::new(ColumnCatalog::new( "c15".to_string(), false, - ColumnDesc::new(LogicalType::Varchar(Some(2), CharLengthUnits::Octets), false, false, None), + ColumnDesc::new( + LogicalType::Varchar(Some(2), CharLengthUnits::Octets), + false, + false, + None, + ), )), Arc::new(ColumnCatalog::new( "c16".to_string(), false, - ColumnDesc::new(LogicalType::Char(1, CharLengthUnits::Octets), false, false, None), + ColumnDesc::new( + LogicalType::Char(1, CharLengthUnits::Octets), + false, + false, + None, + ), )), ]); diff --git a/src/types/tuple_builder.rs b/src/types/tuple_builder.rs index 95c16d33..40835e4b 100644 --- a/src/types/tuple_builder.rs +++ b/src/types/tuple_builder.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; use crate::types::tuple::{Schema, Tuple}; use crate::types::value::{DataValue, Utf8Type}; -use std::sync::Arc; use sqlparser::ast::CharLengthUnits; +use std::sync::Arc; pub struct TupleBuilder<'a> { schema: &'a Schema, diff --git a/src/types/value.rs b/src/types/value.rs index 9dac3b66..1bc1f272 100644 --- a/src/types/value.rs +++ b/src/types/value.rs @@ -14,7 +14,7 @@ use crate::errors::DatabaseError; use ordered_float::OrderedFloat; use rust_decimal::prelude::{FromPrimitive, ToPrimitive}; use serde::{Deserialize, Serialize}; -use sqlparser::ast::{CharLengthUnits}; +use sqlparser::ast::CharLengthUnits; use super::LogicalType; @@ -334,7 +334,7 @@ impl DataValue { DataValue::Utf8 { value: Some(val), ty: Utf8Type::Variable(_), - unit: CharLengthUnits::Characters + unit: CharLengthUnits::Characters, }, ) | ( @@ -342,7 +342,7 @@ impl DataValue { DataValue::Utf8 { value: Some(val), ty: 
Utf8Type::Fixed(_), - unit: CharLengthUnits::Characters + unit: CharLengthUnits::Characters, }, ) => Self::check_string_len(val, *len as usize, CharLengthUnits::Characters), ( @@ -350,7 +350,7 @@ impl DataValue { DataValue::Utf8 { value: Some(val), ty: Utf8Type::Variable(_), - unit: CharLengthUnits::Octets + unit: CharLengthUnits::Octets, }, ) | ( @@ -358,7 +358,7 @@ impl DataValue { DataValue::Utf8 { value: Some(val), ty: Utf8Type::Fixed(_), - unit: CharLengthUnits::Octets + unit: CharLengthUnits::Octets, }, ) => Self::check_string_len(val, *len as usize, CharLengthUnits::Octets), (LogicalType::Decimal(full_len, scale_len), DataValue::Decimal(Some(val))) => { @@ -434,12 +434,10 @@ impl DataValue { LogicalType::UBigint => DataValue::UInt64(None), LogicalType::Float => DataValue::Float32(None), LogicalType::Double => DataValue::Float64(None), - LogicalType::Char(len, unit) => { - DataValue::Utf8 { - value: None, - ty: Utf8Type::Fixed(*len), - unit: *unit, - } + LogicalType::Char(len, unit) => DataValue::Utf8 { + value: None, + ty: Utf8Type::Fixed(*len), + unit: *unit, }, LogicalType::Varchar(len, unit) => DataValue::Utf8 { value: None, @@ -469,12 +467,10 @@ impl DataValue { LogicalType::UBigint => DataValue::UInt64(Some(0)), LogicalType::Float => DataValue::Float32(Some(0.0)), LogicalType::Double => DataValue::Float64(Some(0.0)), - LogicalType::Char(len, unit) => { - DataValue::Utf8 { - value: Some(String::new()), - ty: Utf8Type::Fixed(*len), - unit: *unit, - } + LogicalType::Char(len, unit) => DataValue::Utf8 { + value: Some(String::new()), + ty: Utf8Type::Fixed(*len), + unit: *unit, }, LogicalType::Varchar(len, unit) => DataValue::Utf8 { value: Some(String::new()), @@ -559,32 +555,30 @@ impl DataValue { bytes.extend_from_slice(string_bytes); return Ok(len); } - Utf8Type::Fixed(len) => { - match unit { - CharLengthUnits::Characters => { - let chars_len = *len as usize; - let mut string_bytes = - format!("{:len$}", v, len = chars_len).into_bytes(); - let octets_len 
= string_bytes.len(); - - bytes.append(&mut string_bytes); - return Ok(octets_len); - } - CharLengthUnits::Octets => { - let octets_len = *len as usize; - let mut string = v.clone(); - - for _ in 0..(octets_len - string.len()) { - string.push(' ') - } - let mut string_bytes = string.into_bytes(); - - assert_eq!(octets_len, string_bytes.len()); - bytes.append(&mut string_bytes); - return Ok(octets_len); + Utf8Type::Fixed(len) => match unit { + CharLengthUnits::Characters => { + let chars_len = *len as usize; + let mut string_bytes = + format!("{:len$}", v, len = chars_len).into_bytes(); + let octets_len = string_bytes.len(); + + bytes.append(&mut string_bytes); + return Ok(octets_len); + } + CharLengthUnits::Octets => { + let octets_len = *len as usize; + let mut string = v.clone(); + + for _ in 0..(octets_len - string.len()) { + string.push(' ') } + let mut string_bytes = string.into_bytes(); + + assert_eq!(octets_len, string_bytes.len()); + bytes.append(&mut string_bytes); + return Ok(octets_len); } - } + }, } } } @@ -712,16 +706,12 @@ impl DataValue { ty: Utf8Type::Variable(len), unit, .. - } => { - LogicalType::Varchar(*len, *unit) - }, + } => LogicalType::Varchar(*len, *unit), DataValue::Utf8 { ty: Utf8Type::Fixed(len), unit, .. 
- } => { - LogicalType::Char(*len, *unit) - }, + } => LogicalType::Char(*len, *unit), DataValue::Date32(_) => LogicalType::Date, DataValue::Date64(_) => LogicalType::DateTime, DataValue::Time(_) => LogicalType::Time, @@ -860,13 +850,11 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(None)), LogicalType::Float => Ok(DataValue::Float32(None)), LogicalType::Double => Ok(DataValue::Float64(None)), - LogicalType::Char(len, unit) => { - Ok(DataValue::Utf8 { - value: None, - ty: Utf8Type::Fixed(*len), - unit: *unit, - }) - }, + LogicalType::Char(len, unit) => Ok(DataValue::Utf8 { + value: None, + ty: Utf8Type::Fixed(*len), + unit: *unit, + }), LogicalType::Varchar(len, unit) => Ok(DataValue::Utf8 { value: None, ty: Utf8Type::Variable(*len), @@ -891,16 +879,24 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), - LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } _ => Err(DatabaseError::CastFail), }, DataValue::Float32(value) => match to { LogicalType::SqlNull => Ok(DataValue::Null), LogicalType::Float => Ok(DataValue::Float32(value)), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), - LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => 
{ + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal( value .map(|v| { @@ -918,8 +914,12 @@ impl DataValue { LogicalType::SqlNull => Ok(DataValue::Null), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value)), - LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), - LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal( value .map(|v| { @@ -951,8 +951,12 @@ impl DataValue { LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), - LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -979,8 +983,12 @@ impl DataValue { LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), - 
LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1006,8 +1014,12 @@ impl DataValue { LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), - LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1032,8 +1044,12 @@ impl DataValue { LogicalType::Bigint => Ok(DataValue::Int64(value)), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v as f64))), - LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), - LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = 
Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1054,8 +1070,12 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), - LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1074,8 +1094,12 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), - LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1092,8 +1116,12 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len, unit) => varchar_cast!(value, 
Some(len), Utf8Type::Fixed(*len), *unit), - LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1108,8 +1136,12 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value)), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v as f64))), - LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), - LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1155,8 +1187,12 @@ impl DataValue { LogicalType::Double => Ok(DataValue::Float64( value.map(|v| f64::from_str(&v)).transpose()?, )), - LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), - LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Date => { let option = value .map(|v| { @@ -1199,10 +1235,20 @@ impl DataValue { DataValue::Date32(value) => match to { LogicalType::SqlNull => 
Ok(DataValue::Null), LogicalType::Char(len, unit) => { - varchar_cast!(Self::format_date(value), Some(len), Utf8Type::Fixed(*len), *unit) + varchar_cast!( + Self::format_date(value), + Some(len), + Utf8Type::Fixed(*len), + *unit + ) } LogicalType::Varchar(len, unit) => { - varchar_cast!(Self::format_date(value), len, Utf8Type::Variable(*len), *unit) + varchar_cast!( + Self::format_date(value), + len, + Utf8Type::Variable(*len), + *unit + ) } LogicalType::Date => Ok(DataValue::Date32(value)), LogicalType::DateTime => { @@ -1227,7 +1273,12 @@ impl DataValue { ) } LogicalType::Varchar(len, unit) => { - varchar_cast!(Self::format_datetime(value), len, Utf8Type::Variable(*len), *unit) + varchar_cast!( + Self::format_datetime(value), + len, + Utf8Type::Variable(*len), + *unit + ) } LogicalType::Date => { let option = value.and_then(|v| { @@ -1251,10 +1302,20 @@ impl DataValue { DataValue::Time(value) => match to { LogicalType::SqlNull => Ok(DataValue::Null), LogicalType::Char(len, unit) => { - varchar_cast!(Self::format_time(value), Some(len), Utf8Type::Fixed(*len), *unit) + varchar_cast!( + Self::format_time(value), + Some(len), + Utf8Type::Fixed(*len), + *unit + ) } LogicalType::Varchar(len, unit) => { - varchar_cast!(Self::format_time(value), len, Utf8Type::Variable(*len), *unit) + varchar_cast!( + Self::format_time(value), + len, + Utf8Type::Variable(*len), + *unit + ) } _ => Err(DatabaseError::CastFail), }, @@ -1263,8 +1324,12 @@ impl DataValue { LogicalType::Float => Ok(DataValue::Float32(value.and_then(|v| v.to_f32()))), LogicalType::Double => Ok(DataValue::Float64(value.and_then(|v| v.to_f64()))), LogicalType::Decimal(_, _) => Ok(DataValue::Decimal(value)), - LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit), - LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + 
LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } _ => Err(DatabaseError::CastFail), }, DataValue::Tuple(values) => match to { From d019641591439b23d36cc5f24a92227c2799ea28 Mon Sep 17 00:00:00 2001 From: Kould <2435992353@qq.com> Date: Thu, 28 Mar 2024 02:14:56 +0800 Subject: [PATCH 3/5] docs: add ospp icon --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 1a03ef85..2f2cc062 100755 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ Built by @KipData   +

From 706a92aa7f629b4fbf853839b1c4a8eb331235a0 Mon Sep 17 00:00:00 2001 From: Kould <2435992353@qq.com> Date: Thu, 28 Mar 2024 02:16:33 +0800 Subject: [PATCH 4/5] docs: change icons place --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2f2cc062..e97a946a 100755 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Built by @KipData

+   CI   @@ -24,7 +25,6 @@ Built by @KipData   -

From 317d76443053b6c227408b7befff2be2ce53c6f8 Mon Sep 17 00:00:00 2001 From: Kould <2435992353@qq.com> Date: Thu, 28 Mar 2024 15:55:30 +0800 Subject: [PATCH 5/5] style: while push -> resize on `src/types/value.rs` --- src/types/value.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/types/value.rs b/src/types/value.rs index 1bc1f272..0768b46c 100644 --- a/src/types/value.rs +++ b/src/types/value.rs @@ -567,13 +567,9 @@ impl DataValue { } CharLengthUnits::Octets => { let octets_len = *len as usize; - let mut string = v.clone(); - - for _ in 0..(octets_len - string.len()) { - string.push(' ') - } - let mut string_bytes = string.into_bytes(); + let mut string_bytes = v.clone().into_bytes(); + string_bytes.resize(octets_len, b' '); assert_eq!(octets_len, string_bytes.len()); bytes.append(&mut string_bytes); return Ok(octets_len);