From 126c6c86b7548cfc539616858e51b613a279b2e2 Mon Sep 17 00:00:00 2001 From: Kould <2435992353@qq.com> Date: Thu, 28 Mar 2024 17:12:23 +0800 Subject: [PATCH] feat: support `Octets` for `Char/Varchar` (#184) * feat: support `Octets` for `Char/Varchar` * style: code fmt & fix `Char/Varchar` on Server * docs: add ospp icon * docs: change icons place * style: while push -> resize on `src/types/values.rs` --- Cargo.toml | 2 +- README.md | 1 + src/bin/server.rs | 6 +- src/binder/create_table.rs | 8 +- src/binder/expr.rs | 12 +- src/catalog/column.rs | 8 +- src/execution/volcano/dml/analyze.rs | 4 +- src/execution/volcano/dml/copy_from_file.rs | 8 +- src/execution/volcano/dql/describe.rs | 25 +- src/execution/volcano/dql/explain.rs | 4 +- src/execution/volcano/dql/show_table.rs | 4 +- src/expression/evaluator.rs | 17 +- src/expression/mod.rs | 8 +- src/expression/value_compute.rs | 84 ++++-- src/marcos/mod.rs | 14 +- src/optimizer/core/histogram.rs | 2 +- .../rule/normalization/column_pruning.rs | 4 +- src/storage/table_codec.rs | 2 +- src/types/mod.rs | 77 ++--- src/types/tuple.rs | 67 ++++- src/types/tuple_builder.rs | 7 +- src/types/value.rs | 276 +++++++++++++----- tests/slt/char.slt | 47 +++ tests/slt/sql_2016/E021_01.slt | 10 +- tests/slt/sql_2016/E021_02.slt | 15 +- 25 files changed, 517 insertions(+), 195 deletions(-) create mode 100644 tests/slt/char.slt diff --git a/Cargo.toml b/Cargo.toml index 6d8125f0..5f854b1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,7 +58,7 @@ regex = { version = "1.10.3" } rust_decimal = { version = "1.34.3" } serde = { version = "1.0.197", features = ["derive", "rc"] } siphasher = { version = "1.0.0", features = ["serde"] } -sqlparser = { version = "0.34.0" } +sqlparser = { version = "0.34.0", features = ["serde"] } strum_macros = { version = "0.26.2" } thiserror = { version = "1.0.58" } tokio = { version = "1.36.0", features = ["full"] } diff --git a/README.md b/README.md index 1a03ef85..e97a946a 100755 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Built by @KipData

+   CI   diff --git a/src/bin/server.rs b/src/bin/server.rs index 70747931..4c8450b8 100644 --- a/src/bin/server.rs +++ b/src/bin/server.rs @@ -198,7 +198,7 @@ fn encode_tuples<'a>(schema: &Schema, tuples: Vec) -> PgWireResult encoder.encode_field(&value.u64().map(|v| v as i64)), LogicalType::Float => encoder.encode_field(&value.float()), LogicalType::Double => encoder.encode_field(&value.double()), - LogicalType::Char(_) | LogicalType::Varchar(_) => { + LogicalType::Char(..) | LogicalType::Varchar(..) => { encoder.encode_field(&value.utf8()) } LogicalType::Date => encoder.encode_field(&value.date()), @@ -225,9 +225,9 @@ fn into_pg_type(data_type: &LogicalType) -> PgWireResult { LogicalType::Bigint | LogicalType::UBigint => Type::INT8, LogicalType::Float => Type::FLOAT4, LogicalType::Double => Type::FLOAT8, - LogicalType::Varchar(_) => Type::VARCHAR, + LogicalType::Varchar(..) => Type::VARCHAR, LogicalType::Date | LogicalType::DateTime => Type::DATE, - LogicalType::Char(_) => Type::CHAR, + LogicalType::Char(..) => Type::CHAR, LogicalType::Time => Type::TIME, LogicalType::Decimal(_, _) => todo!(), _ => { diff --git a/src/binder/create_table.rs b/src/binder/create_table.rs index 70717fcc..9af59f0f 100644 --- a/src/binder/create_table.rs +++ b/src/binder/create_table.rs @@ -146,6 +146,7 @@ mod tests { use crate::storage::kip::KipStorage; use crate::storage::Storage; use crate::types::LogicalType; + use sqlparser::ast::CharLengthUnits; use std::sync::atomic::AtomicUsize; use tempfile::TempDir; @@ -177,7 +178,12 @@ mod tests { assert_eq!(op.columns[1].nullable, true); assert_eq!( op.columns[1].desc, - ColumnDesc::new(LogicalType::Varchar(Some(10)), false, false, None) + ColumnDesc::new( + LogicalType::Varchar(Some(10), CharLengthUnits::Characters), + false, + false, + None + ) ); } _ => unreachable!(), diff --git a/src/binder/expr.rs b/src/binder/expr.rs index 3e8e0275..71228dc3 100644 --- a/src/binder/expr.rs +++ b/src/binder/expr.rs @@ -4,8 +4,8 @@ use crate::expression; use crate::expression::agg::AggKind; use itertools::Itertools; use sqlparser::ast::{ - BinaryOperator, DataType, Expr, Function, FunctionArg, FunctionArgExpr, Ident, Query, - UnaryOperator, + BinaryOperator, CharLengthUnits, DataType, Expr, Function, FunctionArg, FunctionArgExpr, Ident, + Query, UnaryOperator, }; use std::slice; use std::sync::Arc; @@ -69,7 +69,8 @@ impl<'a, T: Transaction> Binder<'a, T> { let logical_type = LogicalType::try_from(data_type.clone())?; let value = DataValue::Utf8 { value: Some(value.to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, } .cast(&logical_type)?; @@ -354,7 +355,7 @@ impl<'a, T: Transaction> Binder<'a, T> { | BinaryOperator::And | BinaryOperator::Or | BinaryOperator::Xor => LogicalType::Boolean, - BinaryOperator::StringConcat => LogicalType::Varchar(None), + BinaryOperator::StringConcat => LogicalType::Varchar(None, CharLengthUnits::Characters), _ => todo!(), }; @@ -603,7 +604,8 @@ impl<'a, T: Transaction> Binder<'a, T> { fn wildcard_expr() -> ScalarExpression { ScalarExpression::Constant(Arc::new(DataValue::Utf8 { value: Some("*".to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })) } } diff --git a/src/catalog/column.rs b/src/catalog/column.rs index 32d7653f..258536fd 100644 --- a/src/catalog/column.rs +++ b/src/catalog/column.rs @@ -2,6 +2,7 @@ use crate::catalog::TableName; use crate::errors::DatabaseError; use crate::expression::ScalarExpression; use serde::{Deserialize, Serialize}; +use sqlparser::ast::CharLengthUnits; use std::hash::Hash; use std::sync::Arc; @@ -50,7 +51,12 @@ impl ColumnCatalog { table_name: None, }, nullable: true, - desc: ColumnDesc::new(LogicalType::Varchar(None), false, false, None), + desc: ColumnDesc::new( + LogicalType::Varchar(None, CharLengthUnits::Characters), + false, + false, + None, + ), } } diff --git a/src/execution/volcano/dml/analyze.rs b/src/execution/volcano/dml/analyze.rs index ae8b6a8f..8e1a6178 100644 --- a/src/execution/volcano/dml/analyze.rs +++ b/src/execution/volcano/dml/analyze.rs @@ -12,6 +12,7 @@ use crate::types::tuple::Tuple; use crate::types::value::{DataValue, Utf8Type}; use futures_async_stream::try_stream; use itertools::Itertools; +use sqlparser::ast::CharLengthUnits; use std::fmt::Formatter; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; @@ -108,7 +109,8 @@ impl Analyze { meta.to_file(&path)?; values.push(Arc::new(DataValue::Utf8 { value: Some(path.clone()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })); transaction.save_table_meta(&table_name, path, meta)?; } diff --git a/src/execution/volcano/dml/copy_from_file.rs b/src/execution/volcano/dml/copy_from_file.rs index 935ebc46..3a8e7d4e 100644 --- a/src/execution/volcano/dml/copy_from_file.rs +++ b/src/execution/volcano/dml/copy_from_file.rs @@ -105,6 +105,7 @@ mod tests { use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnSummary}; use crate::db::DataBaseBuilder; use futures::StreamExt; + use sqlparser::ast::CharLengthUnits; use std::io::Write; use std::sync::Arc; use tempfile::TempDir; @@ -148,7 +149,12 @@ mod tests { table_name: None, }, nullable: false, - desc: ColumnDesc::new(LogicalType::Varchar(Some(10)), false, false, None), + desc: ColumnDesc::new( + LogicalType::Varchar(Some(10), CharLengthUnits::Characters), + false, + false, + None, + ), }), ]; diff --git a/src/execution/volcano/dql/describe.rs b/src/execution/volcano/dql/describe.rs index fb27ce64..814a7145 100644 --- a/src/execution/volcano/dql/describe.rs +++ b/src/execution/volcano/dql/describe.rs @@ -7,20 +7,24 @@ use crate::types::tuple::Tuple; use crate::types::value::{DataValue, Utf8Type, ValueRef}; use futures_async_stream::try_stream; use lazy_static::lazy_static; +use sqlparser::ast::CharLengthUnits; use std::sync::Arc; lazy_static! { static ref PRIMARY_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 { value: Some(String::from("PRIMARY")), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters }); static ref UNIQUE_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 { value: Some(String::from("UNIQUE")), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters }); static ref EMPTY_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 { value: Some(String::from("EMPTY")), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters }); } @@ -69,24 +73,29 @@ impl Describe { let values = vec![ Arc::new(DataValue::Utf8 { value: Some(column.name().to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }), Arc::new(DataValue::Utf8 { value: Some(datatype.to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }), Arc::new(DataValue::Utf8 { value: datatype.raw_len().map(|len| len.to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }), Arc::new(DataValue::Utf8 { value: Some(column.nullable.to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }), key_fn(column), Arc::new(DataValue::Utf8 { value: Some(default), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }), ]; yield Tuple { id: None, values }; diff --git a/src/execution/volcano/dql/explain.rs b/src/execution/volcano/dql/explain.rs index f1dbb3ca..162657ef 100644 --- a/src/execution/volcano/dql/explain.rs +++ b/src/execution/volcano/dql/explain.rs @@ -5,6 +5,7 @@ use crate::storage::Transaction; use crate::types::tuple::Tuple; use crate::types::value::{DataValue, Utf8Type}; use futures_async_stream::try_stream; +use sqlparser::ast::CharLengthUnits; use std::sync::Arc; pub struct Explain { @@ -28,7 +29,8 @@ impl Explain { pub async fn _execute(self) { let values = vec![Arc::new(DataValue::Utf8 { value: Some(self.plan.explain(0)), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })]; yield Tuple { id: None, values }; diff --git a/src/execution/volcano/dql/show_table.rs b/src/execution/volcano/dql/show_table.rs index f64b6dd1..b9862f08 100644 --- a/src/execution/volcano/dql/show_table.rs +++ b/src/execution/volcano/dql/show_table.rs @@ -5,6 +5,7 @@ use crate::storage::Transaction; use crate::types::tuple::Tuple; use crate::types::value::{DataValue, Utf8Type}; use futures_async_stream::try_stream; +use sqlparser::ast::CharLengthUnits; use std::sync::Arc; pub struct ShowTables; @@ -23,7 +24,8 @@ impl ShowTables { for TableMeta { table_name } in metas { let values = vec![Arc::new(DataValue::Utf8 { value: Some(table_name.to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })]; yield Tuple { id: None, values }; diff --git a/src/expression/evaluator.rs b/src/expression/evaluator.rs index f97a1d4c..6eea7aee 100644 --- a/src/expression/evaluator.rs +++ b/src/expression/evaluator.rs @@ -7,6 +7,7 @@ use crate::types::value::{DataValue, Utf8Type, ValueRef}; use crate::types::LogicalType; use itertools::Itertools; use lazy_static::lazy_static; +use sqlparser::ast::CharLengthUnits; use std::cmp; use std::cmp::Ordering; use std::sync::Arc; @@ -25,7 +26,8 @@ macro_rules! eval_to_num { } else { return Ok(Arc::new(DataValue::Utf8 { value: None, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })); } }; @@ -156,7 +158,7 @@ impl ScalarExpression { from_expr, } => { if let Some(mut string) = DataValue::clone(expr.eval(tuple, schema)?.as_ref()) - .cast(&LogicalType::Varchar(None))? + .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))? .utf8() { if let Some(from_expr) = from_expr { @@ -169,7 +171,8 @@ impl ScalarExpression { if from > len_i { return Ok(Arc::new(DataValue::Utf8 { value: None, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })); } string = string.split_off(from as usize); @@ -182,19 +185,21 @@ impl ScalarExpression { Ok(Arc::new(DataValue::Utf8 { value: Some(string), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })) } else { Ok(Arc::new(DataValue::Utf8 { value: None, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })) } } ScalarExpression::Position { expr, in_expr } => { let unpack = |expr: &ScalarExpression| -> Result { Ok(DataValue::clone(expr.eval(tuple, schema)?.as_ref()) - .cast(&LogicalType::Varchar(None))? + .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))? .utf8() .unwrap_or("".to_owned())) }; diff --git a/src/expression/mod.rs b/src/expression/mod.rs index ab97c43c..3a7dc0c9 100644 --- a/src/expression/mod.rs +++ b/src/expression/mod.rs @@ -5,7 +5,9 @@ use std::hash::Hash; use std::sync::Arc; use std::{fmt, mem}; -use sqlparser::ast::{BinaryOperator as SqlBinaryOperator, UnaryOperator as SqlUnaryOperator}; +use sqlparser::ast::{ + BinaryOperator as SqlBinaryOperator, CharLengthUnits, UnaryOperator as SqlUnaryOperator, +}; use self::agg::AggKind; use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef}; @@ -394,7 +396,9 @@ impl ScalarExpression { ScalarExpression::IsNull { .. } | ScalarExpression::In { .. } | ScalarExpression::Between { .. } => LogicalType::Boolean, - ScalarExpression::SubString { .. } => LogicalType::Varchar(None), + ScalarExpression::SubString { .. } => { + LogicalType::Varchar(None, CharLengthUnits::Characters) + } ScalarExpression::Position { .. } => LogicalType::Integer, ScalarExpression::Alias { expr, .. } | ScalarExpression::Reference { expr, .. } => { expr.return_type() diff --git a/src/expression/value_compute.rs b/src/expression/value_compute.rs index 3ceb7031..297f2905 100644 --- a/src/expression/value_compute.rs +++ b/src/expression/value_compute.rs @@ -3,6 +3,7 @@ use crate::expression::{BinaryOperator, UnaryOperator}; use crate::types::value::{DataValue, Utf8Type, ValueRef}; use crate::types::LogicalType; use regex::Regex; +use sqlparser::ast::CharLengthUnits; use std::cmp::Ordering; fn unpack_bool(value: DataValue) -> Option { @@ -193,8 +194,15 @@ impl DataValue { op: &BinaryOperator, ) -> Result { if let BinaryOperator::Like(escape_char) | BinaryOperator::NotLike(escape_char) = op { - let value_option = unpack_utf8(self.clone().cast(&LogicalType::Varchar(None))?); - let pattern_option = unpack_utf8(right.clone().cast(&LogicalType::Varchar(None))?); + let value_option = unpack_utf8( + self.clone() + .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?, + ); + let pattern_option = unpack_utf8( + right + .clone() + .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?, + ); let mut is_match = if let (Some(value), Some(pattern)) = (value_option, pattern_option) { @@ -511,7 +519,7 @@ impl DataValue { _ => return Err(DatabaseError::UnsupportedBinaryOperator(unified_type, *op)), } } - LogicalType::Varchar(_) | LogicalType::Char(_) => { + LogicalType::Varchar(_, _) | LogicalType::Char(_, _) => { let left_value = unpack_utf8(self.clone().cast(&unified_type)?); let right_value = unpack_utf8(right.clone().cast(&unified_type)?); @@ -576,7 +584,8 @@ impl DataValue { DataValue::Utf8 { value, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, } } _ => return Err(DatabaseError::UnsupportedBinaryOperator(unified_type, *op)), @@ -652,6 +661,7 @@ mod test { use crate::errors::DatabaseError; use crate::expression::BinaryOperator; use crate::types::value::{DataValue, Utf8Type}; + use sqlparser::ast::CharLengthUnits; #[test] fn test_binary_op_arithmetic_plus() -> Result<(), DatabaseError> { @@ -1544,11 +1554,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("b".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::Gt )?, @@ -1558,11 +1570,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("b".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::Lt )?, @@ -1572,11 +1586,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::GtEq )?, @@ -1586,11 +1602,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::LtEq )?, @@ -1600,11 +1618,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::NotEq )?, @@ -1614,11 +1634,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::Eq )?, @@ -1629,11 +1651,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: None, - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::Gt )?, @@ -1643,11 +1667,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: None, - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::Lt )?, @@ -1657,11 +1683,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: None, - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::GtEq )?, @@ -1671,11 +1699,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: None, - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::LtEq )?, @@ -1685,11 +1715,13 @@ mod test { DataValue::binary_op( &DataValue::Utf8 { value: None, - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &DataValue::Utf8 { value: Some("a".to_string()), - ty: Utf8Type::Variable + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }, &BinaryOperator::NotEq )?, diff --git a/src/marcos/mod.rs b/src/marcos/mod.rs index 39332a1d..8042d41d 100644 --- a/src/marcos/mod.rs +++ b/src/marcos/mod.rs @@ -137,6 +137,7 @@ mod test { use crate::types::LogicalType; use serde::Deserialize; use serde::Serialize; + use sqlparser::ast::CharLengthUnits; use std::sync::Arc; fn build_tuple() -> (Tuple, SchemaRef) { @@ -149,14 +150,20 @@ mod test { Arc::new(ColumnCatalog::new( "c2".to_string(), false, - ColumnDesc::new(LogicalType::Varchar(None), false, false, None), + ColumnDesc::new( + LogicalType::Varchar(None, CharLengthUnits::Characters), + false, + false, + None, + ), )), ]); let values = vec![ Arc::new(DataValue::Int32(Some(9))), Arc::new(DataValue::Utf8 { value: Some("LOL".to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }), ]; @@ -207,7 +214,8 @@ mod test { ScalarExpression::Constant(Arc::new(DataValue::Int8(Some(1)))), ScalarExpression::Constant(Arc::new(DataValue::Utf8 { value: Some("1".to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })), ], &Tuple { diff --git a/src/optimizer/core/histogram.rs b/src/optimizer/core/histogram.rs index 17cdf193..6f1b9954 100644 --- a/src/optimizer/core/histogram.rs +++ b/src/optimizer/core/histogram.rs @@ -255,7 +255,7 @@ impl Histogram { ) -> Result { let float_value = |value: &DataValue, prefix_len: usize| { let value = match value.logical_type() { - LogicalType::Varchar(_) | LogicalType::Char(_) => match value { + LogicalType::Varchar(..) | LogicalType::Char(..) => match value { DataValue::Utf8 { value, .. } => value.as_ref().map(|string| { if prefix_len > string.len() { return 0.0; diff --git a/src/optimizer/rule/normalization/column_pruning.rs b/src/optimizer/rule/normalization/column_pruning.rs index e48e98e1..d86d16ee 100644 --- a/src/optimizer/rule/normalization/column_pruning.rs +++ b/src/optimizer/rule/normalization/column_pruning.rs @@ -10,6 +10,7 @@ use crate::types::value::{DataValue, Utf8Type}; use crate::types::LogicalType; use itertools::Itertools; use lazy_static::lazy_static; +use sqlparser::ast::CharLengthUnits; use std::collections::HashSet; use std::sync::Arc; @@ -63,7 +64,8 @@ impl ColumnPruning { if op.agg_calls.is_empty() && op.groupby_exprs.is_empty() { let value = Arc::new(DataValue::Utf8 { value: Some("*".to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, }); // only single COUNT(*) is not depend on any column // removed all expressions from the aggregate: push a COUNT(*) diff --git a/src/storage/table_codec.rs b/src/storage/table_codec.rs index 3994e489..c74aba96 100644 --- a/src/storage/table_codec.rs +++ b/src/storage/table_codec.rs @@ -172,7 +172,7 @@ impl TableCodec { | LogicalType::USmallint | LogicalType::UInteger | LogicalType::UBigint - | LogicalType::Varchar(_) + | LogicalType::Varchar(..) ) { return Err(DatabaseError::InvalidType); } diff --git a/src/types/mod.rs b/src/types/mod.rs index 209dcda6..53aec131 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -10,7 +10,7 @@ use std::any::TypeId; use std::cmp; use crate::errors::DatabaseError; -use sqlparser::ast::{CharLengthUnits, CharacterLength, ExactNumberInfo, TimezoneInfo}; +use sqlparser::ast::{CharLengthUnits, ExactNumberInfo, TimezoneInfo}; use strum_macros::AsRefStr; pub type ColumnId = u32; @@ -34,8 +34,8 @@ pub enum LogicalType { UBigint, Float, Double, - Char(u32), - Varchar(Option), + Char(u32, CharLengthUnits), + Varchar(Option, CharLengthUnits), Date, DateTime, Time, @@ -75,7 +75,7 @@ impl LogicalType { } else if type_id == TypeId::of::() { Some(LogicalType::Decimal(None, None)) } else if type_id == TypeId::of::() { - Some(LogicalType::Varchar(None)) + Some(LogicalType::Varchar(None, CharLengthUnits::Characters)) } else { None } @@ -96,8 +96,11 @@ impl LogicalType { LogicalType::Float => Some(4), LogicalType::Double => Some(8), /// Note: The non-fixed length type's raw_len is None e.g. Varchar - LogicalType::Varchar(_) => None, - LogicalType::Char(len) => Some(*len as usize), + LogicalType::Varchar(_, _) => None, + LogicalType::Char(len, unit) => match unit { + CharLengthUnits::Characters => None, + CharLengthUnits::Octets => Some(*len as usize), + }, LogicalType::Decimal(_, _) => Some(16), LogicalType::Date => Some(4), LogicalType::DateTime => Some(8), @@ -179,8 +182,8 @@ impl LogicalType { } if matches!( (left, right), - (LogicalType::Date, LogicalType::Varchar(_)) - | (LogicalType::Varchar(_), LogicalType::Date) + (LogicalType::Date, LogicalType::Varchar(..)) + | (LogicalType::Varchar(..), LogicalType::Date) ) { return Ok(LogicalType::Date); } @@ -192,15 +195,15 @@ impl LogicalType { } if matches!( (left, right), - (LogicalType::DateTime, LogicalType::Varchar(_)) - | (LogicalType::Varchar(_), LogicalType::DateTime) + (LogicalType::DateTime, LogicalType::Varchar(..)) + | (LogicalType::Varchar(..), LogicalType::DateTime) ) { return Ok(LogicalType::DateTime); } - if let (LogicalType::Char(_), LogicalType::Varchar(len)) - | (LogicalType::Varchar(len), LogicalType::Char(_)) = (left, right) + if let (LogicalType::Char(..), LogicalType::Varchar(len, ..)) + | (LogicalType::Varchar(len, ..), LogicalType::Char(..)) = (left, right) { - return Ok(LogicalType::Varchar(*len)); + return Ok(LogicalType::Varchar(*len, CharLengthUnits::Characters)); } Err(DatabaseError::Incomparable(*left, *right)) } @@ -296,20 +299,22 @@ impl LogicalType { LogicalType::UBigint => matches!(to, LogicalType::Float | LogicalType::Double), LogicalType::Float => matches!(to, LogicalType::Double), LogicalType::Double => false, - LogicalType::Char(_) => false, - LogicalType::Varchar(_) => false, + LogicalType::Char(..) => false, + LogicalType::Varchar(..) => false, LogicalType::Date => matches!( to, - LogicalType::DateTime | LogicalType::Varchar(_) | LogicalType::Char(_) + LogicalType::DateTime | LogicalType::Varchar(..) | LogicalType::Char(..) ), LogicalType::DateTime => matches!( to, LogicalType::Date | LogicalType::Time - | LogicalType::Varchar(_) - | LogicalType::Char(_) + | LogicalType::Varchar(..) + | LogicalType::Char(..) ), - LogicalType::Time => matches!(to, LogicalType::Varchar(_) | LogicalType::Char(_)), + LogicalType::Time => { + matches!(to, LogicalType::Varchar(..) | LogicalType::Char(..)) + } LogicalType::Decimal(_, _) | LogicalType::Tuple => false, } } @@ -324,31 +329,29 @@ impl TryFrom for LogicalType { sqlparser::ast::DataType::Char(char_len) | sqlparser::ast::DataType::Character(char_len) => { let mut len = 1; - if let Some(CharacterLength { length, unit }) = char_len { - if matches!(unit, Some(CharLengthUnits::Octets)) { - return Err(DatabaseError::UnsupportedStmt(format!( - "char unit: {:?}", - unit - ))); - } - len = cmp::max(len, length) + let mut char_unit = None; + if let Some(sqlparser::ast::CharacterLength { length, unit }) = char_len { + len = cmp::max(len, length); + char_unit = unit; } - Ok(LogicalType::Char(len as u32)) + Ok(LogicalType::Char( + len as u32, + char_unit.unwrap_or(CharLengthUnits::Characters), + )) } sqlparser::ast::DataType::CharVarying(varchar_len) | sqlparser::ast::DataType::CharacterVarying(varchar_len) | sqlparser::ast::DataType::Varchar(varchar_len) => { let mut len = None; - if let Some(CharacterLength { length, unit }) = varchar_len { - if matches!(unit, Some(CharLengthUnits::Octets)) { - return Err(DatabaseError::UnsupportedStmt(format!( - "char unit: {:?}", - unit - ))); - } - len = Some(length as u32) + let mut char_unit = None; + if let Some(sqlparser::ast::CharacterLength { length, unit }) = varchar_len { + len = Some(length as u32); + char_unit = unit; } - Ok(LogicalType::Varchar(len)) + Ok(LogicalType::Varchar( + len, + char_unit.unwrap_or(CharLengthUnits::Characters), + )) } sqlparser::ast::DataType::Float(_) => Ok(LogicalType::Float), sqlparser::ast::DataType::Double | sqlparser::ast::DataType::DoublePrecision => { diff --git a/src/types/tuple.rs b/src/types/tuple.rs index dff31ffb..5b852b96 100644 --- a/src/types/tuple.rs +++ b/src/types/tuple.rs @@ -175,6 +175,7 @@ mod tests { use crate::types::LogicalType; use itertools::Itertools; use rust_decimal::Decimal; + use sqlparser::ast::CharLengthUnits; use std::sync::Arc; #[test] @@ -193,7 +194,12 @@ mod tests { Arc::new(ColumnCatalog::new( "c3".to_string(), false, - ColumnDesc::new(LogicalType::Varchar(Some(2)), false, false, None), + ColumnDesc::new( + LogicalType::Varchar(Some(2), CharLengthUnits::Characters), + false, + false, + None, + ), )), Arc::new(ColumnCatalog::new( "c4".to_string(), @@ -248,7 +254,32 @@ mod tests { Arc::new(ColumnCatalog::new( "c14".to_string(), false, - ColumnDesc::new(LogicalType::Char(1), false, false, None), + ColumnDesc::new( + LogicalType::Char(1, CharLengthUnits::Characters), + false, + false, + None, + ), + )), + Arc::new(ColumnCatalog::new( + "c15".to_string(), + false, + ColumnDesc::new( + LogicalType::Varchar(Some(2), CharLengthUnits::Octets), + false, + false, + None, + ), + )), + Arc::new(ColumnCatalog::new( + "c16".to_string(), + false, + ColumnDesc::new( + LogicalType::Char(1, CharLengthUnits::Octets), + false, + false, + None, + ), )), ]); @@ -260,7 +291,8 @@ mod tests { Arc::new(DataValue::UInt32(Some(1))), Arc::new(DataValue::Utf8 { value: Some("LOL".to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(Some(2)), + unit: CharLengthUnits::Characters, }), Arc::new(DataValue::Int16(Some(1))), Arc::new(DataValue::UInt16(Some(1))), @@ -275,6 +307,17 @@ mod tests { Arc::new(DataValue::Utf8 { value: Some("K".to_string()), ty: Utf8Type::Fixed(1), + unit: CharLengthUnits::Characters, + }), + Arc::new(DataValue::Utf8 { + value: Some("LOL".to_string()), + ty: Utf8Type::Variable(Some(2)), + unit: CharLengthUnits::Octets, + }), + Arc::new(DataValue::Utf8 { + value: Some("K".to_string()), + ty: Utf8Type::Fixed(1), + unit: CharLengthUnits::Octets, }), ], }, @@ -285,7 +328,8 @@ mod tests { Arc::new(DataValue::UInt32(None)), Arc::new(DataValue::Utf8 { value: None, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(Some(2)), + unit: CharLengthUnits::Characters, }), Arc::new(DataValue::Int16(None)), Arc::new(DataValue::UInt16(None)), @@ -300,6 +344,17 @@ mod tests { Arc::new(DataValue::Utf8 { value: None, ty: Utf8Type::Fixed(1), + unit: CharLengthUnits::Characters, + }), + Arc::new(DataValue::Utf8 { + value: None, + ty: Utf8Type::Variable(Some(2)), + unit: CharLengthUnits::Octets, + }), + Arc::new(DataValue::Utf8 { + value: None, + ty: Utf8Type::Fixed(1), + unit: CharLengthUnits::Octets, }), ], }, @@ -312,13 +367,13 @@ mod tests { let tuple_0 = Tuple::deserialize_from( &types, - &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], &columns, &tuples[0].serialize_to(&types).unwrap(), ); let tuple_1 = Tuple::deserialize_from( &types, - &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], &columns, &tuples[1].serialize_to(&types).unwrap(), ); diff --git a/src/types/tuple_builder.rs b/src/types/tuple_builder.rs index d9763a87..40835e4b 100644 --- a/src/types/tuple_builder.rs +++ b/src/types/tuple_builder.rs @@ -1,6 +1,7 @@ use crate::errors::DatabaseError; use crate::types::tuple::{Schema, Tuple}; use crate::types::value::{DataValue, Utf8Type}; +use sqlparser::ast::CharLengthUnits; use std::sync::Arc; pub struct TupleBuilder<'a> { @@ -15,7 +16,8 @@ impl<'a> TupleBuilder<'a> { pub fn build_result(message: String) -> Tuple { let values = vec![Arc::new(DataValue::Utf8 { value: Some(message), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, })]; Tuple { id: None, values } @@ -32,7 +34,8 @@ impl<'a> TupleBuilder<'a> { let data_value = Arc::new( DataValue::Utf8 { value: Some(value.to_string()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, } .cast(self.schema[i].datatype())?, ); diff --git a/src/types/value.rs b/src/types/value.rs index 9e4300d0..0768b46c 100644 --- a/src/types/value.rs +++ b/src/types/value.rs @@ -14,6 +14,7 @@ use crate::errors::DatabaseError; use ordered_float::OrderedFloat; use rust_decimal::prelude::{FromPrimitive, ToPrimitive}; use serde::{Deserialize, Serialize}; +use sqlparser::ast::CharLengthUnits; use super::LogicalType; @@ -34,7 +35,7 @@ pub type ValueRef = Arc; #[derive(Clone, Serialize, Deserialize)] pub enum Utf8Type { - Variable, + Variable(Option), Fixed(u32), } @@ -55,6 +56,7 @@ pub enum DataValue { Utf8 { value: Option, ty: Utf8Type, + unit: CharLengthUnits, }, /// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01 Date32(Option), @@ -248,23 +250,25 @@ impl Hash for DataValue { } } macro_rules! varchar_cast { - ($value:expr, $len:expr, $ty:expr) => { + ($value:expr, $len:expr, $ty:expr, $unit:expr) => { $value .map(|v| { let string_value = format!("{}", v); if let Some(len) = $len { - if string_value.len() > *len as usize { + if Self::check_string_len(&string_value, *len as usize, $unit) { return Err(DatabaseError::TooLong); } } Ok(DataValue::Utf8 { value: Some(string_value), ty: $ty, + unit: $unit, }) }) .unwrap_or(Ok(DataValue::Utf8 { value: None, ty: $ty, + unit: $unit, })) }; } @@ -315,22 +319,48 @@ impl DataValue { } } + pub(crate) fn check_string_len(string: &str, len: usize, unit: CharLengthUnits) -> bool { + match unit { + CharLengthUnits::Characters => string.chars().count() > len, + CharLengthUnits::Octets => string.len() > len, + } + } + pub(crate) fn check_len(&self, logic_type: &LogicalType) -> Result<(), DatabaseError> { let is_over_len = match (logic_type, self) { + (LogicalType::Varchar(None, _), _) => false, + ( + LogicalType::Varchar(Some(len), CharLengthUnits::Characters), + DataValue::Utf8 { + value: Some(val), + ty: Utf8Type::Variable(_), + unit: CharLengthUnits::Characters, + }, + ) + | ( + LogicalType::Char(len, CharLengthUnits::Characters), + DataValue::Utf8 { + value: Some(val), + ty: Utf8Type::Fixed(_), + unit: CharLengthUnits::Characters, + }, + ) => Self::check_string_len(val, *len as usize, CharLengthUnits::Characters), ( - LogicalType::Varchar(Some(len)), + LogicalType::Varchar(Some(len), CharLengthUnits::Octets), DataValue::Utf8 { value: Some(val), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(_), + unit: CharLengthUnits::Octets, }, ) | ( - LogicalType::Char(len), + LogicalType::Char(len, CharLengthUnits::Octets), DataValue::Utf8 { value: Some(val), ty: Utf8Type::Fixed(_), + unit: CharLengthUnits::Octets, }, - ) => val.len() > *len as usize, + ) => Self::check_string_len(val, *len as usize, CharLengthUnits::Octets), (LogicalType::Decimal(full_len, scale_len), DataValue::Decimal(Some(val))) => { if let Some(len) = full_len { if val.mantissa().ilog10() + 1 > *len as u32 { @@ -404,13 +434,15 @@ impl DataValue { LogicalType::UBigint => DataValue::UInt64(None), LogicalType::Float => DataValue::Float32(None), LogicalType::Double => DataValue::Float64(None), - LogicalType::Char(len) => DataValue::Utf8 { + LogicalType::Char(len, unit) => DataValue::Utf8 { value: None, ty: Utf8Type::Fixed(*len), + unit: *unit, }, - LogicalType::Varchar(_) => DataValue::Utf8 { + LogicalType::Varchar(len, unit) => DataValue::Utf8 { value: None, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(*len), + unit: *unit, }, LogicalType::Date => DataValue::Date32(None), LogicalType::DateTime => DataValue::Date64(None), @@ -435,13 +467,15 @@ impl DataValue { LogicalType::UBigint => DataValue::UInt64(Some(0)), LogicalType::Float => DataValue::Float32(Some(0.0)), LogicalType::Double => DataValue::Float64(Some(0.0)), - LogicalType::Char(len) => DataValue::Utf8 { + LogicalType::Char(len, unit) => DataValue::Utf8 { value: Some(String::new()), ty: Utf8Type::Fixed(*len), + unit: *unit, }, - LogicalType::Varchar(_) => DataValue::Utf8 { + LogicalType::Varchar(len, unit) => DataValue::Utf8 { value: Some(String::new()), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(*len), + unit: *unit, }, LogicalType::Date => DataValue::Date32(Some(UNIX_DATETIME.num_days_from_ce())), LogicalType::DateTime => DataValue::Date64(Some(UNIX_DATETIME.timestamp())), @@ -511,24 +545,36 @@ impl DataValue { return Ok(bytes.write_fixedint(*v)?); } } - DataValue::Utf8 { value: v, ty } => { + DataValue::Utf8 { value: v, ty, unit } => { if let Some(v) = v { match ty { - Utf8Type::Variable => { + Utf8Type::Variable(_) => { let string_bytes = v.as_bytes(); let len = string_bytes.len(); bytes.extend_from_slice(string_bytes); return Ok(len); } - Utf8Type::Fixed(len) => { - let mut string_bytes = - format!("{:len$}", v, len = *len as usize).into_bytes(); - let len = string_bytes.len(); - - bytes.append(&mut string_bytes); - return Ok(len); - } + Utf8Type::Fixed(len) => match unit { + CharLengthUnits::Characters => { + let chars_len = *len as usize; + let mut string_bytes = + format!("{:len$}", v, len = chars_len).into_bytes(); + let octets_len = string_bytes.len(); + + bytes.append(&mut string_bytes); + return Ok(octets_len); + } + CharLengthUnits::Octets => { + let octets_len = *len as usize; + let mut string_bytes = v.clone().into_bytes(); + + string_bytes.resize(octets_len, b' '); + assert_eq!(octets_len, string_bytes.len()); + bytes.append(&mut string_bytes); + return Ok(octets_len); + } + }, } } } @@ -597,7 +643,7 @@ impl DataValue { buf.copy_from_slice(bytes); f64::from_ne_bytes(buf) })), - LogicalType::Char(len) => { + LogicalType::Char(len, unit) => { // https://dev.mysql.com/doc/refman/8.0/en/char.html#:~:text=If%20a%20given%20value%20is%20stored%20into%20the%20CHAR(4)%20and%20VARCHAR(4)%20columns%2C%20the%20values%20retrieved%20from%20the%20columns%20are%20not%20always%20the%20same%20because%20trailing%20spaces%20are%20removed%20from%20CHAR%20columns%20upon%20retrieval.%20The%20following%20example%20illustrates%20this%20difference%3A let value = (!bytes.is_empty()).then(|| { let last_non_zero_index = match bytes.iter().rposition(|&x| x != b' ') { @@ -609,14 +655,16 @@ impl DataValue { DataValue::Utf8 { value, ty: Utf8Type::Fixed(*len), + unit: *unit, } } - LogicalType::Varchar(_) => { + LogicalType::Varchar(len, unit) => { let value = (!bytes.is_empty()).then(|| String::from_utf8(bytes.to_owned()).unwrap()); DataValue::Utf8 { value, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(*len), + unit: *unit, } } LogicalType::Date => { @@ -651,13 +699,15 @@ impl DataValue { DataValue::UInt32(_) => LogicalType::UInteger, DataValue::UInt64(_) => LogicalType::UBigint, DataValue::Utf8 { - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(len), + unit, .. - } => LogicalType::Varchar(None), + } => LogicalType::Varchar(*len, *unit), DataValue::Utf8 { ty: Utf8Type::Fixed(len), + unit, .. - } => LogicalType::Char(*len), + } => LogicalType::Char(*len, *unit), DataValue::Date32(_) => LogicalType::Date, DataValue::Date64(_) => LogicalType::DateTime, DataValue::Time(_) => LogicalType::Time, @@ -796,13 +846,15 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(None)), LogicalType::Float => Ok(DataValue::Float32(None)), LogicalType::Double => Ok(DataValue::Float64(None)), - LogicalType::Char(len) => Ok(DataValue::Utf8 { + LogicalType::Char(len, unit) => Ok(DataValue::Utf8 { value: None, ty: Utf8Type::Fixed(*len), + unit: *unit, }), - LogicalType::Varchar(_) => Ok(DataValue::Utf8 { + LogicalType::Varchar(len, unit) => Ok(DataValue::Utf8 { value: None, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(*len), + unit: *unit, }), LogicalType::Date => Ok(DataValue::Date32(None)), LogicalType::DateTime => Ok(DataValue::Date64(None)), @@ -823,16 +875,24 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } _ => Err(DatabaseError::CastFail), }, DataValue::Float32(value) => match to { LogicalType::SqlNull => Ok(DataValue::Null), LogicalType::Float => Ok(DataValue::Float32(value)), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal( value .map(|v| { @@ -850,8 +910,12 @@ impl DataValue { LogicalType::SqlNull => Ok(DataValue::Null), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value)), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal( value .map(|v| { @@ -883,8 +947,12 @@ impl DataValue { LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -911,8 +979,12 @@ impl DataValue { LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -938,8 +1010,12 @@ impl DataValue { LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -964,8 +1040,12 @@ impl DataValue { LogicalType::Bigint => Ok(DataValue::Int64(value)), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v as f64))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -986,8 +1066,12 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1006,8 +1090,12 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1024,8 +1112,12 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1040,8 +1132,12 @@ impl DataValue { LogicalType::UBigint => Ok(DataValue::UInt64(value)), LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))), LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v as f64))), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| { let mut decimal = Decimal::from(v); Self::decimal_round_i(option, &mut decimal); @@ -1087,8 +1183,12 @@ impl DataValue { LogicalType::Double => Ok(DataValue::Float64( value.map(|v| f64::from_str(&v)).transpose()?, )), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } LogicalType::Date => { let option = value .map(|v| { @@ -1130,11 +1230,21 @@ impl DataValue { }, DataValue::Date32(value) => match to { LogicalType::SqlNull => Ok(DataValue::Null), - LogicalType::Char(len) => { - varchar_cast!(Self::format_date(value), Some(len), Utf8Type::Fixed(*len)) + LogicalType::Char(len, unit) => { + varchar_cast!( + Self::format_date(value), + Some(len), + Utf8Type::Fixed(*len), + *unit + ) } - LogicalType::Varchar(len) => { - varchar_cast!(Self::format_date(value), len, Utf8Type::Variable) + LogicalType::Varchar(len, unit) => { + varchar_cast!( + Self::format_date(value), + len, + Utf8Type::Variable(*len), + *unit + ) } LogicalType::Date => Ok(DataValue::Date32(value)), LogicalType::DateTime => { @@ -1150,15 +1260,21 @@ impl DataValue { }, DataValue::Date64(value) => match to { LogicalType::SqlNull => Ok(DataValue::Null), - LogicalType::Char(len) => { + LogicalType::Char(len, unit) => { varchar_cast!( Self::format_datetime(value), Some(len), - Utf8Type::Fixed(*len) + Utf8Type::Fixed(*len), + *unit ) } - LogicalType::Varchar(len) => { - varchar_cast!(Self::format_datetime(value), len, Utf8Type::Variable) + LogicalType::Varchar(len, unit) => { + varchar_cast!( + Self::format_datetime(value), + len, + Utf8Type::Variable(*len), + *unit + ) } LogicalType::Date => { let option = value.and_then(|v| { @@ -1181,11 +1297,21 @@ impl DataValue { }, DataValue::Time(value) => match to { LogicalType::SqlNull => Ok(DataValue::Null), - LogicalType::Char(len) => { - varchar_cast!(Self::format_time(value), Some(len), Utf8Type::Fixed(*len)) + LogicalType::Char(len, unit) => { + varchar_cast!( + Self::format_time(value), + Some(len), + Utf8Type::Fixed(*len), + *unit + ) } - LogicalType::Varchar(len) => { - varchar_cast!(Self::format_time(value), len, Utf8Type::Variable) + LogicalType::Varchar(len, unit) => { + varchar_cast!( + Self::format_time(value), + len, + Utf8Type::Variable(*len), + *unit + ) } _ => Err(DatabaseError::CastFail), }, @@ -1194,8 +1320,12 @@ impl DataValue { LogicalType::Float => Ok(DataValue::Float32(value.and_then(|v| v.to_f32()))), LogicalType::Double => Ok(DataValue::Float64(value.and_then(|v| v.to_f64()))), LogicalType::Decimal(_, _) => Ok(DataValue::Decimal(value)), - LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)), - LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable), + LogicalType::Char(len, unit) => { + varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit) + } + LogicalType::Varchar(len, unit) => { + varchar_cast!(value, len, Utf8Type::Variable(*len), *unit) + } _ => Err(DatabaseError::CastFail), }, DataValue::Tuple(values) => match to { @@ -1306,7 +1436,8 @@ impl From for DataValue { fn from(value: String) -> Self { DataValue::Utf8 { value: Some(value), - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, } } } @@ -1315,7 +1446,8 @@ impl From> for DataValue { fn from(value: Option) -> Self { DataValue::Utf8 { value, - ty: Utf8Type::Variable, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, } } } diff --git a/tests/slt/char.slt b/tests/slt/char.slt new file mode 100644 index 00000000..16a98891 --- /dev/null +++ b/tests/slt/char.slt @@ -0,0 +1,47 @@ +statement ok +create table t1(id int primary key, v1 char, v2 char(5 characters), v3 char(5 octets)) + +statement ok +insert into t1 values(0, '🖕', '🖕🖕🖕🖕🖕', '🖕'); + +statement ok +insert into t1 values(1, null, null, null); + +statement error +insert into t1 values(1, '🖕', '🖕🖕🖕🖕🖕🖕', '🖕'); + +statement error +insert into t1 values(1, '🖕', '🖕🖕🖕🖕🖕', '🖕🖕'); + +query ITT +select * from t1; +---- +0 🖕 🖕🖕🖕🖕🖕 🖕 +1 null null null + +statement ok +create table t2(id int primary key, v1 varchar, v2 varchar(5 characters), v3 varchar(5 octets)) + +statement ok +insert into t2 values(0, '🖕', '🖕🖕🖕🖕🖕', '🖕'); + +statement ok +insert into t2 values(1, null, null, null); + +statement error +insert into t2 values(1, '🖕', '🖕🖕🖕🖕🖕🖕', '🖕'); + +statement error +insert into t2 values(1, '🖕', '🖕🖕🖕🖕🖕', '🖕🖕'); + +query ITT +select * from t2; +---- +0 🖕 🖕🖕🖕🖕🖕 🖕 +1 null null null + +statement ok +drop table t1; + +statement ok +drop table t2; \ No newline at end of file diff --git a/tests/slt/sql_2016/E021_01.slt b/tests/slt/sql_2016/E021_01.slt index ffe33d48..1c0cea04 100644 --- a/tests/slt/sql_2016/E021_01.slt +++ b/tests/slt/sql_2016/E021_01.slt @@ -6,9 +6,8 @@ CREATE TABLE TABLE_E021_01_01_01 ( ID INT PRIMARY KEY, A CHAR ( 8 ) ) statement ok CREATE TABLE TABLE_E021_01_01_02 ( ID INT PRIMARY KEY, A CHAR ( 8 CHARACTERS ) ) -# TODO: char unit: OCTETS -# statement ok -# CREATE TABLE TABLE_E021_01_01_03 ( ID INT PRIMARY KEY, A CHAR ( 8 OCTETS ) ) +statement ok +CREATE TABLE TABLE_E021_01_01_03 ( ID INT PRIMARY KEY, A CHAR ( 8 OCTETS ) ) statement ok CREATE TABLE TABLE_E021_01_01_04 ( ID INT PRIMARY KEY, A CHAR ) @@ -19,9 +18,8 @@ CREATE TABLE TABLE_E021_01_01_05 ( ID INT PRIMARY KEY, A CHARACTER ( 8 ) ) statement ok CREATE TABLE TABLE_E021_01_01_06 ( ID INT PRIMARY KEY, A CHARACTER ( 8 CHARACTERS ) ) -# TODO: char unit: OCTETS -# statement ok -# CREATE TABLE TABLE_E021_01_01_07 ( ID INT PRIMARY KEY, A CHARACTER ( 8 OCTETS ) ) +statement ok +CREATE TABLE TABLE_E021_01_01_07 ( ID INT PRIMARY KEY, A CHARACTER ( 8 OCTETS ) ) statement ok CREATE TABLE TABLE_E021_01_01_08 ( ID INT PRIMARY KEY, A CHARACTER ) diff --git a/tests/slt/sql_2016/E021_02.slt b/tests/slt/sql_2016/E021_02.slt index 45045a94..b4cc607c 100644 --- a/tests/slt/sql_2016/E021_02.slt +++ b/tests/slt/sql_2016/E021_02.slt @@ -6,9 +6,8 @@ CREATE TABLE TABLE_E021_02_01_01 ( ID INT PRIMARY KEY, A CHAR VARYING ( 8 ) ) statement ok CREATE TABLE TABLE_E021_02_01_02 ( ID INT PRIMARY KEY, A CHAR VARYING ( 8 CHARACTERS ) ) -# TODO: char unit: OCTETS -# statement ok -# CREATE TABLE TABLE_E021_02_01_03 ( ID INT PRIMARY KEY, A CHAR VARYING ( 8 OCTETS ) ) +statement ok +CREATE TABLE TABLE_E021_02_01_03 ( ID INT PRIMARY KEY, A CHAR VARYING ( 8 OCTETS ) ) statement ok CREATE TABLE TABLE_E021_02_01_04 ( ID INT PRIMARY KEY, A CHAR VARYING ) @@ -19,9 +18,8 @@ CREATE TABLE TABLE_E021_02_01_05 ( ID INT PRIMARY KEY, A CHARACTER VARYING ( 8 ) statement ok CREATE TABLE TABLE_E021_02_01_06 ( ID INT PRIMARY KEY, A CHARACTER VARYING ( 8 CHARACTERS ) ) -# TODO: char unit: OCTETS -# statement ok -# CREATE TABLE TABLE_E021_02_01_07 ( ID INT PRIMARY KEY, A CHARACTER VARYING ( 8 OCTETS ) ) +statement ok +CREATE TABLE TABLE_E021_02_01_07 ( ID INT PRIMARY KEY, A CHARACTER VARYING ( 8 OCTETS ) ) statement ok CREATE TABLE TABLE_E021_02_01_08 ( ID INT PRIMARY KEY, A CHARACTER VARYING ) @@ -32,9 +30,8 @@ CREATE TABLE TABLE_E021_02_01_09 ( ID INT PRIMARY KEY, A VARCHAR ( 8 ) ) statement ok CREATE TABLE TABLE_E021_02_01_10 ( ID INT PRIMARY KEY, A VARCHAR ( 8 CHARACTERS ) ) -# TODO: char unit: OCTETS -# statement ok -# CREATE TABLE TABLE_E021_02_01_11 ( ID INT PRIMARY KEY, A VARCHAR ( 8 OCTETS ) ) +statement ok +CREATE TABLE TABLE_E021_02_01_11 ( ID INT PRIMARY KEY, A VARCHAR ( 8 OCTETS ) ) statement ok CREATE TABLE TABLE_E021_02_01_12 ( ID INT PRIMARY KEY, A VARCHAR )