From ba036dba6b5ac13632322e5738b81985b47e3618 Mon Sep 17 00:00:00 2001
From: Kould <2435992353@qq.com>
Date: Wed, 27 Mar 2024 22:41:13 +0800
Subject: [PATCH 1/5] feat: support `Octets` for `Char/Varchar`
---
Cargo.toml | 2 +-
src/binder/create_table.rs | 3 +-
src/binder/expr.rs | 13 +-
src/catalog/column.rs | 3 +-
src/execution/volcano/dml/analyze.rs | 4 +-
src/execution/volcano/dml/copy_from_file.rs | 3 +-
src/execution/volcano/dql/describe.rs | 25 +-
src/execution/volcano/dql/explain.rs | 4 +-
src/execution/volcano/dql/show_table.rs | 4 +-
src/expression/evaluator.rs | 17 +-
src/expression/mod.rs | 4 +-
src/expression/value_compute.rs | 77 ++++--
src/marcos/mod.rs | 9 +-
src/optimizer/core/histogram.rs | 2 +-
.../rule/normalization/column_pruning.rs | 4 +-
src/storage/table_codec.rs | 2 +-
src/types/mod.rs | 69 +++---
src/types/tuple.rs | 47 +++-
src/types/tuple_builder.rs | 7 +-
src/types/value.rs | 225 ++++++++++++------
tests/slt/char.slt | 47 ++++
tests/slt/sql_2016/E021_01.slt | 10 +-
tests/slt/sql_2016/E021_02.slt | 15 +-
23 files changed, 397 insertions(+), 199 deletions(-)
create mode 100644 tests/slt/char.slt
diff --git a/Cargo.toml b/Cargo.toml
index 6d8125f0..5f854b1c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -58,7 +58,7 @@ regex = { version = "1.10.3" }
rust_decimal = { version = "1.34.3" }
serde = { version = "1.0.197", features = ["derive", "rc"] }
siphasher = { version = "1.0.0", features = ["serde"] }
-sqlparser = { version = "0.34.0" }
+sqlparser = { version = "0.34.0", features = ["serde"] }
strum_macros = { version = "0.26.2" }
thiserror = { version = "1.0.58" }
tokio = { version = "1.36.0", features = ["full"] }
diff --git a/src/binder/create_table.rs b/src/binder/create_table.rs
index 70717fcc..96f87f11 100644
--- a/src/binder/create_table.rs
+++ b/src/binder/create_table.rs
@@ -147,6 +147,7 @@ mod tests {
use crate::storage::Storage;
use crate::types::LogicalType;
use std::sync::atomic::AtomicUsize;
+ use sqlparser::ast::CharLengthUnits;
use tempfile::TempDir;
#[tokio::test]
@@ -177,7 +178,7 @@ mod tests {
assert_eq!(op.columns[1].nullable, true);
assert_eq!(
op.columns[1].desc,
- ColumnDesc::new(LogicalType::Varchar(Some(10)), false, false, None)
+ ColumnDesc::new(LogicalType::Varchar(Some(10), CharLengthUnits::Characters), false, false, None)
);
}
_ => unreachable!(),
diff --git a/src/binder/expr.rs b/src/binder/expr.rs
index 3e8e0275..18530e29 100644
--- a/src/binder/expr.rs
+++ b/src/binder/expr.rs
@@ -3,10 +3,7 @@ use crate::errors::DatabaseError;
use crate::expression;
use crate::expression::agg::AggKind;
use itertools::Itertools;
-use sqlparser::ast::{
- BinaryOperator, DataType, Expr, Function, FunctionArg, FunctionArgExpr, Ident, Query,
- UnaryOperator,
-};
+use sqlparser::ast::{BinaryOperator, CharLengthUnits, DataType, Expr, Function, FunctionArg, FunctionArgExpr, Ident, Query, UnaryOperator};
use std::slice;
use std::sync::Arc;
@@ -69,7 +66,8 @@ impl<'a, T: Transaction> Binder<'a, T> {
let logical_type = LogicalType::try_from(data_type.clone())?;
let value = DataValue::Utf8 {
value: Some(value.to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}
.cast(&logical_type)?;
@@ -354,7 +352,7 @@ impl<'a, T: Transaction> Binder<'a, T> {
| BinaryOperator::And
| BinaryOperator::Or
| BinaryOperator::Xor => LogicalType::Boolean,
- BinaryOperator::StringConcat => LogicalType::Varchar(None),
+ BinaryOperator::StringConcat => LogicalType::Varchar(None, CharLengthUnits::Characters),
_ => todo!(),
};
@@ -603,7 +601,8 @@ impl<'a, T: Transaction> Binder<'a, T> {
fn wildcard_expr() -> ScalarExpression {
ScalarExpression::Constant(Arc::new(DataValue::Utf8 {
value: Some("*".to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None), // variable-length string; `None` means no length is attached to the "*" constant
+ unit: CharLengthUnits::Characters, // new `unit` field of `DataValue::Utf8`; this constant uses the `Characters` unit
}))
}
}
diff --git a/src/catalog/column.rs b/src/catalog/column.rs
index 32d7653f..15f31908 100644
--- a/src/catalog/column.rs
+++ b/src/catalog/column.rs
@@ -4,6 +4,7 @@ use crate::expression::ScalarExpression;
use serde::{Deserialize, Serialize};
use std::hash::Hash;
use std::sync::Arc;
+use sqlparser::ast::CharLengthUnits;
use crate::types::tuple::EMPTY_TUPLE;
use crate::types::value::ValueRef;
@@ -50,7 +51,7 @@ impl ColumnCatalog {
table_name: None,
},
nullable: true,
- desc: ColumnDesc::new(LogicalType::Varchar(None), false, false, None),
+ desc: ColumnDesc::new(LogicalType::Varchar(None, CharLengthUnits::Characters), false, false, None),
}
}
diff --git a/src/execution/volcano/dml/analyze.rs b/src/execution/volcano/dml/analyze.rs
index ae8b6a8f..be7b9a08 100644
--- a/src/execution/volcano/dml/analyze.rs
+++ b/src/execution/volcano/dml/analyze.rs
@@ -16,6 +16,7 @@ use std::fmt::Formatter;
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use std::{fmt, fs};
+use sqlparser::ast::CharLengthUnits;
const DEFAULT_NUM_OF_BUCKETS: usize = 100;
const DEFAULT_STATISTICS_META_PATH: &str = "fnck_sql_statistics_metas";
@@ -108,7 +109,8 @@ impl Analyze {
meta.to_file(&path)?;
values.push(Arc::new(DataValue::Utf8 {
value: Some(path.clone()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters
}));
transaction.save_table_meta(&table_name, path, meta)?;
}
diff --git a/src/execution/volcano/dml/copy_from_file.rs b/src/execution/volcano/dml/copy_from_file.rs
index 935ebc46..cb9a3d60 100644
--- a/src/execution/volcano/dml/copy_from_file.rs
+++ b/src/execution/volcano/dml/copy_from_file.rs
@@ -107,6 +107,7 @@ mod tests {
use futures::StreamExt;
use std::io::Write;
use std::sync::Arc;
+ use sqlparser::ast::CharLengthUnits;
use tempfile::TempDir;
use super::*;
@@ -148,7 +149,7 @@ mod tests {
table_name: None,
},
nullable: false,
- desc: ColumnDesc::new(LogicalType::Varchar(Some(10)), false, false, None),
+ desc: ColumnDesc::new(LogicalType::Varchar(Some(10), CharLengthUnits::Characters), false, false, None),
}),
];
diff --git a/src/execution/volcano/dql/describe.rs b/src/execution/volcano/dql/describe.rs
index fb27ce64..75279a95 100644
--- a/src/execution/volcano/dql/describe.rs
+++ b/src/execution/volcano/dql/describe.rs
@@ -8,19 +8,23 @@ use crate::types::value::{DataValue, Utf8Type, ValueRef};
use futures_async_stream::try_stream;
use lazy_static::lazy_static;
use std::sync::Arc;
+use sqlparser::ast::CharLengthUnits;
lazy_static! {
static ref PRIMARY_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 {
value: Some(String::from("PRIMARY")),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None), // shared constant "PRIMARY": variable-length string with no length attached
+ unit: CharLengthUnits::Characters
});
static ref UNIQUE_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 {
value: Some(String::from("UNIQUE")),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None), // shared constant "UNIQUE": same string type and unit as the other two constants
+ unit: CharLengthUnits::Characters
});
static ref EMPTY_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 {
value: Some(String::from("EMPTY")),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None), // shared constant "EMPTY": same string type and unit as the other two constants
+ unit: CharLengthUnits::Characters
});
}
@@ -69,24 +73,29 @@ impl Describe {
let values = vec![
Arc::new(DataValue::Utf8 {
value: Some(column.name().to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}),
Arc::new(DataValue::Utf8 {
value: Some(datatype.to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}),
Arc::new(DataValue::Utf8 {
value: datatype.raw_len().map(|len| len.to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}),
Arc::new(DataValue::Utf8 {
value: Some(column.nullable.to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}),
key_fn(column),
Arc::new(DataValue::Utf8 {
value: Some(default),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}),
];
yield Tuple { id: None, values };
diff --git a/src/execution/volcano/dql/explain.rs b/src/execution/volcano/dql/explain.rs
index f1dbb3ca..6be7fd3e 100644
--- a/src/execution/volcano/dql/explain.rs
+++ b/src/execution/volcano/dql/explain.rs
@@ -6,6 +6,7 @@ use crate::types::tuple::Tuple;
use crate::types::value::{DataValue, Utf8Type};
use futures_async_stream::try_stream;
use std::sync::Arc;
+use sqlparser::ast::CharLengthUnits;
pub struct Explain {
plan: LogicalPlan,
@@ -28,7 +29,8 @@ impl Explain {
pub async fn _execute(self) {
let values = vec![Arc::new(DataValue::Utf8 {
value: Some(self.plan.explain(0)),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters
})];
yield Tuple { id: None, values };
diff --git a/src/execution/volcano/dql/show_table.rs b/src/execution/volcano/dql/show_table.rs
index f64b6dd1..3ba7af6f 100644
--- a/src/execution/volcano/dql/show_table.rs
+++ b/src/execution/volcano/dql/show_table.rs
@@ -6,6 +6,7 @@ use crate::types::tuple::Tuple;
use crate::types::value::{DataValue, Utf8Type};
use futures_async_stream::try_stream;
use std::sync::Arc;
+use sqlparser::ast::CharLengthUnits;
pub struct ShowTables;
@@ -23,7 +24,8 @@ impl ShowTables {
for TableMeta { table_name } in metas {
let values = vec![Arc::new(DataValue::Utf8 {
value: Some(table_name.to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters
})];
yield Tuple { id: None, values };
diff --git a/src/expression/evaluator.rs b/src/expression/evaluator.rs
index f97a1d4c..37437ee8 100644
--- a/src/expression/evaluator.rs
+++ b/src/expression/evaluator.rs
@@ -10,6 +10,7 @@ use lazy_static::lazy_static;
use std::cmp;
use std::cmp::Ordering;
use std::sync::Arc;
+use sqlparser::ast::CharLengthUnits;
lazy_static! {
static ref NULL_VALUE: ValueRef = Arc::new(DataValue::Null);
@@ -25,7 +26,8 @@ macro_rules! eval_to_num {
} else {
return Ok(Arc::new(DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters
}));
}
};
@@ -156,7 +158,7 @@ impl ScalarExpression {
from_expr,
} => {
if let Some(mut string) = DataValue::clone(expr.eval(tuple, schema)?.as_ref())
- .cast(&LogicalType::Varchar(None))?
+ .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?
.utf8()
{
if let Some(from_expr) = from_expr {
@@ -169,7 +171,8 @@ impl ScalarExpression {
if from > len_i {
return Ok(Arc::new(DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}));
}
string = string.split_off(from as usize);
@@ -182,19 +185,21 @@ impl ScalarExpression {
Ok(Arc::new(DataValue::Utf8 {
value: Some(string),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}))
} else {
Ok(Arc::new(DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}))
}
}
ScalarExpression::Position { expr, in_expr } => {
let unpack = |expr: &ScalarExpression| -> Result {
Ok(DataValue::clone(expr.eval(tuple, schema)?.as_ref())
- .cast(&LogicalType::Varchar(None))?
+ .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?
.utf8()
.unwrap_or("".to_owned()))
};
diff --git a/src/expression/mod.rs b/src/expression/mod.rs
index ab97c43c..db5f749a 100644
--- a/src/expression/mod.rs
+++ b/src/expression/mod.rs
@@ -5,7 +5,7 @@ use std::hash::Hash;
use std::sync::Arc;
use std::{fmt, mem};
-use sqlparser::ast::{BinaryOperator as SqlBinaryOperator, UnaryOperator as SqlUnaryOperator};
+use sqlparser::ast::{BinaryOperator as SqlBinaryOperator, CharLengthUnits, UnaryOperator as SqlUnaryOperator};
use self::agg::AggKind;
use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef};
@@ -394,7 +394,7 @@ impl ScalarExpression {
ScalarExpression::IsNull { .. }
| ScalarExpression::In { .. }
| ScalarExpression::Between { .. } => LogicalType::Boolean,
- ScalarExpression::SubString { .. } => LogicalType::Varchar(None),
+ ScalarExpression::SubString { .. } => LogicalType::Varchar(None, CharLengthUnits::Characters),
ScalarExpression::Position { .. } => LogicalType::Integer,
ScalarExpression::Alias { expr, .. } | ScalarExpression::Reference { expr, .. } => {
expr.return_type()
diff --git a/src/expression/value_compute.rs b/src/expression/value_compute.rs
index 3ceb7031..11f7363b 100644
--- a/src/expression/value_compute.rs
+++ b/src/expression/value_compute.rs
@@ -4,6 +4,7 @@ use crate::types::value::{DataValue, Utf8Type, ValueRef};
use crate::types::LogicalType;
use regex::Regex;
use std::cmp::Ordering;
+use sqlparser::ast::CharLengthUnits;
fn unpack_bool(value: DataValue) -> Option {
match value {
@@ -193,8 +194,8 @@ impl DataValue {
op: &BinaryOperator,
) -> Result {
if let BinaryOperator::Like(escape_char) | BinaryOperator::NotLike(escape_char) = op {
- let value_option = unpack_utf8(self.clone().cast(&LogicalType::Varchar(None))?);
- let pattern_option = unpack_utf8(right.clone().cast(&LogicalType::Varchar(None))?);
+ let value_option = unpack_utf8(self.clone().cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?);
+ let pattern_option = unpack_utf8(right.clone().cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?);
let mut is_match = if let (Some(value), Some(pattern)) = (value_option, pattern_option)
{
@@ -511,7 +512,7 @@ impl DataValue {
_ => return Err(DatabaseError::UnsupportedBinaryOperator(unified_type, *op)),
}
}
- LogicalType::Varchar(_) | LogicalType::Char(_) => {
+ LogicalType::Varchar(_, _) | LogicalType::Char(_, _) => {
let left_value = unpack_utf8(self.clone().cast(&unified_type)?);
let right_value = unpack_utf8(right.clone().cast(&unified_type)?);
@@ -576,7 +577,8 @@ impl DataValue {
DataValue::Utf8 {
value,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}
}
_ => return Err(DatabaseError::UnsupportedBinaryOperator(unified_type, *op)),
@@ -649,6 +651,7 @@ impl DataValue {
#[cfg(test)]
mod test {
+ use sqlparser::ast::CharLengthUnits;
use crate::errors::DatabaseError;
use crate::expression::BinaryOperator;
use crate::types::value::{DataValue, Utf8Type};
@@ -1544,11 +1547,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("b".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::Gt
)?,
@@ -1558,11 +1563,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("b".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::Lt
)?,
@@ -1572,11 +1579,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::GtEq
)?,
@@ -1586,11 +1595,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::LtEq
)?,
@@ -1600,11 +1611,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::NotEq
)?,
@@ -1614,11 +1627,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::Eq
)?,
@@ -1629,11 +1644,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::Gt
)?,
@@ -1643,11 +1660,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::Lt
)?,
@@ -1657,11 +1676,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::GtEq
)?,
@@ -1671,11 +1692,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::LtEq
)?,
@@ -1685,11 +1708,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::NotEq
)?,
diff --git a/src/marcos/mod.rs b/src/marcos/mod.rs
index 39332a1d..f82ddd67 100644
--- a/src/marcos/mod.rs
+++ b/src/marcos/mod.rs
@@ -138,6 +138,7 @@ mod test {
use serde::Deserialize;
use serde::Serialize;
use std::sync::Arc;
+ use sqlparser::ast::CharLengthUnits;
fn build_tuple() -> (Tuple, SchemaRef) {
let schema_ref = Arc::new(vec![
@@ -149,14 +150,15 @@ mod test {
Arc::new(ColumnCatalog::new(
"c2".to_string(),
false,
- ColumnDesc::new(LogicalType::Varchar(None), false, false, None),
+ ColumnDesc::new(LogicalType::Varchar(None, CharLengthUnits::Characters), false, false, None),
)),
]);
let values = vec![
Arc::new(DataValue::Int32(Some(9))),
Arc::new(DataValue::Utf8 {
value: Some("LOL".to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}),
];
@@ -207,7 +209,8 @@ mod test {
ScalarExpression::Constant(Arc::new(DataValue::Int8(Some(1)))),
ScalarExpression::Constant(Arc::new(DataValue::Utf8 {
value: Some("1".to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
})),
],
&Tuple {
diff --git a/src/optimizer/core/histogram.rs b/src/optimizer/core/histogram.rs
index 17cdf193..a103565b 100644
--- a/src/optimizer/core/histogram.rs
+++ b/src/optimizer/core/histogram.rs
@@ -255,7 +255,7 @@ impl Histogram {
) -> Result {
let float_value = |value: &DataValue, prefix_len: usize| {
let value = match value.logical_type() {
- LogicalType::Varchar(_) | LogicalType::Char(_) => match value {
+ LogicalType::Varchar(_, ..) | LogicalType::Char(_, ..) => match value {
DataValue::Utf8 { value, .. } => value.as_ref().map(|string| {
if prefix_len > string.len() {
return 0.0;
diff --git a/src/optimizer/rule/normalization/column_pruning.rs b/src/optimizer/rule/normalization/column_pruning.rs
index e48e98e1..1322a669 100644
--- a/src/optimizer/rule/normalization/column_pruning.rs
+++ b/src/optimizer/rule/normalization/column_pruning.rs
@@ -12,6 +12,7 @@ use itertools::Itertools;
use lazy_static::lazy_static;
use std::collections::HashSet;
use std::sync::Arc;
+use sqlparser::ast::CharLengthUnits;
lazy_static! {
static ref COLUMN_PRUNING_RULE: Pattern = {
@@ -63,7 +64,8 @@ impl ColumnPruning {
if op.agg_calls.is_empty() && op.groupby_exprs.is_empty() {
let value = Arc::new(DataValue::Utf8 {
value: Some("*".to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
});
// only single COUNT(*) is not depend on any column
// removed all expressions from the aggregate: push a COUNT(*)
diff --git a/src/storage/table_codec.rs b/src/storage/table_codec.rs
index 3994e489..fc05b7d5 100644
--- a/src/storage/table_codec.rs
+++ b/src/storage/table_codec.rs
@@ -172,7 +172,7 @@ impl TableCodec {
| LogicalType::USmallint
| LogicalType::UInteger
| LogicalType::UBigint
- | LogicalType::Varchar(_)
+ | LogicalType::Varchar(_, ..)
) {
return Err(DatabaseError::InvalidType);
}
diff --git a/src/types/mod.rs b/src/types/mod.rs
index 209dcda6..8e9e05f7 100644
--- a/src/types/mod.rs
+++ b/src/types/mod.rs
@@ -10,7 +10,7 @@ use std::any::TypeId;
use std::cmp;
use crate::errors::DatabaseError;
-use sqlparser::ast::{CharLengthUnits, CharacterLength, ExactNumberInfo, TimezoneInfo};
+use sqlparser::ast::{CharLengthUnits, ExactNumberInfo, TimezoneInfo};
use strum_macros::AsRefStr;
pub type ColumnId = u32;
@@ -34,8 +34,8 @@ pub enum LogicalType {
UBigint,
Float,
Double,
- Char(u32),
- Varchar(Option),
+ Char(u32, CharLengthUnits),
+ Varchar(Option, CharLengthUnits),
Date,
DateTime,
Time,
@@ -75,7 +75,7 @@ impl LogicalType {
} else if type_id == TypeId::of::() {
Some(LogicalType::Decimal(None, None))
} else if type_id == TypeId::of::() {
- Some(LogicalType::Varchar(None))
+ Some(LogicalType::Varchar(None, CharLengthUnits::Characters))
} else {
None
}
@@ -96,8 +96,11 @@ impl LogicalType {
LogicalType::Float => Some(4),
LogicalType::Double => Some(8),
/// Note: The non-fixed length type's raw_len is None e.g. Varchar
- LogicalType::Varchar(_) => None,
- LogicalType::Char(len) => Some(*len as usize),
+ LogicalType::Varchar(_, _) => None,
+ LogicalType::Char(len, unit) => match unit {
+ CharLengthUnits::Characters => None,
+ CharLengthUnits::Octets => Some(*len as usize)
+ },
LogicalType::Decimal(_, _) => Some(16),
LogicalType::Date => Some(4),
LogicalType::DateTime => Some(8),
@@ -179,8 +182,8 @@ impl LogicalType {
}
if matches!(
(left, right),
- (LogicalType::Date, LogicalType::Varchar(_))
- | (LogicalType::Varchar(_), LogicalType::Date)
+ (LogicalType::Date, LogicalType::Varchar(_, ..))
+ | (LogicalType::Varchar(_, ..), LogicalType::Date)
) {
return Ok(LogicalType::Date);
}
@@ -192,15 +195,15 @@ impl LogicalType {
}
if matches!(
(left, right),
- (LogicalType::DateTime, LogicalType::Varchar(_))
- | (LogicalType::Varchar(_), LogicalType::DateTime)
+ (LogicalType::DateTime, LogicalType::Varchar(_, ..))
+ | (LogicalType::Varchar(_, ..), LogicalType::DateTime)
) {
return Ok(LogicalType::DateTime);
}
- if let (LogicalType::Char(_), LogicalType::Varchar(len))
- | (LogicalType::Varchar(len), LogicalType::Char(_)) = (left, right)
+ if let (LogicalType::Char(_, ..), LogicalType::Varchar(len, ..))
+ | (LogicalType::Varchar(len, ..), LogicalType::Char(_, ..)) = (left, right)
{
- return Ok(LogicalType::Varchar(*len));
+ return Ok(LogicalType::Varchar(*len, CharLengthUnits::Characters));
}
Err(DatabaseError::Incomparable(*left, *right))
}
@@ -296,20 +299,20 @@ impl LogicalType {
LogicalType::UBigint => matches!(to, LogicalType::Float | LogicalType::Double),
LogicalType::Float => matches!(to, LogicalType::Double),
LogicalType::Double => false,
- LogicalType::Char(_) => false,
- LogicalType::Varchar(_) => false,
+ LogicalType::Char(_, ..) => false,
+ LogicalType::Varchar(_, ..) => false,
LogicalType::Date => matches!(
to,
- LogicalType::DateTime | LogicalType::Varchar(_) | LogicalType::Char(_)
+ LogicalType::DateTime | LogicalType::Varchar(_, ..) | LogicalType::Char(_, ..)
),
LogicalType::DateTime => matches!(
to,
LogicalType::Date
| LogicalType::Time
- | LogicalType::Varchar(_)
- | LogicalType::Char(_)
+ | LogicalType::Varchar(_, ..)
+ | LogicalType::Char(_, ..)
),
- LogicalType::Time => matches!(to, LogicalType::Varchar(_) | LogicalType::Char(_)),
+ LogicalType::Time => matches!(to, LogicalType::Varchar(_, ..) | LogicalType::Char(_, ..)),
LogicalType::Decimal(_, _) | LogicalType::Tuple => false,
}
}
@@ -324,31 +327,23 @@ impl TryFrom for LogicalType {
sqlparser::ast::DataType::Char(char_len)
| sqlparser::ast::DataType::Character(char_len) => {
let mut len = 1;
- if let Some(CharacterLength { length, unit }) = char_len {
- if matches!(unit, Some(CharLengthUnits::Octets)) {
- return Err(DatabaseError::UnsupportedStmt(format!(
- "char unit: {:?}",
- unit
- )));
- }
- len = cmp::max(len, length)
+ let mut char_unit = None;
+ if let Some(sqlparser::ast::CharacterLength { length, unit }) = char_len {
+ len = cmp::max(len, length);
+ char_unit = unit;
}
- Ok(LogicalType::Char(len as u32))
+ Ok(LogicalType::Char(len as u32, char_unit.unwrap_or(CharLengthUnits::Characters)))
}
sqlparser::ast::DataType::CharVarying(varchar_len)
| sqlparser::ast::DataType::CharacterVarying(varchar_len)
| sqlparser::ast::DataType::Varchar(varchar_len) => {
let mut len = None;
- if let Some(CharacterLength { length, unit }) = varchar_len {
- if matches!(unit, Some(CharLengthUnits::Octets)) {
- return Err(DatabaseError::UnsupportedStmt(format!(
- "char unit: {:?}",
- unit
- )));
- }
- len = Some(length as u32)
+ let mut char_unit = None;
+ if let Some(sqlparser::ast::CharacterLength { length, unit }) = varchar_len {
+ len = Some(length as u32);
+ char_unit = unit;
}
- Ok(LogicalType::Varchar(len))
+ Ok(LogicalType::Varchar(len, char_unit.unwrap_or(CharLengthUnits::Characters)))
}
sqlparser::ast::DataType::Float(_) => Ok(LogicalType::Float),
sqlparser::ast::DataType::Double | sqlparser::ast::DataType::DoublePrecision => {
diff --git a/src/types/tuple.rs b/src/types/tuple.rs
index dff31ffb..97d5cd5d 100644
--- a/src/types/tuple.rs
+++ b/src/types/tuple.rs
@@ -176,6 +176,7 @@ mod tests {
use itertools::Itertools;
use rust_decimal::Decimal;
use std::sync::Arc;
+ use sqlparser::ast::CharLengthUnits;
#[test]
fn test_tuple_serialize_to_and_deserialize_from() {
@@ -193,7 +194,7 @@ mod tests {
Arc::new(ColumnCatalog::new(
"c3".to_string(),
false,
- ColumnDesc::new(LogicalType::Varchar(Some(2)), false, false, None),
+ ColumnDesc::new(LogicalType::Varchar(Some(2), CharLengthUnits::Characters), false, false, None),
)),
Arc::new(ColumnCatalog::new(
"c4".to_string(),
@@ -248,7 +249,17 @@ mod tests {
Arc::new(ColumnCatalog::new(
"c14".to_string(),
false,
- ColumnDesc::new(LogicalType::Char(1), false, false, None),
+ ColumnDesc::new(LogicalType::Char(1, CharLengthUnits::Characters), false, false, None),
+ )),
+ Arc::new(ColumnCatalog::new(
+ "c15".to_string(),
+ false,
+ ColumnDesc::new(LogicalType::Varchar(Some(2), CharLengthUnits::Octets), false, false, None),
+ )),
+ Arc::new(ColumnCatalog::new(
+ "c16".to_string(),
+ false,
+ ColumnDesc::new(LogicalType::Char(1, CharLengthUnits::Octets), false, false, None),
)),
]);
@@ -260,7 +271,8 @@ mod tests {
Arc::new(DataValue::UInt32(Some(1))),
Arc::new(DataValue::Utf8 {
value: Some("LOL".to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(Some(2)),
+ unit: CharLengthUnits::Characters,
}),
Arc::new(DataValue::Int16(Some(1))),
Arc::new(DataValue::UInt16(Some(1))),
@@ -275,6 +287,17 @@ mod tests {
Arc::new(DataValue::Utf8 {
value: Some("K".to_string()),
ty: Utf8Type::Fixed(1),
+ unit: CharLengthUnits::Characters,
+ }),
+ Arc::new(DataValue::Utf8 {
+ value: Some("LOL".to_string()),
+ ty: Utf8Type::Variable(Some(2)),
+ unit: CharLengthUnits::Octets,
+ }),
+ Arc::new(DataValue::Utf8 {
+ value: Some("K".to_string()),
+ ty: Utf8Type::Fixed(1),
+ unit: CharLengthUnits::Octets,
}),
],
},
@@ -285,7 +308,8 @@ mod tests {
Arc::new(DataValue::UInt32(None)),
Arc::new(DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(Some(2)),
+ unit: CharLengthUnits::Characters,
}),
Arc::new(DataValue::Int16(None)),
Arc::new(DataValue::UInt16(None)),
@@ -300,6 +324,17 @@ mod tests {
Arc::new(DataValue::Utf8 {
value: None,
ty: Utf8Type::Fixed(1),
+ unit: CharLengthUnits::Characters,
+ }),
+ Arc::new(DataValue::Utf8 {
+ value: None,
+ ty: Utf8Type::Variable(Some(2)),
+ unit: CharLengthUnits::Octets,
+ }),
+ Arc::new(DataValue::Utf8 {
+ value: None,
+ ty: Utf8Type::Fixed(1),
+ unit: CharLengthUnits::Octets,
}),
],
},
@@ -312,13 +347,13 @@ mod tests {
let tuple_0 = Tuple::deserialize_from(
&types,
- &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
&columns,
&tuples[0].serialize_to(&types).unwrap(),
);
let tuple_1 = Tuple::deserialize_from(
&types,
- &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
&columns,
&tuples[1].serialize_to(&types).unwrap(),
);
diff --git a/src/types/tuple_builder.rs b/src/types/tuple_builder.rs
index d9763a87..95c16d33 100644
--- a/src/types/tuple_builder.rs
+++ b/src/types/tuple_builder.rs
@@ -2,6 +2,7 @@ use crate::errors::DatabaseError;
use crate::types::tuple::{Schema, Tuple};
use crate::types::value::{DataValue, Utf8Type};
use std::sync::Arc;
+use sqlparser::ast::CharLengthUnits;
pub struct TupleBuilder<'a> {
schema: &'a Schema,
@@ -15,7 +16,8 @@ impl<'a> TupleBuilder<'a> {
pub fn build_result(message: String) -> Tuple {
let values = vec![Arc::new(DataValue::Utf8 {
value: Some(message),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
})];
Tuple { id: None, values }
@@ -32,7 +34,8 @@ impl<'a> TupleBuilder<'a> {
let data_value = Arc::new(
DataValue::Utf8 {
value: Some(value.to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}
.cast(self.schema[i].datatype())?,
);
diff --git a/src/types/value.rs b/src/types/value.rs
index 9e4300d0..9dac3b66 100644
--- a/src/types/value.rs
+++ b/src/types/value.rs
@@ -14,6 +14,7 @@ use crate::errors::DatabaseError;
use ordered_float::OrderedFloat;
use rust_decimal::prelude::{FromPrimitive, ToPrimitive};
use serde::{Deserialize, Serialize};
+use sqlparser::ast::{CharLengthUnits};
use super::LogicalType;
@@ -34,7 +35,7 @@ pub type ValueRef = Arc;
#[derive(Clone, Serialize, Deserialize)]
pub enum Utf8Type {
- Variable,
+ Variable(Option<u32>), // variable-length string; payload is the declared length, `None` when no length was declared
Fixed(u32),
}
@@ -55,6 +56,7 @@ pub enum DataValue {
Utf8 {
value: Option,
ty: Utf8Type,
+ unit: CharLengthUnits,
},
/// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01
Date32(Option),
@@ -248,23 +250,25 @@ impl Hash for DataValue {
}
}
macro_rules! varchar_cast {
- ($value:expr, $len:expr, $ty:expr) => {
+ ($value:expr, $len:expr, $ty:expr, $unit:expr) => { // `$unit` is the unit `$len` is measured in; it is also stored on the produced `Utf8` value
$value
.map(|v| {
let string_value = format!("{}", v);
if let Some(len) = $len {
- if string_value.len() > *len as usize {
+ if Self::check_string_len(&string_value, *len as usize, $unit) { // `true` here means the formatted value is longer than `len` in `$unit`
return Err(DatabaseError::TooLong);
}
}
Ok(DataValue::Utf8 {
value: Some(string_value),
ty: $ty,
+ unit: $unit,
})
})
.unwrap_or(Ok(DataValue::Utf8 {
value: None,
ty: $ty,
+ unit: $unit, // a `None` input still yields a `Utf8` (with `value: None`) carrying the same `ty` and `unit`
}))
};
}
@@ -315,22 +319,48 @@ impl DataValue {
}
}
+ pub(crate) fn check_string_len(string: &str, len: usize, unit: CharLengthUnits) -> bool { // returns `true` when `string` is LONGER than `len` (i.e. over the limit), measured in `unit`
+ match unit {
+ CharLengthUnits::Characters => string.chars().count() > len, // counts `char`s (Unicode scalar values), not bytes
+ CharLengthUnits::Octets => string.len() > len, // `str::len` is the length in UTF-8 bytes
+ }
+ }
+
pub(crate) fn check_len(&self, logic_type: &LogicalType) -> Result<(), DatabaseError> {
let is_over_len = match (logic_type, self) {
+ (LogicalType::Varchar(None, _), _) => false,
+ (
+ LogicalType::Varchar(Some(len), CharLengthUnits::Characters),
+ DataValue::Utf8 {
+ value: Some(val),
+ ty: Utf8Type::Variable(_),
+ unit: CharLengthUnits::Characters
+ },
+ )
+ | (
+ LogicalType::Char(len, CharLengthUnits::Characters),
+ DataValue::Utf8 {
+ value: Some(val),
+ ty: Utf8Type::Fixed(_),
+ unit: CharLengthUnits::Characters
+ },
+ ) => Self::check_string_len(val, *len as usize, CharLengthUnits::Characters),
(
- LogicalType::Varchar(Some(len)),
+ LogicalType::Varchar(Some(len), CharLengthUnits::Octets),
DataValue::Utf8 {
value: Some(val),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(_),
+ unit: CharLengthUnits::Octets
},
)
| (
- LogicalType::Char(len),
+ LogicalType::Char(len, CharLengthUnits::Octets),
DataValue::Utf8 {
value: Some(val),
ty: Utf8Type::Fixed(_),
+ unit: CharLengthUnits::Octets
},
- ) => val.len() > *len as usize,
+ ) => Self::check_string_len(val, *len as usize, CharLengthUnits::Octets),
(LogicalType::Decimal(full_len, scale_len), DataValue::Decimal(Some(val))) => {
if let Some(len) = full_len {
if val.mantissa().ilog10() + 1 > *len as u32 {
@@ -404,13 +434,17 @@ impl DataValue {
LogicalType::UBigint => DataValue::UInt64(None),
LogicalType::Float => DataValue::Float32(None),
LogicalType::Double => DataValue::Float64(None),
- LogicalType::Char(len) => DataValue::Utf8 {
- value: None,
- ty: Utf8Type::Fixed(*len),
+ LogicalType::Char(len, unit) => {
+ DataValue::Utf8 {
+ value: None,
+ ty: Utf8Type::Fixed(*len),
+ unit: *unit,
+ }
},
- LogicalType::Varchar(_) => DataValue::Utf8 {
+ LogicalType::Varchar(len, unit) => DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(*len),
+ unit: *unit,
},
LogicalType::Date => DataValue::Date32(None),
LogicalType::DateTime => DataValue::Date64(None),
@@ -435,13 +469,17 @@ impl DataValue {
LogicalType::UBigint => DataValue::UInt64(Some(0)),
LogicalType::Float => DataValue::Float32(Some(0.0)),
LogicalType::Double => DataValue::Float64(Some(0.0)),
- LogicalType::Char(len) => DataValue::Utf8 {
- value: Some(String::new()),
- ty: Utf8Type::Fixed(*len),
+ LogicalType::Char(len, unit) => {
+ DataValue::Utf8 {
+ value: Some(String::new()),
+ ty: Utf8Type::Fixed(*len),
+ unit: *unit,
+ }
},
- LogicalType::Varchar(_) => DataValue::Utf8 {
+ LogicalType::Varchar(len, unit) => DataValue::Utf8 {
value: Some(String::new()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(*len),
+ unit: *unit,
},
LogicalType::Date => DataValue::Date32(Some(UNIX_DATETIME.num_days_from_ce())),
LogicalType::DateTime => DataValue::Date64(Some(UNIX_DATETIME.timestamp())),
@@ -511,10 +549,10 @@ impl DataValue {
return Ok(bytes.write_fixedint(*v)?);
}
}
- DataValue::Utf8 { value: v, ty } => {
+ DataValue::Utf8 { value: v, ty, unit } => {
if let Some(v) = v {
match ty {
- Utf8Type::Variable => {
+ Utf8Type::Variable(_) => {
let string_bytes = v.as_bytes();
let len = string_bytes.len();
@@ -522,12 +560,30 @@ impl DataValue {
return Ok(len);
}
Utf8Type::Fixed(len) => {
- let mut string_bytes =
- format!("{:len$}", v, len = *len as usize).into_bytes();
- let len = string_bytes.len();
-
- bytes.append(&mut string_bytes);
- return Ok(len);
+ match unit {
+ CharLengthUnits::Characters => {
+ let chars_len = *len as usize;
+ let mut string_bytes =
+ format!("{:len$}", v, len = chars_len).into_bytes();
+ let octets_len = string_bytes.len();
+
+ bytes.append(&mut string_bytes);
+ return Ok(octets_len);
+ }
+ CharLengthUnits::Octets => {
+ let octets_len = *len as usize;
+ let mut string = v.clone();
+
+ for _ in 0..(octets_len - string.len()) {
+ string.push(' ')
+ }
+ let mut string_bytes = string.into_bytes();
+
+ assert_eq!(octets_len, string_bytes.len());
+ bytes.append(&mut string_bytes);
+ return Ok(octets_len);
+ }
+ }
}
}
}
@@ -597,7 +653,7 @@ impl DataValue {
buf.copy_from_slice(bytes);
f64::from_ne_bytes(buf)
})),
- LogicalType::Char(len) => {
+ LogicalType::Char(len, unit) => {
// https://dev.mysql.com/doc/refman/8.0/en/char.html#:~:text=If%20a%20given%20value%20is%20stored%20into%20the%20CHAR(4)%20and%20VARCHAR(4)%20columns%2C%20the%20values%20retrieved%20from%20the%20columns%20are%20not%20always%20the%20same%20because%20trailing%20spaces%20are%20removed%20from%20CHAR%20columns%20upon%20retrieval.%20The%20following%20example%20illustrates%20this%20difference%3A
let value = (!bytes.is_empty()).then(|| {
let last_non_zero_index = match bytes.iter().rposition(|&x| x != b' ') {
@@ -609,14 +665,16 @@ impl DataValue {
DataValue::Utf8 {
value,
ty: Utf8Type::Fixed(*len),
+ unit: *unit,
}
}
- LogicalType::Varchar(_) => {
+ LogicalType::Varchar(len, unit) => {
let value =
(!bytes.is_empty()).then(|| String::from_utf8(bytes.to_owned()).unwrap());
DataValue::Utf8 {
value,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(*len),
+ unit: *unit,
}
}
LogicalType::Date => {
@@ -651,13 +709,19 @@ impl DataValue {
DataValue::UInt32(_) => LogicalType::UInteger,
DataValue::UInt64(_) => LogicalType::UBigint,
DataValue::Utf8 {
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(len),
+ unit,
..
- } => LogicalType::Varchar(None),
+ } => {
+ LogicalType::Varchar(*len, *unit)
+ },
DataValue::Utf8 {
ty: Utf8Type::Fixed(len),
+ unit,
..
- } => LogicalType::Char(*len),
+ } => {
+ LogicalType::Char(*len, *unit)
+ },
DataValue::Date32(_) => LogicalType::Date,
DataValue::Date64(_) => LogicalType::DateTime,
DataValue::Time(_) => LogicalType::Time,
@@ -796,13 +860,17 @@ impl DataValue {
LogicalType::UBigint => Ok(DataValue::UInt64(None)),
LogicalType::Float => Ok(DataValue::Float32(None)),
LogicalType::Double => Ok(DataValue::Float64(None)),
- LogicalType::Char(len) => Ok(DataValue::Utf8 {
- value: None,
- ty: Utf8Type::Fixed(*len),
- }),
- LogicalType::Varchar(_) => Ok(DataValue::Utf8 {
+ LogicalType::Char(len, unit) => {
+ Ok(DataValue::Utf8 {
+ value: None,
+ ty: Utf8Type::Fixed(*len),
+ unit: *unit,
+ })
+ },
+ LogicalType::Varchar(len, unit) => Ok(DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(*len),
+ unit: *unit,
}),
LogicalType::Date => Ok(DataValue::Date32(None)),
LogicalType::DateTime => Ok(DataValue::Date64(None)),
@@ -823,16 +891,16 @@ impl DataValue {
LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit),
+ LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit),
_ => Err(DatabaseError::CastFail),
},
DataValue::Float32(value) => match to {
LogicalType::SqlNull => Ok(DataValue::Null),
LogicalType::Float => Ok(DataValue::Float32(value)),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit),
+ LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit),
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(
value
.map(|v| {
@@ -850,8 +918,8 @@ impl DataValue {
LogicalType::SqlNull => Ok(DataValue::Null),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))),
LogicalType::Double => Ok(DataValue::Float64(value)),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit),
+ LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit),
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(
value
.map(|v| {
@@ -883,8 +951,8 @@ impl DataValue {
LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit),
+ LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit),
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -911,8 +979,8 @@ impl DataValue {
LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit),
+ LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit),
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -938,8 +1006,8 @@ impl DataValue {
LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit),
+ LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit),
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -964,8 +1032,8 @@ impl DataValue {
LogicalType::Bigint => Ok(DataValue::Int64(value)),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v as f64))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit),
+ LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit),
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -986,8 +1054,8 @@ impl DataValue {
LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit),
+ LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit),
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -1006,8 +1074,8 @@ impl DataValue {
LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit),
+ LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit),
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -1024,8 +1092,8 @@ impl DataValue {
LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit),
+ LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit),
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -1040,8 +1108,8 @@ impl DataValue {
LogicalType::UBigint => Ok(DataValue::UInt64(value)),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v as f64))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit),
+ LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit),
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -1087,8 +1155,8 @@ impl DataValue {
LogicalType::Double => Ok(DataValue::Float64(
value.map(|v| f64::from_str(&v)).transpose()?,
)),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit),
+ LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit),
LogicalType::Date => {
let option = value
.map(|v| {
@@ -1130,11 +1198,11 @@ impl DataValue {
},
DataValue::Date32(value) => match to {
LogicalType::SqlNull => Ok(DataValue::Null),
- LogicalType::Char(len) => {
- varchar_cast!(Self::format_date(value), Some(len), Utf8Type::Fixed(*len))
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(Self::format_date(value), Some(len), Utf8Type::Fixed(*len), *unit)
}
- LogicalType::Varchar(len) => {
- varchar_cast!(Self::format_date(value), len, Utf8Type::Variable)
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(Self::format_date(value), len, Utf8Type::Variable(*len), *unit)
}
LogicalType::Date => Ok(DataValue::Date32(value)),
LogicalType::DateTime => {
@@ -1150,15 +1218,16 @@ impl DataValue {
},
DataValue::Date64(value) => match to {
LogicalType::SqlNull => Ok(DataValue::Null),
- LogicalType::Char(len) => {
+ LogicalType::Char(len, unit) => {
varchar_cast!(
Self::format_datetime(value),
Some(len),
- Utf8Type::Fixed(*len)
+ Utf8Type::Fixed(*len),
+ *unit
)
}
- LogicalType::Varchar(len) => {
- varchar_cast!(Self::format_datetime(value), len, Utf8Type::Variable)
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(Self::format_datetime(value), len, Utf8Type::Variable(*len), *unit)
}
LogicalType::Date => {
let option = value.and_then(|v| {
@@ -1181,11 +1250,11 @@ impl DataValue {
},
DataValue::Time(value) => match to {
LogicalType::SqlNull => Ok(DataValue::Null),
- LogicalType::Char(len) => {
- varchar_cast!(Self::format_time(value), Some(len), Utf8Type::Fixed(*len))
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(Self::format_time(value), Some(len), Utf8Type::Fixed(*len), *unit)
}
- LogicalType::Varchar(len) => {
- varchar_cast!(Self::format_time(value), len, Utf8Type::Variable)
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(Self::format_time(value), len, Utf8Type::Variable(*len), *unit)
}
_ => Err(DatabaseError::CastFail),
},
@@ -1194,8 +1263,8 @@ impl DataValue {
LogicalType::Float => Ok(DataValue::Float32(value.and_then(|v| v.to_f32()))),
LogicalType::Double => Ok(DataValue::Float64(value.and_then(|v| v.to_f64()))),
LogicalType::Decimal(_, _) => Ok(DataValue::Decimal(value)),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit),
+ LogicalType::Varchar(len, unit) => varchar_cast!(value, len, Utf8Type::Variable(*len), *unit),
_ => Err(DatabaseError::CastFail),
},
DataValue::Tuple(values) => match to {
@@ -1306,7 +1375,8 @@ impl From for DataValue {
fn from(value: String) -> Self {
DataValue::Utf8 {
value: Some(value),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}
}
}
@@ -1315,7 +1385,8 @@ impl From
From 706a92aa7f629b4fbf853839b1c4a8eb331235a0 Mon Sep 17 00:00:00 2001
From: Kould <2435992353@qq.com>
Date: Thu, 28 Mar 2024 02:16:33 +0800
Subject: [PATCH 4/5] docs: change icons place
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 2f2cc062..e97a946a 100755
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@ Built by @KipData
+
@@ -24,7 +25,6 @@ Built by @KipData
-
From 317d76443053b6c227408b7befff2be2ce53c6f8 Mon Sep 17 00:00:00 2001
From: Kould <2435992353@qq.com>
Date: Thu, 28 Mar 2024 15:55:30 +0800
Subject: [PATCH 5/5] style: while push -> resize on `src/types/value.rs`
---
src/types/value.rs | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/src/types/value.rs b/src/types/value.rs
index 1bc1f272..0768b46c 100644
--- a/src/types/value.rs
+++ b/src/types/value.rs
@@ -567,13 +567,9 @@ impl DataValue {
}
CharLengthUnits::Octets => {
let octets_len = *len as usize;
- let mut string = v.clone();
-
- for _ in 0..(octets_len - string.len()) {
- string.push(' ')
- }
- let mut string_bytes = string.into_bytes();
+ let mut string_bytes = v.clone().into_bytes();
+ string_bytes.resize(octets_len, b' ');
assert_eq!(octets_len, string_bytes.len());
bytes.append(&mut string_bytes);
return Ok(octets_len);