From 126c6c86b7548cfc539616858e51b613a279b2e2 Mon Sep 17 00:00:00 2001
From: Kould <2435992353@qq.com>
Date: Thu, 28 Mar 2024 17:12:23 +0800
Subject: [PATCH] feat: support `Octets` for `Char/Varchar` (#184)
* feat: support `Octets` for `Char/Varchar`
* style: code fmt & fix `Char/Varchar` on Server
* docs: add ospp icon
* docs: change icons place
* style: while push -> resize on `src/types/value.rs`
---
Cargo.toml | 2 +-
README.md | 1 +
src/bin/server.rs | 6 +-
src/binder/create_table.rs | 8 +-
src/binder/expr.rs | 12 +-
src/catalog/column.rs | 8 +-
src/execution/volcano/dml/analyze.rs | 4 +-
src/execution/volcano/dml/copy_from_file.rs | 8 +-
src/execution/volcano/dql/describe.rs | 25 +-
src/execution/volcano/dql/explain.rs | 4 +-
src/execution/volcano/dql/show_table.rs | 4 +-
src/expression/evaluator.rs | 17 +-
src/expression/mod.rs | 8 +-
src/expression/value_compute.rs | 84 ++++--
src/marcos/mod.rs | 14 +-
src/optimizer/core/histogram.rs | 2 +-
.../rule/normalization/column_pruning.rs | 4 +-
src/storage/table_codec.rs | 2 +-
src/types/mod.rs | 77 ++---
src/types/tuple.rs | 67 ++++-
src/types/tuple_builder.rs | 7 +-
src/types/value.rs | 276 +++++++++++++-----
tests/slt/char.slt | 47 +++
tests/slt/sql_2016/E021_01.slt | 10 +-
tests/slt/sql_2016/E021_02.slt | 15 +-
25 files changed, 517 insertions(+), 195 deletions(-)
create mode 100644 tests/slt/char.slt
diff --git a/Cargo.toml b/Cargo.toml
index 6d8125f0..5f854b1c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -58,7 +58,7 @@ regex = { version = "1.10.3" }
rust_decimal = { version = "1.34.3" }
serde = { version = "1.0.197", features = ["derive", "rc"] }
siphasher = { version = "1.0.0", features = ["serde"] }
-sqlparser = { version = "0.34.0" }
+sqlparser = { version = "0.34.0", features = ["serde"] }
strum_macros = { version = "0.26.2" }
thiserror = { version = "1.0.58" }
tokio = { version = "1.36.0", features = ["full"] }
diff --git a/README.md b/README.md
index 1a03ef85..e97a946a 100755
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@ Built by @KipData
+
diff --git a/src/bin/server.rs b/src/bin/server.rs
index 70747931..4c8450b8 100644
--- a/src/bin/server.rs
+++ b/src/bin/server.rs
@@ -198,7 +198,7 @@ fn encode_tuples<'a>(schema: &Schema, tuples: Vec) -> PgWireResult encoder.encode_field(&value.u64().map(|v| v as i64)),
LogicalType::Float => encoder.encode_field(&value.float()),
LogicalType::Double => encoder.encode_field(&value.double()),
- LogicalType::Char(_) | LogicalType::Varchar(_) => {
+ LogicalType::Char(..) | LogicalType::Varchar(..) => {
encoder.encode_field(&value.utf8())
}
LogicalType::Date => encoder.encode_field(&value.date()),
@@ -225,9 +225,9 @@ fn into_pg_type(data_type: &LogicalType) -> PgWireResult {
LogicalType::Bigint | LogicalType::UBigint => Type::INT8,
LogicalType::Float => Type::FLOAT4,
LogicalType::Double => Type::FLOAT8,
- LogicalType::Varchar(_) => Type::VARCHAR,
+ LogicalType::Varchar(..) => Type::VARCHAR,
LogicalType::Date | LogicalType::DateTime => Type::DATE,
- LogicalType::Char(_) => Type::CHAR,
+ LogicalType::Char(..) => Type::CHAR,
LogicalType::Time => Type::TIME,
LogicalType::Decimal(_, _) => todo!(),
_ => {
diff --git a/src/binder/create_table.rs b/src/binder/create_table.rs
index 70717fcc..9af59f0f 100644
--- a/src/binder/create_table.rs
+++ b/src/binder/create_table.rs
@@ -146,6 +146,7 @@ mod tests {
use crate::storage::kip::KipStorage;
use crate::storage::Storage;
use crate::types::LogicalType;
+ use sqlparser::ast::CharLengthUnits;
use std::sync::atomic::AtomicUsize;
use tempfile::TempDir;
@@ -177,7 +178,12 @@ mod tests {
assert_eq!(op.columns[1].nullable, true);
assert_eq!(
op.columns[1].desc,
- ColumnDesc::new(LogicalType::Varchar(Some(10)), false, false, None)
+ ColumnDesc::new(
+ LogicalType::Varchar(Some(10), CharLengthUnits::Characters),
+ false,
+ false,
+ None
+ )
);
}
_ => unreachable!(),
diff --git a/src/binder/expr.rs b/src/binder/expr.rs
index 3e8e0275..71228dc3 100644
--- a/src/binder/expr.rs
+++ b/src/binder/expr.rs
@@ -4,8 +4,8 @@ use crate::expression;
use crate::expression::agg::AggKind;
use itertools::Itertools;
use sqlparser::ast::{
- BinaryOperator, DataType, Expr, Function, FunctionArg, FunctionArgExpr, Ident, Query,
- UnaryOperator,
+ BinaryOperator, CharLengthUnits, DataType, Expr, Function, FunctionArg, FunctionArgExpr, Ident,
+ Query, UnaryOperator,
};
use std::slice;
use std::sync::Arc;
@@ -69,7 +69,8 @@ impl<'a, T: Transaction> Binder<'a, T> {
let logical_type = LogicalType::try_from(data_type.clone())?;
let value = DataValue::Utf8 {
value: Some(value.to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}
.cast(&logical_type)?;
@@ -354,7 +355,7 @@ impl<'a, T: Transaction> Binder<'a, T> {
| BinaryOperator::And
| BinaryOperator::Or
| BinaryOperator::Xor => LogicalType::Boolean,
- BinaryOperator::StringConcat => LogicalType::Varchar(None),
+ BinaryOperator::StringConcat => LogicalType::Varchar(None, CharLengthUnits::Characters),
_ => todo!(),
};
@@ -603,7 +604,8 @@ impl<'a, T: Transaction> Binder<'a, T> {
fn wildcard_expr() -> ScalarExpression {
ScalarExpression::Constant(Arc::new(DataValue::Utf8 {
value: Some("*".to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}))
}
}
diff --git a/src/catalog/column.rs b/src/catalog/column.rs
index 32d7653f..258536fd 100644
--- a/src/catalog/column.rs
+++ b/src/catalog/column.rs
@@ -2,6 +2,7 @@ use crate::catalog::TableName;
use crate::errors::DatabaseError;
use crate::expression::ScalarExpression;
use serde::{Deserialize, Serialize};
+use sqlparser::ast::CharLengthUnits;
use std::hash::Hash;
use std::sync::Arc;
@@ -50,7 +51,12 @@ impl ColumnCatalog {
table_name: None,
},
nullable: true,
- desc: ColumnDesc::new(LogicalType::Varchar(None), false, false, None),
+ desc: ColumnDesc::new(
+ LogicalType::Varchar(None, CharLengthUnits::Characters),
+ false,
+ false,
+ None,
+ ),
}
}
diff --git a/src/execution/volcano/dml/analyze.rs b/src/execution/volcano/dml/analyze.rs
index ae8b6a8f..8e1a6178 100644
--- a/src/execution/volcano/dml/analyze.rs
+++ b/src/execution/volcano/dml/analyze.rs
@@ -12,6 +12,7 @@ use crate::types::tuple::Tuple;
use crate::types::value::{DataValue, Utf8Type};
use futures_async_stream::try_stream;
use itertools::Itertools;
+use sqlparser::ast::CharLengthUnits;
use std::fmt::Formatter;
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
@@ -108,7 +109,8 @@ impl Analyze {
meta.to_file(&path)?;
values.push(Arc::new(DataValue::Utf8 {
value: Some(path.clone()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}));
transaction.save_table_meta(&table_name, path, meta)?;
}
diff --git a/src/execution/volcano/dml/copy_from_file.rs b/src/execution/volcano/dml/copy_from_file.rs
index 935ebc46..3a8e7d4e 100644
--- a/src/execution/volcano/dml/copy_from_file.rs
+++ b/src/execution/volcano/dml/copy_from_file.rs
@@ -105,6 +105,7 @@ mod tests {
use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnSummary};
use crate::db::DataBaseBuilder;
use futures::StreamExt;
+ use sqlparser::ast::CharLengthUnits;
use std::io::Write;
use std::sync::Arc;
use tempfile::TempDir;
@@ -148,7 +149,12 @@ mod tests {
table_name: None,
},
nullable: false,
- desc: ColumnDesc::new(LogicalType::Varchar(Some(10)), false, false, None),
+ desc: ColumnDesc::new(
+ LogicalType::Varchar(Some(10), CharLengthUnits::Characters),
+ false,
+ false,
+ None,
+ ),
}),
];
diff --git a/src/execution/volcano/dql/describe.rs b/src/execution/volcano/dql/describe.rs
index fb27ce64..814a7145 100644
--- a/src/execution/volcano/dql/describe.rs
+++ b/src/execution/volcano/dql/describe.rs
@@ -7,20 +7,24 @@ use crate::types::tuple::Tuple;
use crate::types::value::{DataValue, Utf8Type, ValueRef};
use futures_async_stream::try_stream;
use lazy_static::lazy_static;
+use sqlparser::ast::CharLengthUnits;
use std::sync::Arc;
lazy_static! {
static ref PRIMARY_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 {
value: Some(String::from("PRIMARY")),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters
});
static ref UNIQUE_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 {
value: Some(String::from("UNIQUE")),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters
});
static ref EMPTY_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 {
value: Some(String::from("EMPTY")),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters
});
}
@@ -69,24 +73,29 @@ impl Describe {
let values = vec![
Arc::new(DataValue::Utf8 {
value: Some(column.name().to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}),
Arc::new(DataValue::Utf8 {
value: Some(datatype.to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}),
Arc::new(DataValue::Utf8 {
value: datatype.raw_len().map(|len| len.to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}),
Arc::new(DataValue::Utf8 {
value: Some(column.nullable.to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}),
key_fn(column),
Arc::new(DataValue::Utf8 {
value: Some(default),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}),
];
yield Tuple { id: None, values };
diff --git a/src/execution/volcano/dql/explain.rs b/src/execution/volcano/dql/explain.rs
index f1dbb3ca..162657ef 100644
--- a/src/execution/volcano/dql/explain.rs
+++ b/src/execution/volcano/dql/explain.rs
@@ -5,6 +5,7 @@ use crate::storage::Transaction;
use crate::types::tuple::Tuple;
use crate::types::value::{DataValue, Utf8Type};
use futures_async_stream::try_stream;
+use sqlparser::ast::CharLengthUnits;
use std::sync::Arc;
pub struct Explain {
@@ -28,7 +29,8 @@ impl Explain {
pub async fn _execute(self) {
let values = vec![Arc::new(DataValue::Utf8 {
value: Some(self.plan.explain(0)),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
})];
yield Tuple { id: None, values };
diff --git a/src/execution/volcano/dql/show_table.rs b/src/execution/volcano/dql/show_table.rs
index f64b6dd1..b9862f08 100644
--- a/src/execution/volcano/dql/show_table.rs
+++ b/src/execution/volcano/dql/show_table.rs
@@ -5,6 +5,7 @@ use crate::storage::Transaction;
use crate::types::tuple::Tuple;
use crate::types::value::{DataValue, Utf8Type};
use futures_async_stream::try_stream;
+use sqlparser::ast::CharLengthUnits;
use std::sync::Arc;
pub struct ShowTables;
@@ -23,7 +24,8 @@ impl ShowTables {
for TableMeta { table_name } in metas {
let values = vec![Arc::new(DataValue::Utf8 {
value: Some(table_name.to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
})];
yield Tuple { id: None, values };
diff --git a/src/expression/evaluator.rs b/src/expression/evaluator.rs
index f97a1d4c..6eea7aee 100644
--- a/src/expression/evaluator.rs
+++ b/src/expression/evaluator.rs
@@ -7,6 +7,7 @@ use crate::types::value::{DataValue, Utf8Type, ValueRef};
use crate::types::LogicalType;
use itertools::Itertools;
use lazy_static::lazy_static;
+use sqlparser::ast::CharLengthUnits;
use std::cmp;
use std::cmp::Ordering;
use std::sync::Arc;
@@ -25,7 +26,8 @@ macro_rules! eval_to_num {
} else {
return Ok(Arc::new(DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}));
}
};
@@ -156,7 +158,7 @@ impl ScalarExpression {
from_expr,
} => {
if let Some(mut string) = DataValue::clone(expr.eval(tuple, schema)?.as_ref())
- .cast(&LogicalType::Varchar(None))?
+ .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?
.utf8()
{
if let Some(from_expr) = from_expr {
@@ -169,7 +171,8 @@ impl ScalarExpression {
if from > len_i {
return Ok(Arc::new(DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}));
}
string = string.split_off(from as usize);
@@ -182,19 +185,21 @@ impl ScalarExpression {
Ok(Arc::new(DataValue::Utf8 {
value: Some(string),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}))
} else {
Ok(Arc::new(DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}))
}
}
ScalarExpression::Position { expr, in_expr } => {
let unpack = |expr: &ScalarExpression| -> Result {
Ok(DataValue::clone(expr.eval(tuple, schema)?.as_ref())
- .cast(&LogicalType::Varchar(None))?
+ .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?
.utf8()
.unwrap_or("".to_owned()))
};
diff --git a/src/expression/mod.rs b/src/expression/mod.rs
index ab97c43c..3a7dc0c9 100644
--- a/src/expression/mod.rs
+++ b/src/expression/mod.rs
@@ -5,7 +5,9 @@ use std::hash::Hash;
use std::sync::Arc;
use std::{fmt, mem};
-use sqlparser::ast::{BinaryOperator as SqlBinaryOperator, UnaryOperator as SqlUnaryOperator};
+use sqlparser::ast::{
+ BinaryOperator as SqlBinaryOperator, CharLengthUnits, UnaryOperator as SqlUnaryOperator,
+};
use self::agg::AggKind;
use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef};
@@ -394,7 +396,9 @@ impl ScalarExpression {
ScalarExpression::IsNull { .. }
| ScalarExpression::In { .. }
| ScalarExpression::Between { .. } => LogicalType::Boolean,
- ScalarExpression::SubString { .. } => LogicalType::Varchar(None),
+ ScalarExpression::SubString { .. } => {
+ LogicalType::Varchar(None, CharLengthUnits::Characters)
+ }
ScalarExpression::Position { .. } => LogicalType::Integer,
ScalarExpression::Alias { expr, .. } | ScalarExpression::Reference { expr, .. } => {
expr.return_type()
diff --git a/src/expression/value_compute.rs b/src/expression/value_compute.rs
index 3ceb7031..297f2905 100644
--- a/src/expression/value_compute.rs
+++ b/src/expression/value_compute.rs
@@ -3,6 +3,7 @@ use crate::expression::{BinaryOperator, UnaryOperator};
use crate::types::value::{DataValue, Utf8Type, ValueRef};
use crate::types::LogicalType;
use regex::Regex;
+use sqlparser::ast::CharLengthUnits;
use std::cmp::Ordering;
fn unpack_bool(value: DataValue) -> Option {
@@ -193,8 +194,15 @@ impl DataValue {
op: &BinaryOperator,
) -> Result {
if let BinaryOperator::Like(escape_char) | BinaryOperator::NotLike(escape_char) = op {
- let value_option = unpack_utf8(self.clone().cast(&LogicalType::Varchar(None))?);
- let pattern_option = unpack_utf8(right.clone().cast(&LogicalType::Varchar(None))?);
+ let value_option = unpack_utf8(
+ self.clone()
+ .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?,
+ );
+ let pattern_option = unpack_utf8(
+ right
+ .clone()
+ .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))?,
+ );
let mut is_match = if let (Some(value), Some(pattern)) = (value_option, pattern_option)
{
@@ -511,7 +519,7 @@ impl DataValue {
_ => return Err(DatabaseError::UnsupportedBinaryOperator(unified_type, *op)),
}
}
- LogicalType::Varchar(_) | LogicalType::Char(_) => {
+ LogicalType::Varchar(_, _) | LogicalType::Char(_, _) => {
let left_value = unpack_utf8(self.clone().cast(&unified_type)?);
let right_value = unpack_utf8(right.clone().cast(&unified_type)?);
@@ -576,7 +584,8 @@ impl DataValue {
DataValue::Utf8 {
value,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}
}
_ => return Err(DatabaseError::UnsupportedBinaryOperator(unified_type, *op)),
@@ -652,6 +661,7 @@ mod test {
use crate::errors::DatabaseError;
use crate::expression::BinaryOperator;
use crate::types::value::{DataValue, Utf8Type};
+ use sqlparser::ast::CharLengthUnits;
#[test]
fn test_binary_op_arithmetic_plus() -> Result<(), DatabaseError> {
@@ -1544,11 +1554,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("b".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::Gt
)?,
@@ -1558,11 +1570,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("b".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::Lt
)?,
@@ -1572,11 +1586,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::GtEq
)?,
@@ -1586,11 +1602,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::LtEq
)?,
@@ -1600,11 +1618,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::NotEq
)?,
@@ -1614,11 +1634,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::Eq
)?,
@@ -1629,11 +1651,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::Gt
)?,
@@ -1643,11 +1667,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::Lt
)?,
@@ -1657,11 +1683,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::GtEq
)?,
@@ -1671,11 +1699,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::LtEq
)?,
@@ -1685,11 +1715,13 @@ mod test {
DataValue::binary_op(
&DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&DataValue::Utf8 {
value: Some("a".to_string()),
- ty: Utf8Type::Variable
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
},
&BinaryOperator::NotEq
)?,
diff --git a/src/marcos/mod.rs b/src/marcos/mod.rs
index 39332a1d..8042d41d 100644
--- a/src/marcos/mod.rs
+++ b/src/marcos/mod.rs
@@ -137,6 +137,7 @@ mod test {
use crate::types::LogicalType;
use serde::Deserialize;
use serde::Serialize;
+ use sqlparser::ast::CharLengthUnits;
use std::sync::Arc;
fn build_tuple() -> (Tuple, SchemaRef) {
@@ -149,14 +150,20 @@ mod test {
Arc::new(ColumnCatalog::new(
"c2".to_string(),
false,
- ColumnDesc::new(LogicalType::Varchar(None), false, false, None),
+ ColumnDesc::new(
+ LogicalType::Varchar(None, CharLengthUnits::Characters),
+ false,
+ false,
+ None,
+ ),
)),
]);
let values = vec![
Arc::new(DataValue::Int32(Some(9))),
Arc::new(DataValue::Utf8 {
value: Some("LOL".to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}),
];
@@ -207,7 +214,8 @@ mod test {
ScalarExpression::Constant(Arc::new(DataValue::Int8(Some(1)))),
ScalarExpression::Constant(Arc::new(DataValue::Utf8 {
value: Some("1".to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
})),
],
&Tuple {
diff --git a/src/optimizer/core/histogram.rs b/src/optimizer/core/histogram.rs
index 17cdf193..6f1b9954 100644
--- a/src/optimizer/core/histogram.rs
+++ b/src/optimizer/core/histogram.rs
@@ -255,7 +255,7 @@ impl Histogram {
) -> Result {
let float_value = |value: &DataValue, prefix_len: usize| {
let value = match value.logical_type() {
- LogicalType::Varchar(_) | LogicalType::Char(_) => match value {
+ LogicalType::Varchar(..) | LogicalType::Char(..) => match value {
DataValue::Utf8 { value, .. } => value.as_ref().map(|string| {
if prefix_len > string.len() {
return 0.0;
diff --git a/src/optimizer/rule/normalization/column_pruning.rs b/src/optimizer/rule/normalization/column_pruning.rs
index e48e98e1..d86d16ee 100644
--- a/src/optimizer/rule/normalization/column_pruning.rs
+++ b/src/optimizer/rule/normalization/column_pruning.rs
@@ -10,6 +10,7 @@ use crate::types::value::{DataValue, Utf8Type};
use crate::types::LogicalType;
use itertools::Itertools;
use lazy_static::lazy_static;
+use sqlparser::ast::CharLengthUnits;
use std::collections::HashSet;
use std::sync::Arc;
@@ -63,7 +64,8 @@ impl ColumnPruning {
if op.agg_calls.is_empty() && op.groupby_exprs.is_empty() {
let value = Arc::new(DataValue::Utf8 {
value: Some("*".to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
});
// only single COUNT(*) is not depend on any column
// removed all expressions from the aggregate: push a COUNT(*)
diff --git a/src/storage/table_codec.rs b/src/storage/table_codec.rs
index 3994e489..c74aba96 100644
--- a/src/storage/table_codec.rs
+++ b/src/storage/table_codec.rs
@@ -172,7 +172,7 @@ impl TableCodec {
| LogicalType::USmallint
| LogicalType::UInteger
| LogicalType::UBigint
- | LogicalType::Varchar(_)
+ | LogicalType::Varchar(..)
) {
return Err(DatabaseError::InvalidType);
}
diff --git a/src/types/mod.rs b/src/types/mod.rs
index 209dcda6..53aec131 100644
--- a/src/types/mod.rs
+++ b/src/types/mod.rs
@@ -10,7 +10,7 @@ use std::any::TypeId;
use std::cmp;
use crate::errors::DatabaseError;
-use sqlparser::ast::{CharLengthUnits, CharacterLength, ExactNumberInfo, TimezoneInfo};
+use sqlparser::ast::{CharLengthUnits, ExactNumberInfo, TimezoneInfo};
use strum_macros::AsRefStr;
pub type ColumnId = u32;
@@ -34,8 +34,8 @@ pub enum LogicalType {
UBigint,
Float,
Double,
- Char(u32),
- Varchar(Option),
+ Char(u32, CharLengthUnits),
+ Varchar(Option, CharLengthUnits),
Date,
DateTime,
Time,
@@ -75,7 +75,7 @@ impl LogicalType {
} else if type_id == TypeId::of::() {
Some(LogicalType::Decimal(None, None))
} else if type_id == TypeId::of::() {
- Some(LogicalType::Varchar(None))
+ Some(LogicalType::Varchar(None, CharLengthUnits::Characters))
} else {
None
}
@@ -96,8 +96,11 @@ impl LogicalType {
LogicalType::Float => Some(4),
LogicalType::Double => Some(8),
/// Note: The non-fixed length type's raw_len is None e.g. Varchar
- LogicalType::Varchar(_) => None,
- LogicalType::Char(len) => Some(*len as usize),
+ LogicalType::Varchar(_, _) => None,
+ LogicalType::Char(len, unit) => match unit {
+ CharLengthUnits::Characters => None,
+ CharLengthUnits::Octets => Some(*len as usize),
+ },
LogicalType::Decimal(_, _) => Some(16),
LogicalType::Date => Some(4),
LogicalType::DateTime => Some(8),
@@ -179,8 +182,8 @@ impl LogicalType {
}
if matches!(
(left, right),
- (LogicalType::Date, LogicalType::Varchar(_))
- | (LogicalType::Varchar(_), LogicalType::Date)
+ (LogicalType::Date, LogicalType::Varchar(..))
+ | (LogicalType::Varchar(..), LogicalType::Date)
) {
return Ok(LogicalType::Date);
}
@@ -192,15 +195,15 @@ impl LogicalType {
}
if matches!(
(left, right),
- (LogicalType::DateTime, LogicalType::Varchar(_))
- | (LogicalType::Varchar(_), LogicalType::DateTime)
+ (LogicalType::DateTime, LogicalType::Varchar(..))
+ | (LogicalType::Varchar(..), LogicalType::DateTime)
) {
return Ok(LogicalType::DateTime);
}
- if let (LogicalType::Char(_), LogicalType::Varchar(len))
- | (LogicalType::Varchar(len), LogicalType::Char(_)) = (left, right)
+ if let (LogicalType::Char(..), LogicalType::Varchar(len, ..))
+ | (LogicalType::Varchar(len, ..), LogicalType::Char(..)) = (left, right)
{
- return Ok(LogicalType::Varchar(*len));
+ return Ok(LogicalType::Varchar(*len, CharLengthUnits::Characters));
}
Err(DatabaseError::Incomparable(*left, *right))
}
@@ -296,20 +299,22 @@ impl LogicalType {
LogicalType::UBigint => matches!(to, LogicalType::Float | LogicalType::Double),
LogicalType::Float => matches!(to, LogicalType::Double),
LogicalType::Double => false,
- LogicalType::Char(_) => false,
- LogicalType::Varchar(_) => false,
+ LogicalType::Char(..) => false,
+ LogicalType::Varchar(..) => false,
LogicalType::Date => matches!(
to,
- LogicalType::DateTime | LogicalType::Varchar(_) | LogicalType::Char(_)
+ LogicalType::DateTime | LogicalType::Varchar(..) | LogicalType::Char(..)
),
LogicalType::DateTime => matches!(
to,
LogicalType::Date
| LogicalType::Time
- | LogicalType::Varchar(_)
- | LogicalType::Char(_)
+ | LogicalType::Varchar(..)
+ | LogicalType::Char(..)
),
- LogicalType::Time => matches!(to, LogicalType::Varchar(_) | LogicalType::Char(_)),
+ LogicalType::Time => {
+ matches!(to, LogicalType::Varchar(..) | LogicalType::Char(..))
+ }
LogicalType::Decimal(_, _) | LogicalType::Tuple => false,
}
}
@@ -324,31 +329,29 @@ impl TryFrom for LogicalType {
sqlparser::ast::DataType::Char(char_len)
| sqlparser::ast::DataType::Character(char_len) => {
let mut len = 1;
- if let Some(CharacterLength { length, unit }) = char_len {
- if matches!(unit, Some(CharLengthUnits::Octets)) {
- return Err(DatabaseError::UnsupportedStmt(format!(
- "char unit: {:?}",
- unit
- )));
- }
- len = cmp::max(len, length)
+ let mut char_unit = None;
+ if let Some(sqlparser::ast::CharacterLength { length, unit }) = char_len {
+ len = cmp::max(len, length);
+ char_unit = unit;
}
- Ok(LogicalType::Char(len as u32))
+ Ok(LogicalType::Char(
+ len as u32,
+ char_unit.unwrap_or(CharLengthUnits::Characters),
+ ))
}
sqlparser::ast::DataType::CharVarying(varchar_len)
| sqlparser::ast::DataType::CharacterVarying(varchar_len)
| sqlparser::ast::DataType::Varchar(varchar_len) => {
let mut len = None;
- if let Some(CharacterLength { length, unit }) = varchar_len {
- if matches!(unit, Some(CharLengthUnits::Octets)) {
- return Err(DatabaseError::UnsupportedStmt(format!(
- "char unit: {:?}",
- unit
- )));
- }
- len = Some(length as u32)
+ let mut char_unit = None;
+ if let Some(sqlparser::ast::CharacterLength { length, unit }) = varchar_len {
+ len = Some(length as u32);
+ char_unit = unit;
}
- Ok(LogicalType::Varchar(len))
+ Ok(LogicalType::Varchar(
+ len,
+ char_unit.unwrap_or(CharLengthUnits::Characters),
+ ))
}
sqlparser::ast::DataType::Float(_) => Ok(LogicalType::Float),
sqlparser::ast::DataType::Double | sqlparser::ast::DataType::DoublePrecision => {
diff --git a/src/types/tuple.rs b/src/types/tuple.rs
index dff31ffb..5b852b96 100644
--- a/src/types/tuple.rs
+++ b/src/types/tuple.rs
@@ -175,6 +175,7 @@ mod tests {
use crate::types::LogicalType;
use itertools::Itertools;
use rust_decimal::Decimal;
+ use sqlparser::ast::CharLengthUnits;
use std::sync::Arc;
#[test]
@@ -193,7 +194,12 @@ mod tests {
Arc::new(ColumnCatalog::new(
"c3".to_string(),
false,
- ColumnDesc::new(LogicalType::Varchar(Some(2)), false, false, None),
+ ColumnDesc::new(
+ LogicalType::Varchar(Some(2), CharLengthUnits::Characters),
+ false,
+ false,
+ None,
+ ),
)),
Arc::new(ColumnCatalog::new(
"c4".to_string(),
@@ -248,7 +254,32 @@ mod tests {
Arc::new(ColumnCatalog::new(
"c14".to_string(),
false,
- ColumnDesc::new(LogicalType::Char(1), false, false, None),
+ ColumnDesc::new(
+ LogicalType::Char(1, CharLengthUnits::Characters),
+ false,
+ false,
+ None,
+ ),
+ )),
+ Arc::new(ColumnCatalog::new(
+ "c15".to_string(),
+ false,
+ ColumnDesc::new(
+ LogicalType::Varchar(Some(2), CharLengthUnits::Octets),
+ false,
+ false,
+ None,
+ ),
+ )),
+ Arc::new(ColumnCatalog::new(
+ "c16".to_string(),
+ false,
+ ColumnDesc::new(
+ LogicalType::Char(1, CharLengthUnits::Octets),
+ false,
+ false,
+ None,
+ ),
)),
]);
@@ -260,7 +291,8 @@ mod tests {
Arc::new(DataValue::UInt32(Some(1))),
Arc::new(DataValue::Utf8 {
value: Some("LOL".to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(Some(2)),
+ unit: CharLengthUnits::Characters,
}),
Arc::new(DataValue::Int16(Some(1))),
Arc::new(DataValue::UInt16(Some(1))),
@@ -275,6 +307,17 @@ mod tests {
Arc::new(DataValue::Utf8 {
value: Some("K".to_string()),
ty: Utf8Type::Fixed(1),
+ unit: CharLengthUnits::Characters,
+ }),
+ Arc::new(DataValue::Utf8 {
+ value: Some("LOL".to_string()),
+ ty: Utf8Type::Variable(Some(2)),
+ unit: CharLengthUnits::Octets,
+ }),
+ Arc::new(DataValue::Utf8 {
+ value: Some("K".to_string()),
+ ty: Utf8Type::Fixed(1),
+ unit: CharLengthUnits::Octets,
}),
],
},
@@ -285,7 +328,8 @@ mod tests {
Arc::new(DataValue::UInt32(None)),
Arc::new(DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(Some(2)),
+ unit: CharLengthUnits::Characters,
}),
Arc::new(DataValue::Int16(None)),
Arc::new(DataValue::UInt16(None)),
@@ -300,6 +344,17 @@ mod tests {
Arc::new(DataValue::Utf8 {
value: None,
ty: Utf8Type::Fixed(1),
+ unit: CharLengthUnits::Characters,
+ }),
+ Arc::new(DataValue::Utf8 {
+ value: None,
+ ty: Utf8Type::Variable(Some(2)),
+ unit: CharLengthUnits::Octets,
+ }),
+ Arc::new(DataValue::Utf8 {
+ value: None,
+ ty: Utf8Type::Fixed(1),
+ unit: CharLengthUnits::Octets,
}),
],
},
@@ -312,13 +367,13 @@ mod tests {
let tuple_0 = Tuple::deserialize_from(
&types,
- &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
&columns,
&tuples[0].serialize_to(&types).unwrap(),
);
let tuple_1 = Tuple::deserialize_from(
&types,
- &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
&columns,
&tuples[1].serialize_to(&types).unwrap(),
);
diff --git a/src/types/tuple_builder.rs b/src/types/tuple_builder.rs
index d9763a87..40835e4b 100644
--- a/src/types/tuple_builder.rs
+++ b/src/types/tuple_builder.rs
@@ -1,6 +1,7 @@
use crate::errors::DatabaseError;
use crate::types::tuple::{Schema, Tuple};
use crate::types::value::{DataValue, Utf8Type};
+use sqlparser::ast::CharLengthUnits;
use std::sync::Arc;
pub struct TupleBuilder<'a> {
@@ -15,7 +16,8 @@ impl<'a> TupleBuilder<'a> {
pub fn build_result(message: String) -> Tuple {
let values = vec![Arc::new(DataValue::Utf8 {
value: Some(message),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
})];
Tuple { id: None, values }
@@ -32,7 +34,8 @@ impl<'a> TupleBuilder<'a> {
let data_value = Arc::new(
DataValue::Utf8 {
value: Some(value.to_string()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None),
+ unit: CharLengthUnits::Characters,
}
.cast(self.schema[i].datatype())?,
);
diff --git a/src/types/value.rs b/src/types/value.rs
index 9e4300d0..0768b46c 100644
--- a/src/types/value.rs
+++ b/src/types/value.rs
@@ -14,6 +14,7 @@ use crate::errors::DatabaseError;
use ordered_float::OrderedFloat;
use rust_decimal::prelude::{FromPrimitive, ToPrimitive};
use serde::{Deserialize, Serialize};
+use sqlparser::ast::CharLengthUnits;
use super::LogicalType;
@@ -34,7 +35,7 @@ pub type ValueRef = Arc;
#[derive(Clone, Serialize, Deserialize)]
pub enum Utf8Type {
- Variable,
+ Variable(Option),
Fixed(u32),
}
@@ -55,6 +56,7 @@ pub enum DataValue {
Utf8 {
value: Option,
ty: Utf8Type,
+ unit: CharLengthUnits,
},
/// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01
Date32(Option),
@@ -248,23 +250,25 @@ impl Hash for DataValue {
}
}
macro_rules! varchar_cast {
- ($value:expr, $len:expr, $ty:expr) => {
+ ($value:expr, $len:expr, $ty:expr, $unit:expr) => { // formats $value as text and wraps it in DataValue::Utf8; $unit is the unit $len is measured in
$value
.map(|v| {
let string_value = format!("{}", v);
if let Some(len) = $len {
- if string_value.len() > *len as usize {
+ if Self::check_string_len(&string_value, *len as usize, $unit) { // true => the text is longer than `len` in $unit
return Err(DatabaseError::TooLong);
}
}
Ok(DataValue::Utf8 {
value: Some(string_value),
ty: $ty,
+ unit: $unit, // the produced value records the unit it was checked against
})
})
.unwrap_or(Ok(DataValue::Utf8 {
value: None,
ty: $ty,
+ unit: $unit, // a None input still yields a Utf8 value tagged with the target unit
}))
};
}
@@ -315,22 +319,48 @@ impl DataValue {
}
}
+ pub(crate) fn check_string_len(string: &str, len: usize, unit: CharLengthUnits) -> bool { // returns true when `string` is LONGER than `len` measured in `unit` (i.e. the check failed)
+ match unit {
+ CharLengthUnits::Characters => string.chars().count() > len, // length counted in `char`s (Unicode scalar values)
+ CharLengthUnits::Octets => string.len() > len, // length counted in UTF-8 bytes
+ }
+ }
+
pub(crate) fn check_len(&self, logic_type: &LogicalType) -> Result<(), DatabaseError> {
let is_over_len = match (logic_type, self) {
+ (LogicalType::Varchar(None, _), _) => false,
+ (
+ LogicalType::Varchar(Some(len), CharLengthUnits::Characters),
+ DataValue::Utf8 {
+ value: Some(val),
+ ty: Utf8Type::Variable(_),
+ unit: CharLengthUnits::Characters,
+ },
+ )
+ | (
+ LogicalType::Char(len, CharLengthUnits::Characters),
+ DataValue::Utf8 {
+ value: Some(val),
+ ty: Utf8Type::Fixed(_),
+ unit: CharLengthUnits::Characters,
+ },
+ ) => Self::check_string_len(val, *len as usize, CharLengthUnits::Characters),
(
- LogicalType::Varchar(Some(len)),
+ LogicalType::Varchar(Some(len), CharLengthUnits::Octets),
DataValue::Utf8 {
value: Some(val),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(_),
+ unit: CharLengthUnits::Octets,
},
)
| (
- LogicalType::Char(len),
+ LogicalType::Char(len, CharLengthUnits::Octets),
DataValue::Utf8 {
value: Some(val),
ty: Utf8Type::Fixed(_),
+ unit: CharLengthUnits::Octets,
},
- ) => val.len() > *len as usize,
+ ) => Self::check_string_len(val, *len as usize, CharLengthUnits::Octets),
(LogicalType::Decimal(full_len, scale_len), DataValue::Decimal(Some(val))) => {
if let Some(len) = full_len {
if val.mantissa().ilog10() + 1 > *len as u32 {
@@ -404,13 +434,15 @@ impl DataValue {
LogicalType::UBigint => DataValue::UInt64(None),
LogicalType::Float => DataValue::Float32(None),
LogicalType::Double => DataValue::Float64(None),
- LogicalType::Char(len) => DataValue::Utf8 {
+ LogicalType::Char(len, unit) => DataValue::Utf8 {
value: None,
ty: Utf8Type::Fixed(*len),
+ unit: *unit,
},
- LogicalType::Varchar(_) => DataValue::Utf8 {
+ LogicalType::Varchar(len, unit) => DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(*len),
+ unit: *unit,
},
LogicalType::Date => DataValue::Date32(None),
LogicalType::DateTime => DataValue::Date64(None),
@@ -435,13 +467,15 @@ impl DataValue {
LogicalType::UBigint => DataValue::UInt64(Some(0)),
LogicalType::Float => DataValue::Float32(Some(0.0)),
LogicalType::Double => DataValue::Float64(Some(0.0)),
- LogicalType::Char(len) => DataValue::Utf8 {
+ LogicalType::Char(len, unit) => DataValue::Utf8 {
value: Some(String::new()),
ty: Utf8Type::Fixed(*len),
+ unit: *unit,
},
- LogicalType::Varchar(_) => DataValue::Utf8 {
+ LogicalType::Varchar(len, unit) => DataValue::Utf8 {
value: Some(String::new()),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(*len),
+ unit: *unit,
},
LogicalType::Date => DataValue::Date32(Some(UNIX_DATETIME.num_days_from_ce())),
LogicalType::DateTime => DataValue::Date64(Some(UNIX_DATETIME.timestamp())),
@@ -511,24 +545,36 @@ impl DataValue {
return Ok(bytes.write_fixedint(*v)?);
}
}
- DataValue::Utf8 { value: v, ty } => {
+ DataValue::Utf8 { value: v, ty, unit } => {
if let Some(v) = v {
match ty {
- Utf8Type::Variable => {
+ Utf8Type::Variable(_) => {
let string_bytes = v.as_bytes();
let len = string_bytes.len();
bytes.extend_from_slice(string_bytes);
return Ok(len);
}
- Utf8Type::Fixed(len) => {
- let mut string_bytes =
- format!("{:len$}", v, len = *len as usize).into_bytes();
- let len = string_bytes.len();
-
- bytes.append(&mut string_bytes);
- return Ok(len);
- }
+ Utf8Type::Fixed(len) => match unit {
+ CharLengthUnits::Characters => {
+ let chars_len = *len as usize;
+ let mut string_bytes =
+ format!("{:len$}", v, len = chars_len).into_bytes();
+ let octets_len = string_bytes.len();
+
+ bytes.append(&mut string_bytes);
+ return Ok(octets_len);
+ }
+ CharLengthUnits::Octets => {
+ let octets_len = *len as usize;
+ let mut string_bytes = v.clone().into_bytes();
+
+ string_bytes.resize(octets_len, b' ');
+ assert_eq!(octets_len, string_bytes.len());
+ bytes.append(&mut string_bytes);
+ return Ok(octets_len);
+ }
+ },
}
}
}
@@ -597,7 +643,7 @@ impl DataValue {
buf.copy_from_slice(bytes);
f64::from_ne_bytes(buf)
})),
- LogicalType::Char(len) => {
+ LogicalType::Char(len, unit) => {
// https://dev.mysql.com/doc/refman/8.0/en/char.html#:~:text=If%20a%20given%20value%20is%20stored%20into%20the%20CHAR(4)%20and%20VARCHAR(4)%20columns%2C%20the%20values%20retrieved%20from%20the%20columns%20are%20not%20always%20the%20same%20because%20trailing%20spaces%20are%20removed%20from%20CHAR%20columns%20upon%20retrieval.%20The%20following%20example%20illustrates%20this%20difference%3A
let value = (!bytes.is_empty()).then(|| {
let last_non_zero_index = match bytes.iter().rposition(|&x| x != b' ') {
@@ -609,14 +655,16 @@ impl DataValue {
DataValue::Utf8 {
value,
ty: Utf8Type::Fixed(*len),
+ unit: *unit,
}
}
- LogicalType::Varchar(_) => {
+ LogicalType::Varchar(len, unit) => {
let value =
(!bytes.is_empty()).then(|| String::from_utf8(bytes.to_owned()).unwrap());
DataValue::Utf8 {
value,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(*len),
+ unit: *unit,
}
}
LogicalType::Date => {
@@ -651,13 +699,15 @@ impl DataValue {
DataValue::UInt32(_) => LogicalType::UInteger,
DataValue::UInt64(_) => LogicalType::UBigint,
DataValue::Utf8 {
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(len),
+ unit,
..
- } => LogicalType::Varchar(None),
+ } => LogicalType::Varchar(*len, *unit),
DataValue::Utf8 {
ty: Utf8Type::Fixed(len),
+ unit,
..
- } => LogicalType::Char(*len),
+ } => LogicalType::Char(*len, *unit),
DataValue::Date32(_) => LogicalType::Date,
DataValue::Date64(_) => LogicalType::DateTime,
DataValue::Time(_) => LogicalType::Time,
@@ -796,13 +846,15 @@ impl DataValue {
LogicalType::UBigint => Ok(DataValue::UInt64(None)),
LogicalType::Float => Ok(DataValue::Float32(None)),
LogicalType::Double => Ok(DataValue::Float64(None)),
- LogicalType::Char(len) => Ok(DataValue::Utf8 {
+ LogicalType::Char(len, unit) => Ok(DataValue::Utf8 {
value: None,
ty: Utf8Type::Fixed(*len),
+ unit: *unit,
}),
- LogicalType::Varchar(_) => Ok(DataValue::Utf8 {
+ LogicalType::Varchar(len, unit) => Ok(DataValue::Utf8 {
value: None,
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(*len),
+ unit: *unit,
}),
LogicalType::Date => Ok(DataValue::Date32(None)),
LogicalType::DateTime => Ok(DataValue::Date64(None)),
@@ -823,16 +875,24 @@ impl DataValue {
LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit)
+ }
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(value, len, Utf8Type::Variable(*len), *unit)
+ }
_ => Err(DatabaseError::CastFail),
},
DataValue::Float32(value) => match to {
LogicalType::SqlNull => Ok(DataValue::Null),
LogicalType::Float => Ok(DataValue::Float32(value)),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit)
+ }
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(value, len, Utf8Type::Variable(*len), *unit)
+ }
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(
value
.map(|v| {
@@ -850,8 +910,12 @@ impl DataValue {
LogicalType::SqlNull => Ok(DataValue::Null),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))),
LogicalType::Double => Ok(DataValue::Float64(value)),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit)
+ }
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(value, len, Utf8Type::Variable(*len), *unit)
+ }
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(
value
.map(|v| {
@@ -883,8 +947,12 @@ impl DataValue {
LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit)
+ }
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(value, len, Utf8Type::Variable(*len), *unit)
+ }
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -911,8 +979,12 @@ impl DataValue {
LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit)
+ }
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(value, len, Utf8Type::Variable(*len), *unit)
+ }
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -938,8 +1010,12 @@ impl DataValue {
LogicalType::Bigint => Ok(DataValue::Int64(value.map(|v| v.into()))),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit)
+ }
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(value, len, Utf8Type::Variable(*len), *unit)
+ }
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -964,8 +1040,12 @@ impl DataValue {
LogicalType::Bigint => Ok(DataValue::Int64(value)),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v as f64))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit)
+ }
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(value, len, Utf8Type::Variable(*len), *unit)
+ }
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -986,8 +1066,12 @@ impl DataValue {
LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit)
+ }
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(value, len, Utf8Type::Variable(*len), *unit)
+ }
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -1006,8 +1090,12 @@ impl DataValue {
LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v.into()))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit)
+ }
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(value, len, Utf8Type::Variable(*len), *unit)
+ }
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -1024,8 +1112,12 @@ impl DataValue {
LogicalType::UBigint => Ok(DataValue::UInt64(value.map(|v| v.into()))),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v.into()))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit)
+ }
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(value, len, Utf8Type::Variable(*len), *unit)
+ }
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -1040,8 +1132,12 @@ impl DataValue {
LogicalType::UBigint => Ok(DataValue::UInt64(value)),
LogicalType::Float => Ok(DataValue::Float32(value.map(|v| v as f32))),
LogicalType::Double => Ok(DataValue::Float64(value.map(|v| v as f64))),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit)
+ }
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(value, len, Utf8Type::Variable(*len), *unit)
+ }
LogicalType::Decimal(_, option) => Ok(DataValue::Decimal(value.map(|v| {
let mut decimal = Decimal::from(v);
Self::decimal_round_i(option, &mut decimal);
@@ -1087,8 +1183,12 @@ impl DataValue {
LogicalType::Double => Ok(DataValue::Float64(
value.map(|v| f64::from_str(&v)).transpose()?,
)),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit)
+ }
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(value, len, Utf8Type::Variable(*len), *unit)
+ }
LogicalType::Date => {
let option = value
.map(|v| {
@@ -1130,11 +1230,21 @@ impl DataValue {
},
DataValue::Date32(value) => match to {
LogicalType::SqlNull => Ok(DataValue::Null),
- LogicalType::Char(len) => {
- varchar_cast!(Self::format_date(value), Some(len), Utf8Type::Fixed(*len))
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(
+ Self::format_date(value),
+ Some(len),
+ Utf8Type::Fixed(*len),
+ *unit
+ )
}
- LogicalType::Varchar(len) => {
- varchar_cast!(Self::format_date(value), len, Utf8Type::Variable)
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(
+ Self::format_date(value),
+ len,
+ Utf8Type::Variable(*len),
+ *unit
+ )
}
LogicalType::Date => Ok(DataValue::Date32(value)),
LogicalType::DateTime => {
@@ -1150,15 +1260,21 @@ impl DataValue {
},
DataValue::Date64(value) => match to {
LogicalType::SqlNull => Ok(DataValue::Null),
- LogicalType::Char(len) => {
+ LogicalType::Char(len, unit) => {
varchar_cast!(
Self::format_datetime(value),
Some(len),
- Utf8Type::Fixed(*len)
+ Utf8Type::Fixed(*len),
+ *unit
)
}
- LogicalType::Varchar(len) => {
- varchar_cast!(Self::format_datetime(value), len, Utf8Type::Variable)
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(
+ Self::format_datetime(value),
+ len,
+ Utf8Type::Variable(*len),
+ *unit
+ )
}
LogicalType::Date => {
let option = value.and_then(|v| {
@@ -1181,11 +1297,21 @@ impl DataValue {
},
DataValue::Time(value) => match to {
LogicalType::SqlNull => Ok(DataValue::Null),
- LogicalType::Char(len) => {
- varchar_cast!(Self::format_time(value), Some(len), Utf8Type::Fixed(*len))
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(
+ Self::format_time(value),
+ Some(len),
+ Utf8Type::Fixed(*len),
+ *unit
+ )
}
- LogicalType::Varchar(len) => {
- varchar_cast!(Self::format_time(value), len, Utf8Type::Variable)
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(
+ Self::format_time(value),
+ len,
+ Utf8Type::Variable(*len),
+ *unit
+ )
}
_ => Err(DatabaseError::CastFail),
},
@@ -1194,8 +1320,12 @@ impl DataValue {
LogicalType::Float => Ok(DataValue::Float32(value.and_then(|v| v.to_f32()))),
LogicalType::Double => Ok(DataValue::Float64(value.and_then(|v| v.to_f64()))),
LogicalType::Decimal(_, _) => Ok(DataValue::Decimal(value)),
- LogicalType::Char(len) => varchar_cast!(value, Some(len), Utf8Type::Fixed(*len)),
- LogicalType::Varchar(len) => varchar_cast!(value, len, Utf8Type::Variable),
+ LogicalType::Char(len, unit) => {
+ varchar_cast!(value, Some(len), Utf8Type::Fixed(*len), *unit)
+ }
+ LogicalType::Varchar(len, unit) => {
+ varchar_cast!(value, len, Utf8Type::Variable(*len), *unit)
+ }
_ => Err(DatabaseError::CastFail),
},
DataValue::Tuple(values) => match to {
@@ -1306,7 +1436,8 @@ impl From for DataValue {
fn from(value: String) -> Self {
DataValue::Utf8 {
value: Some(value),
- ty: Utf8Type::Variable,
+ ty: Utf8Type::Variable(None), // variable-length text with no declared maximum length
+ unit: CharLengthUnits::Characters, // values built from a plain String are tagged with the Characters unit
}
}
}
@@ -1315,7 +1446,8 @@ impl From