From 60526e10ebfb7a0467ec62834ac2be1ad1df41f2 Mon Sep 17 00:00:00 2001 From: Kould Date: Thu, 5 Dec 2024 01:32:05 +0800 Subject: [PATCH 1/2] perf: https://github.com/KipData/FnckSQL/issues/155 & explain tpcc make sure all statement use index --- README.md | 12 +- src/catalog/table.rs | 17 + src/execution/ddl/add_column.rs | 4 +- src/execution/ddl/create_index.rs | 6 +- src/execution/dml/analyze.rs | 4 +- src/execution/dml/delete.rs | 6 +- src/execution/dml/insert.rs | 7 +- src/execution/dml/update.rs | 11 +- src/expression/evaluator.rs | 4 +- src/expression/range_detacher.rs | 514 +++++++++++++----- src/optimizer/core/histogram.rs | 6 +- src/optimizer/core/memo.rs | 1 + src/optimizer/core/statistics_meta.rs | 1 + .../rule/normalization/pushdown_predicates.rs | 29 +- src/serdes/data_value.rs | 15 +- src/storage/mod.rs | 112 ++-- src/storage/rocksdb.rs | 5 +- src/storage/table_codec.rs | 44 +- src/types/evaluator/tuple.rs | 21 +- src/types/index.rs | 11 +- src/types/tuple_builder.rs | 2 +- src/types/value.rs | 126 ++++- tests/sqllogictest/Cargo.toml | 4 +- tpcc/src/README.md | 37 +- tpcc/src/load.rs | 5 +- tpcc/src/main.rs | 219 +++++++- 26 files changed, 891 insertions(+), 332 deletions(-) diff --git a/README.md b/README.md index 04ea8fff..00ad7545 100755 --- a/README.md +++ b/README.md @@ -63,13 +63,13 @@ run `cargo run -p tpcc --release` to run tpcc - Tips: TPCC currently only supports single thread ```shell <90th Percentile RT (MaxRT)> - New-Order : 0.005 (0.007) - Payment : 0.084 (0.141) -Order-Status : 0.492 (0.575) - Delivery : 6.109 (6.473) - Stock-Level : 0.001 (0.001) + New-Order : 0.003 (0.006) + Payment : 0.001 (0.003) +Order-Status : 0.062 (0.188) + Delivery : 0.022 (0.052) + Stock-Level : 0.004 (0.006) -98 Tpmc +6669 Tpmc ``` #### PG Wire Service run `cargo run --features="net"` to start server diff --git a/src/catalog/table.rs b/src/catalog/table.rs index 504f0a28..73ab2ef2 100644 --- a/src/catalog/table.rs +++ b/src/catalog/table.rs @@ -143,11 +143,28 @@ impl TableCatalog { } }) .clone(); + + let mut val_tys = Vec::with_capacity(column_ids.len()); + for column_id in column_ids.iter() { + let val_ty = self + .get_column_by_id(column_id) + .ok_or_else(|| DatabaseError::ColumnNotFound(column_id.to_string()))? + .datatype() + .clone(); + val_tys.push(val_ty) + } + let value_ty = if val_tys.len() == 1 { + val_tys.pop().unwrap() + } else { + LogicalType::Tuple(val_tys) + }; + let index = IndexMeta { id: index_id, column_ids, table_name: self.name.clone(), pk_ty, + value_ty, name, ty, }; diff --git a/src/execution/ddl/add_column.rs b/src/execution/ddl/add_column.rs index beaf1400..1df2cc47 100644 --- a/src/execution/ddl/add_column.rs +++ b/src/execution/ddl/add_column.rs @@ -11,7 +11,6 @@ use crate::{ use std::ops::Coroutine; use std::ops::CoroutineState; use std::pin::Pin; -use std::slice; pub struct AddColumn { op: AddColumnOperator, @@ -80,8 +79,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for AddColumn { .cloned(), ) { for (tuple_id, value) in unique_values { - let index = - Index::new(unique_meta.id, slice::from_ref(&value), IndexType::Unique); + let index = Index::new(unique_meta.id, &value, IndexType::Unique); throw!(transaction.add_index(table_name, index, &tuple_id)); } } diff --git a/src/execution/ddl/create_index.rs b/src/execution/ddl/create_index.rs index b73394c2..6fd888f2 100644 --- a/src/execution/ddl/create_index.rs +++ b/src/execution/ddl/create_index.rs @@ -9,6 +9,7 @@ use crate::throw; use crate::types::index::Index; use crate::types::tuple::Tuple; use crate::types::tuple_builder::TupleBuilder; +use crate::types::value::DataValue; use crate::types::ColumnId; use std::ops::Coroutine; use std::ops::CoroutineState; @@ -86,7 +87,10 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for CreateIndex { } drop(coroutine); for (tuple_id, values) in index_values { - let index = Index::new(index_id, &values, ty); + let Some(value) = DataValue::values_to_tuple(values) else { + continue; + }; + let index = Index::new(index_id, &value, ty); throw!(transaction.add_index(table_name.as_str(), index, &tuple_id)); } yield Ok(TupleBuilder::build_result("1".to_string())); diff --git a/src/execution/dml/analyze.rs b/src/execution/dml/analyze.rs index 6e0b8f9b..da615650 100644 --- a/src/execution/dml/analyze.rs +++ b/src/execution/dml/analyze.rs @@ -90,7 +90,9 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Analyze { if values.len() == 1 { throw!(builder.append(&values[0])); } else { - throw!(builder.append(&Arc::new(DataValue::Tuple(Some(values))))); + throw!( + builder.append(&Arc::new(DataValue::Tuple(Some((values, false))))) + ); } } } diff --git a/src/execution/dml/delete.rs b/src/execution/dml/delete.rs index ab1702c6..fa511e0c 100644 --- a/src/execution/dml/delete.rs +++ b/src/execution/dml/delete.rs @@ -88,9 +88,13 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Delete { ) in indexes { for (i, values) in value_rows.into_iter().enumerate() { + let Some(value) = DataValue::values_to_tuple(values) else { + continue; + }; + throw!(transaction.del_index( &table_name, - &Index::new(index_id, &values, index_ty), + &Index::new(index_id, &value, index_ty), Some(&tuple_ids[i]), )); } diff --git a/src/execution/dml/insert.rs b/src/execution/dml/insert.rs index c5b2b70a..1a9b5856 100644 --- a/src/execution/dml/insert.rs +++ b/src/execution/dml/insert.rs @@ -131,7 +131,7 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Insert { id: Some(if primary_keys.len() == 1 { tuple_id.pop().unwrap() } else { - DataValue::Tuple(Some(tuple_id)) + DataValue::Tuple(Some((tuple_id, false))) }), values, }); @@ -142,7 +142,10 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Insert { for tuple in tuples.iter() { let values = throw!(Projection::projection(tuple, &exprs, &schema)); - let index = Index::new(index_meta.id, &values, index_meta.ty); + let Some(value) = DataValue::values_to_tuple(values) else { + continue; + }; + let index = Index::new(index_meta.id, &value, index_meta.ty); throw!(transaction.add_index( &table_name, diff --git a/src/execution/dml/update.rs b/src/execution/dml/update.rs index 30a5a1e3..2ae6a6c6 100644 --- a/src/execution/dml/update.rs +++ b/src/execution/dml/update.rs @@ -10,6 +10,7 @@ use crate::types::index::Index; use crate::types::tuple::types; use crate::types::tuple::Tuple; use crate::types::tuple_builder::{TupleBuilder, TupleIdBuilder}; +use crate::types::value::DataValue; use std::collections::HashMap; use std::ops::Coroutine; use std::ops::CoroutineState; @@ -82,7 +83,10 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Update { for tuple in tuples.iter() { let values = throw!(Projection::projection(tuple, &exprs, &input_schema)); - let index = Index::new(index_meta.id, &values, index_meta.ty); + let Some(value) = DataValue::values_to_tuple(values) else { + continue; + }; + let index = Index::new(index_meta.id, &value, index_meta.ty); throw!(transaction.del_index( &table_name, &index, @@ -116,7 +120,10 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Update { for (index_meta, exprs) in index_metas.iter() { let values = throw!(Projection::projection(&tuple, exprs, &input_schema)); - let index = Index::new(index_meta.id, &values, index_meta.ty); + let Some(value) = DataValue::values_to_tuple(values) else { + continue; + }; + let index = Index::new(index_meta.id, &value, index_meta.ty); throw!(transaction.add_index( &table_name, index, diff --git a/src/expression/evaluator.rs b/src/expression/evaluator.rs index cf8caabb..747a6edb 100644 --- a/src/expression/evaluator.rs +++ b/src/expression/evaluator.rs @@ -274,7 +274,9 @@ impl ScalarExpression { for expr in exprs { values.push(expr.eval(tuple, schema)?); } - Ok(DataValue::Tuple((!values.is_empty()).then_some(values))) + Ok(DataValue::Tuple( + (!values.is_empty()).then_some((values, false)), + )) } ScalarExpression::ScalaFunction(ScalarFunction { inner, args, .. }) => { inner.eval(args, tuple, schema)?.cast(inner.return_type()) diff --git a/src/expression/range_detacher.rs b/src/expression/range_detacher.rs index 319447e7..135f8620 100644 --- a/src/expression/range_detacher.rs +++ b/src/expression/range_detacher.rs @@ -107,33 +107,40 @@ impl Range { pub(crate) fn combining_eqs(&self, eqs: &[Range]) -> Option { #[allow(clippy::map_clone)] - fn merge_value(tuple: &[&DataValue], value: DataValue) -> DataValue { + fn merge_value(tuple: &[&DataValue], is_upper: bool, value: DataValue) -> DataValue { let mut merge_tuple = Vec::with_capacity(tuple.len() + 1); for value in tuple { merge_tuple.push((*value).clone()); } merge_tuple.push(value); - DataValue::Tuple(Some(merge_tuple)) + DataValue::Tuple(Some((merge_tuple, is_upper))) } fn _to_tuple_range(tuple: &[&DataValue], range: Range) -> Range { fn merge_value_on_bound( tuple: &[&DataValue], + is_upper: bool, bound: Bound, ) -> Bound { match bound { - Bound::Included(v) => Bound::Included(merge_value(tuple, v)), - Bound::Excluded(v) => Bound::Excluded(merge_value(tuple, v)), - Bound::Unbounded => Bound::Unbounded, + Bound::Included(v) => Bound::Included(merge_value(tuple, is_upper, v)), + Bound::Excluded(v) => Bound::Excluded(merge_value(tuple, is_upper, v)), + Bound::Unbounded => { + if tuple.is_empty() { + return Bound::Unbounded; + } + let values = tuple.iter().map(|v| (*v).clone()).collect_vec(); + Bound::Excluded(DataValue::Tuple(Some((values, is_upper)))) + } } } match range { Range::Scope { min, max } => Range::Scope { - min: merge_value_on_bound(tuple, min), - max: merge_value_on_bound(tuple, max), + min: merge_value_on_bound(tuple, false, min), + max: merge_value_on_bound(tuple, true, max), }, - Range::Eq(v) => Range::Eq(merge_value(tuple, v)), + Range::Eq(v) => Range::Eq(merge_value(tuple, false, v)), Range::Dummy => Range::Dummy, Range::SortedRanges(mut ranges) => { for range in &mut ranges { @@ -149,20 +156,6 @@ impl Range { node.enumeration(&mut Vec::new(), &mut combinations); - if let Some(combination) = match self { - Range::Scope { - min: Bound::Unbounded, - .. - } => combinations.last(), - Range::Scope { - max: Bound::Unbounded, - .. - } => combinations.first(), - _ => None, - } { - return Some(_to_tuple_range(combination, self.clone())); - } - let mut ranges = Vec::new(); for tuple in combinations { @@ -795,6 +788,8 @@ mod test { use crate::planner::operator::Operator; use crate::planner::LogicalPlan; use crate::storage::rocksdb::RocksTransaction; + use crate::types::evaluator::tuple::TupleLtBinaryEvaluator; + use crate::types::evaluator::BinaryEvaluator; use crate::types::value::DataValue; use std::ops::Bound; @@ -1540,15 +1535,122 @@ mod test { assert_eq!( range, - Some(Range::Scope { - min: Bound::Included(DataValue::Tuple(Some(vec![ - DataValue::Int32(Some(1)), - DataValue::Int32(None), - DataValue::Int32(Some(1)), - DataValue::Int32(Some(1)), - ]))), - max: Bound::Unbounded, - }) + Some(Range::SortedRanges(vec![ + Range::Scope { + min: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(None), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Excluded(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(None), + DataValue::Int32(Some(1)), + ], + true + )))), + }, + Range::Scope { + min: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(None), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Excluded(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(None), + DataValue::Int32(Some(2)), + ], + true + )))), + }, + Range::Scope { + min: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Excluded(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + ], + true + )))), + }, + Range::Scope { + min: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Excluded(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + ], + true + )))), + }, + Range::Scope { + min: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Excluded(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + ], + true + )))), + }, + Range::Scope { + min: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Excluded(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(2)), + ], + true + )))), + }, + ])) ); let range = Range::Scope { @@ -1559,15 +1661,122 @@ mod test { assert_eq!( range, - Some(Range::Scope { - min: Bound::Unbounded, - max: Bound::Included(DataValue::Tuple(Some(vec![ - DataValue::Int32(Some(1)), - DataValue::Int32(Some(2)), - DataValue::Int32(Some(2)), - DataValue::Int32(Some(1)), - ]))), - }) + Some(Range::SortedRanges(vec![ + Range::Scope { + min: Bound::Excluded(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(None), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(None), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + ], + true + )))), + }, + Range::Scope { + min: Bound::Excluded(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(None), + DataValue::Int32(Some(2)), + ], + false + )))), + max: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(None), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + ], + true + )))), + }, + Range::Scope { + min: Bound::Excluded(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + ], + true + )))), + }, + Range::Scope { + min: Bound::Excluded(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + ], + false + )))), + max: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + ], + true + )))), + }, + Range::Scope { + min: Bound::Excluded(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + ], + true + )))), + }, + Range::Scope { + min: Bound::Excluded(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(2)), + ], + false + )))), + max: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + ], + true + )))), + }, + ])) ); let range = Range::Scope { @@ -1580,88 +1789,124 @@ mod test { range, Some(Range::SortedRanges(vec![ Range::Scope { - min: Bound::Included(DataValue::Tuple(Some(vec![ - DataValue::Int32(Some(1)), - DataValue::Int32(None), - DataValue::Int32(Some(1)), - DataValue::Int32(Some(1)), - ]))), - max: Bound::Included(DataValue::Tuple(Some(vec![ - DataValue::Int32(Some(1)), - DataValue::Int32(None), - DataValue::Int32(Some(1)), - DataValue::Int32(Some(2)), - ]))), + min: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(None), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(None), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + ], + true + )))), }, Range::Scope { - min: Bound::Included(DataValue::Tuple(Some(vec![ - DataValue::Int32(Some(1)), - DataValue::Int32(None), - DataValue::Int32(Some(2)), - DataValue::Int32(Some(1)), - ]))), - max: Bound::Included(DataValue::Tuple(Some(vec![ - DataValue::Int32(Some(1)), - DataValue::Int32(None), - DataValue::Int32(Some(2)), - DataValue::Int32(Some(2)), - ]))), + min: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(None), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(None), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(2)), + ], + true + )))), }, Range::Scope { - min: Bound::Included(DataValue::Tuple(Some(vec![ - DataValue::Int32(Some(1)), - DataValue::Int32(Some(1)), - DataValue::Int32(Some(1)), - DataValue::Int32(Some(1)), - ]))), - max: Bound::Included(DataValue::Tuple(Some(vec![ - DataValue::Int32(Some(1)), - DataValue::Int32(Some(1)), - DataValue::Int32(Some(1)), - DataValue::Int32(Some(2)), - ]))), + min: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + ], + true + )))), }, Range::Scope { - min: Bound::Included(DataValue::Tuple(Some(vec![ - DataValue::Int32(Some(1)), - DataValue::Int32(Some(1)), - DataValue::Int32(Some(2)), - DataValue::Int32(Some(1)), - ]))), - max: Bound::Included(DataValue::Tuple(Some(vec![ - DataValue::Int32(Some(1)), - DataValue::Int32(Some(1)), - DataValue::Int32(Some(2)), - DataValue::Int32(Some(2)), - ]))), + min: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(2)), + ], + true + )))), }, Range::Scope { - min: Bound::Included(DataValue::Tuple(Some(vec![ - DataValue::Int32(Some(1)), - DataValue::Int32(Some(2)), - DataValue::Int32(Some(1)), - DataValue::Int32(Some(1)), - ]))), - max: Bound::Included(DataValue::Tuple(Some(vec![ - DataValue::Int32(Some(1)), - DataValue::Int32(Some(2)), - DataValue::Int32(Some(1)), - DataValue::Int32(Some(2)), - ]))), + min: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + ], + true + )))), }, Range::Scope { - min: Bound::Included(DataValue::Tuple(Some(vec![ - DataValue::Int32(Some(1)), - DataValue::Int32(Some(2)), - DataValue::Int32(Some(2)), - DataValue::Int32(Some(1)), - ]))), - max: Bound::Included(DataValue::Tuple(Some(vec![ - DataValue::Int32(Some(1)), - DataValue::Int32(Some(2)), - DataValue::Int32(Some(2)), - DataValue::Int32(Some(2)), - ]))), + min: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(1)), + ], + false + )))), + max: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(1)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(2)), + DataValue::Int32(Some(2)), + ], + true + )))), }, ])) ) @@ -1669,36 +1914,23 @@ mod test { #[test] fn test_to_tuple_range_none() { - let eqs_ranges_1 = vec![ - Range::Eq(DataValue::Int32(Some(1))), - Range::SortedRanges(vec![ - Range::Eq(DataValue::Int32(Some(1))), - Range::Scope { - min: Bound::Unbounded, - max: Bound::Unbounded, - }, - ]), - ]; - let eqs_ranges_2 = vec![ - Range::Eq(DataValue::Int32(Some(1))), - Range::Scope { - min: Bound::Unbounded, - max: Bound::Unbounded, - }, - ]; - - let range_1 = Range::Scope { - min: Bound::Included(DataValue::Int32(Some(1))), - max: Bound::Unbounded, - } - .combining_eqs(&eqs_ranges_1); - let range_2 = Range::Scope { - min: Bound::Included(DataValue::Int32(Some(1))), + let range = Range::Scope { + min: Bound::Included(DataValue::Int32(Some(2))), max: Bound::Unbounded, } - .combining_eqs(&eqs_ranges_2); - - assert_eq!(range_1, None); - assert_eq!(range_2, None); + .combining_eqs(&[ + Range::Eq(DataValue::Int32(Some(7))), + Range::Eq(DataValue::Int32(Some(10))), + ]); + println!("{:#?}", range); + let Range::Scope { + min: Bound::Included(min), + max: Bound::Excluded(max), + } = range.unwrap() + else { + unreachable!() + }; + let evaluator = TupleLtBinaryEvaluator; + println!("{:#?}", evaluator.binary_eval(&min, &max)); } } diff --git a/src/optimizer/core/histogram.rs b/src/optimizer/core/histogram.rs index 05f830a6..75690900 100644 --- a/src/optimizer/core/histogram.rs +++ b/src/optimizer/core/histogram.rs @@ -317,10 +317,13 @@ impl Histogram { | LogicalType::Double | LogicalType::Decimal(_, _) => value.clone().cast(&LogicalType::Double)?.double(), LogicalType::Tuple(_) => match value { - DataValue::Tuple(Some(values)) => { + DataValue::Tuple(Some((values, _))) => { let mut float = 0.0; for (i, value) in values.iter().enumerate() { + if !value.logical_type().is_numeric() { + continue; + } if let Some(f) = DataValue::clone(value).cast(&LogicalType::Double)?.double() { @@ -474,6 +477,7 @@ mod tests { column_ids: vec![Ulid::new()], table_name: Arc::new("t1".to_string()), pk_ty: LogicalType::Integer, + value_ty: LogicalType::Integer, name: "pk_c1".to_string(), ty: IndexType::PrimaryKey { is_multiple: false }, } diff --git a/src/optimizer/core/memo.rs b/src/optimizer/core/memo.rs index bbeb6b6a..1b378cad 100644 --- a/src/optimizer/core/memo.rs +++ b/src/optimizer/core/memo.rs @@ -189,6 +189,7 @@ mod tests { column_ids: vec![*c1_column_id], table_name: Arc::new("t1".to_string()), pk_ty: LogicalType::Integer, + value_ty: LogicalType::Integer, name: "pk_index".to_string(), ty: IndexType::PrimaryKey { is_multiple: false }, }), diff --git a/src/optimizer/core/statistics_meta.rs b/src/optimizer/core/statistics_meta.rs index e46f5173..a545b09d 100644 --- a/src/optimizer/core/statistics_meta.rs +++ b/src/optimizer/core/statistics_meta.rs @@ -123,6 +123,7 @@ mod tests { column_ids: vec![Ulid::new()], table_name: Arc::new("t1".to_string()), pk_ty: LogicalType::Integer, + value_ty: LogicalType::Integer, name: "pk_c1".to_string(), ty: IndexType::PrimaryKey { is_multiple: false }, }; diff --git a/src/optimizer/rule/normalization/pushdown_predicates.rs b/src/optimizer/rule/normalization/pushdown_predicates.rs index a3ae7fe6..b6334776 100644 --- a/src/optimizer/rule/normalization/pushdown_predicates.rs +++ b/src/optimizer/rule/normalization/pushdown_predicates.rs @@ -10,8 +10,11 @@ use crate::planner::operator::filter::FilterOperator; use crate::planner::operator::join::JoinType; use crate::planner::operator::Operator; use crate::types::index::{IndexInfo, IndexMetaRef, IndexType}; +use crate::types::value::DataValue; use crate::types::LogicalType; use itertools::Itertools; +use std::mem; +use std::ops::Bound; use std::sync::LazyLock; static PUSH_PREDICATE_THROUGH_JOIN: LazyLock = LazyLock::new(|| Pattern { @@ -262,12 +265,30 @@ impl PushPredicateIntoScan { res = range.combining_eqs(&eq_ranges); } } - res.and_then(|range| { + res.map(|range| { if range.only_eq() && apply_column_count != meta.column_ids.len() { - None - } else { - Some(range) + fn eq_to_scope(range: Range) -> Range { + match range { + Range::Eq(DataValue::Tuple(Some((values, _)))) => { + let min = + Bound::Included(DataValue::Tuple(Some((values.clone(), false)))); + let max = Bound::Excluded(DataValue::Tuple(Some((values, true)))); + + Range::Scope { min, max } + } + Range::SortedRanges(mut ranges) => { + for range in ranges.iter_mut() { + let tmp = mem::replace(range, Range::Dummy); + *range = eq_to_scope(tmp); + } + Range::SortedRanges(ranges) + } + range => range, + } + } + return eq_to_scope(range); } + range }) } } diff --git a/src/serdes/data_value.rs b/src/serdes/data_value.rs index 872446de..9fd7647c 100644 --- a/src/serdes/data_value.rs +++ b/src/serdes/data_value.rs @@ -20,12 +20,13 @@ impl DataValue { if let DataValue::Tuple(values) = self { match values { None => writer.write_all(&[0u8])?, - Some(values) => { + Some((values, is_upper)) => { writer.write_all(&[1u8])?; writer.write_all(&(values.len() as u32).to_le_bytes())?; for value in values.iter() { value.inner_encode(writer, &value.logical_type())? } + writer.write_all(&[if *is_upper { 1u8 } else { 0u8 }])?; } } @@ -65,7 +66,9 @@ impl DataValue { for ty in types.iter() { vec.push(Self::inner_decode(reader, ty)?); } - Some(vec) + let mut bytes = [0u8]; + reader.read_exact(&mut bytes)?; + Some((vec, bytes[0] == 1)) } _ => unreachable!(), }; @@ -135,10 +138,10 @@ pub(crate) mod test { unit: CharLengthUnits::Characters, }; let source_4 = DataValue::Tuple(None); - let source_5 = DataValue::Tuple(Some(vec![ - DataValue::Int32(None), - DataValue::Int32(Some(42)), - ])); + let source_5 = DataValue::Tuple(Some(( + vec![DataValue::Int32(None), DataValue::Int32(Some(42))], + false, + ))); let mut reference_tables = ReferenceTables::new(); let mut bytes = Vec::new(); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index a91891a0..8d697568 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1,5 +1,5 @@ pub mod rocksdb; -mod table_codec; +pub(crate) mod table_codec; use crate::catalog::view::View; use crate::catalog::{ColumnCatalog, ColumnRef, TableCatalog, TableMeta, TableName}; @@ -18,9 +18,9 @@ use bytes::Bytes; use itertools::Itertools; use std::collections::{Bound, VecDeque}; use std::io::Cursor; +use std::mem; use std::ops::SubAssign; use std::sync::Arc; -use std::{mem, slice}; use ulid::Generator; pub(crate) type StatisticsMetaCache = SharedLruCache<(TableName, IndexId), StatisticsMeta>; @@ -628,7 +628,6 @@ trait IndexImpl { &self, params: &IndexImplParams, value: &DataValue, - is_upper: bool, ) -> Result, DatabaseError>; } @@ -666,17 +665,15 @@ struct IndexImplParams<'a, T: Transaction> { } impl IndexImplParams<'_, T> { - pub(crate) fn pk_ty(&self) -> &LogicalType { - &self.index_meta.pk_ty + pub(crate) fn value_ty(&self) -> &LogicalType { + &self.index_meta.value_ty } pub(crate) fn try_cast(&self, mut val: DataValue) -> Result { - let pk_ty = self.pk_ty(); + let value_ty = self.value_ty(); - if matches!(self.index_meta.ty, IndexType::PrimaryKey { .. }) - && &val.logical_type() != pk_ty - { - val = val.cast(pk_ty)?; + if &val.logical_type() != value_ty { + val = val.cast(value_ty)?; } Ok(val) } @@ -738,13 +735,12 @@ impl IndexImpl for IndexImplEnum { &self, params: &IndexImplParams, value: &DataValue, - is_upper: bool, ) -> Result, DatabaseError> { match self { - IndexImplEnum::PrimaryKey(inner) => inner.bound_key(params, value, is_upper), - IndexImplEnum::Unique(inner) => inner.bound_key(params, value, is_upper), - IndexImplEnum::Normal(inner) => inner.bound_key(params, value, is_upper), - IndexImplEnum::Composite(inner) => inner.bound_key(params, value, is_upper), + IndexImplEnum::PrimaryKey(inner) => inner.bound_key(params, value), + IndexImplEnum::Unique(inner) => inner.bound_key(params, value), + IndexImplEnum::Normal(inner) => inner.bound_key(params, value), + IndexImplEnum::Composite(inner) => inner.bound_key(params, value), } } } @@ -790,7 +786,6 @@ impl IndexImpl for PrimaryKeyIndexImpl { &self, params: &IndexImplParams, val: &DataValue, - _: bool, ) -> Result, DatabaseError> { TableCodec::encode_tuple_key(params.table_name, val) } @@ -823,7 +818,7 @@ impl IndexImpl for UniqueIndexImpl { id_builder: &mut TupleIdBuilder, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError> { - let Some(bytes) = params.tx.get(&self.bound_key(params, value, false)?)? else { + let Some(bytes) = params.tx.get(&self.bound_key(params, value)?)? else { return Ok(IndexResult::Tuple(None)); }; let tuple_id = TableCodec::decode_index(&bytes, ¶ms.index_meta.pk_ty)?; @@ -837,13 +832,8 @@ impl IndexImpl for UniqueIndexImpl { &self, params: &IndexImplParams, value: &DataValue, - _: bool, ) -> Result, DatabaseError> { - let index = Index::new( - params.index_meta.id, - slice::from_ref(value), - IndexType::Unique, - ); + let index = Index::new(params.index_meta.id, value, IndexType::Unique); TableCodec::encode_index_key(params.table_name, &index, None) } @@ -865,8 +855,8 @@ impl IndexImpl for NormalIndexImpl { _: &mut TupleIdBuilder, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError> { - let min = self.bound_key(params, value, false)?; - let max = self.bound_key(params, value, true)?; + let min = self.bound_key(params, value)?; + let max = self.bound_key(params, value)?; let iter = params.tx.range( Bound::Included(min.as_slice()), @@ -879,15 +869,10 @@ impl IndexImpl for NormalIndexImpl { &self, params: &IndexImplParams, value: &DataValue, - is_upper: bool, ) -> Result, DatabaseError> { - let index = Index::new( - params.index_meta.id, - slice::from_ref(value), - IndexType::Normal, - ); + let index = Index::new(params.index_meta.id, value, IndexType::Normal); - TableCodec::encode_index_bound_key(params.table_name, &index, is_upper) + TableCodec::encode_index_bound_key(params.table_name, &index) } } @@ -907,8 +892,8 @@ impl IndexImpl for CompositeIndexImpl { _: &mut TupleIdBuilder, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError> { - let min = self.bound_key(params, value, false)?; - let max = self.bound_key(params, value, true)?; + let min = self.bound_key(params, value)?; + let max = self.bound_key(params, value)?; let iter = params.tx.range( Bound::Included(min.as_slice()), @@ -921,16 +906,10 @@ impl IndexImpl for CompositeIndexImpl { &self, params: &IndexImplParams, value: &DataValue, - is_upper: bool, ) -> Result, DatabaseError> { - let values = if let DataValue::Tuple(Some(values)) = &value { - values.as_slice() - } else { - slice::from_ref(value) - }; - let index = Index::new(params.index_meta.id, values, IndexType::Composite); + let index = Index::new(params.index_meta.id, value, IndexType::Composite); - TableCodec::encode_index_bound_key(params.table_name, &index, is_upper) + TableCodec::encode_index_bound_key(params.table_name, &index) } } @@ -1042,30 +1021,21 @@ impl Iter for IndexIter<'_, T> { Range::Scope { min, max } => { let table_name = self.params.table_name; let index_meta = &self.params.index_meta; - let bound_encode = - |bound: Bound, is_upper: bool| -> Result<_, DatabaseError> { - match bound { - Bound::Included(mut val) => { - val = self.params.try_cast(val)?; - - Ok(Bound::Included(self.inner.bound_key( - &self.params, - &val, - is_upper, - )?)) - } - Bound::Excluded(mut val) => { - val = self.params.try_cast(val)?; - - Ok(Bound::Excluded(self.inner.bound_key( - &self.params, - &val, - is_upper, - )?)) - } - Bound::Unbounded => Ok(Bound::Unbounded), + let bound_encode = |bound: Bound| -> Result<_, DatabaseError> { + match bound { + Bound::Included(mut val) => { + val = self.params.try_cast(val)?; + + Ok(Bound::Included(self.inner.bound_key(&self.params, &val)?)) } - }; + Bound::Excluded(mut val) => { + val = self.params.try_cast(val)?; + + Ok(Bound::Excluded(self.inner.bound_key(&self.params, &val)?)) + } + Bound::Unbounded => Ok(Bound::Unbounded), + } + }; let (bound_min, bound_max) = if matches!(index_meta.ty, IndexType::PrimaryKey { .. }) { TableCodec::tuple_bound(table_name) @@ -1078,10 +1048,10 @@ impl Iter for IndexIter<'_, T> { } }; - let mut encode_min = bound_encode(min, false)?; + let mut encode_min = bound_encode(min)?; check_bound(&mut encode_min, bound_min); - let mut encode_max = bound_encode(max, true)?; + let mut encode_max = bound_encode(max)?; check_bound(&mut encode_max, bound_max); let iter = self.params.tx.range( @@ -1142,7 +1112,6 @@ mod test { use crate::utils::lru::SharedLruCache; use std::collections::Bound; use std::hash::RandomState; - use std::slice; use std::sync::Arc; use tempfile::TempDir; @@ -1473,6 +1442,7 @@ mod test { column_ids: vec![index_column_id], table_name: Arc::new("t1".to_string()), pk_ty: LogicalType::Integer, + value_ty: LogicalType::Integer, name: "i1".to_string(), ty: IndexType::Normal, }), @@ -1506,15 +1476,15 @@ mod test { let indexes = vec![ ( Arc::new(DataValue::Int32(Some(0))), - Index::new(1, slice::from_ref(&tuples[0].values[2]), IndexType::Normal), + Index::new(1, &tuples[0].values[2], IndexType::Normal), ), ( Arc::new(DataValue::Int32(Some(1))), - Index::new(1, slice::from_ref(&tuples[1].values[2]), IndexType::Normal), + Index::new(1, &tuples[1].values[2], IndexType::Normal), ), ( Arc::new(DataValue::Int32(Some(2))), - Index::new(1, slice::from_ref(&tuples[2].values[2]), IndexType::Normal), + Index::new(1, &tuples[2].values[2], IndexType::Normal), ), ]; for (tuple_id, index) in indexes.iter().cloned() { diff --git a/src/storage/rocksdb.rs b/src/storage/rocksdb.rs index 59a71e6c..7bd51e32 100644 --- a/src/storage/rocksdb.rs +++ b/src/storage/rocksdb.rs @@ -109,7 +109,9 @@ impl InnerIter for RocksIter<'_, '_> { for result in self.iter.by_ref() { let (key, value) = result?; let upper_bound_check = match &self.upper { - Bound::Included(ref upper) => key.as_ref() <= upper.as_slice(), + Bound::Included(ref upper) => { + key.as_ref() <= upper.as_slice() || key.starts_with(upper.as_slice()) + } Bound::Excluded(ref upper) => key.as_ref() < upper.as_slice(), Bound::Unbounded => true, }; @@ -257,6 +259,7 @@ mod test { column_ids: vec![*a_column_id], table_name, pk_ty: LogicalType::Integer, + value_ty: LogicalType::Integer, name: "pk_a".to_string(), ty: IndexType::PrimaryKey { is_multiple: false }, }), diff --git a/src/storage/table_codec.rs b/src/storage/table_codec.rs index a05f268d..63bf7ae8 100644 --- a/src/storage/table_codec.rs +++ b/src/storage/table_codec.rs @@ -12,8 +12,8 @@ use bytes::Bytes; use std::io::{Cursor, Read, Seek, SeekFrom, Write}; use std::sync::LazyLock; -const BOUND_MIN_TAG: u8 = 0; -const BOUND_MAX_TAG: u8 = 1; +pub(crate) const BOUND_MIN_TAG: u8 = u8::MIN; +pub(crate) const BOUND_MAX_TAG: u8 = u8::MAX; static ROOT_BYTES: LazyLock> = LazyLock::new(|| b"Root".to_vec()); static VIEW_BYTES: LazyLock> = LazyLock::new(|| b"View".to_vec()); @@ -42,7 +42,7 @@ impl TableCodec { return Err(DatabaseError::NotNull); } - if let DataValue::Tuple(Some(values)) = &value { + if let DataValue::Tuple(Some((values, _))) = &value { for value in values { Self::check_primary_key(value, indentation + 1)? } @@ -304,31 +304,14 @@ impl TableCodec { Ok((Bytes::from(key), Bytes::from(bytes))) } - fn _encode_index_key(name: &str, index: &Index) -> Result, DatabaseError> { + pub fn encode_index_bound_key(name: &str, index: &Index) -> Result, DatabaseError> { let mut key_prefix = Self::key_prefix(CodecType::Index, name); key_prefix.push(BOUND_MIN_TAG); key_prefix.extend_from_slice(&index.id.to_be_bytes()); key_prefix.push(BOUND_MIN_TAG); - for col_v in index.column_values { - col_v.memcomparable_encode(&mut key_prefix)?; - key_prefix.push(BOUND_MIN_TAG); - } - Ok(key_prefix) - } - - pub fn encode_index_bound_key( - name: &str, - index: &Index, - is_upper: bool, - ) -> Result, DatabaseError> { - let mut key_prefix = Self::_encode_index_key(name, index)?; + index.value.memcomparable_encode(&mut key_prefix)?; - if is_upper { - if let Some(last) = key_prefix.last_mut() { - *last = BOUND_MAX_TAG - } - } Ok(key_prefix) } @@ -337,11 +320,11 @@ impl TableCodec { index: &Index, tuple_id: Option<&TupleId>, ) -> Result, DatabaseError> { - let mut key_prefix = Self::_encode_index_key(name, index)?; + let mut key_prefix = Self::encode_index_bound_key(name, index)?; if let Some(tuple_id) = tuple_id { if matches!(index.ty, IndexType::Normal | IndexType::Composite) { - tuple_id.inner_encode(&mut key_prefix, &tuple_id.logical_type())?; + tuple_id.memcomparable_encode(&mut key_prefix)?; } } Ok(key_prefix) @@ -505,7 +488,6 @@ mod tests { use std::collections::BTreeSet; use std::io::Cursor; use std::ops::Bound; - use std::slice; use std::sync::Arc; use ulid::Ulid; @@ -587,6 +569,7 @@ mod tests { column_ids: vec![Ulid::new()], table_name: Arc::new("T1".to_string()), pk_ty: LogicalType::Integer, + value_ty: LogicalType::Integer, name: "index_1".to_string(), ty: IndexType::PrimaryKey { is_multiple: false }, }; @@ -604,11 +587,7 @@ mod tests { fn test_table_codec_index() -> Result<(), DatabaseError> { let table_catalog = build_table_codec(); let value = Arc::new(DataValue::Int32(Some(0))); - let index = Index::new( - 0, - slice::from_ref(&value), - IndexType::PrimaryKey { is_multiple: false }, - ); + let index = Index::new(0, &value, IndexType::PrimaryKey { is_multiple: false }); let tuple_id = DataValue::Int32(Some(0)); let (_, bytes) = TableCodec::encode_index(&table_catalog.name, &index, &tuple_id)?; @@ -759,6 +738,7 @@ mod tests { column_ids: vec![], table_name: Arc::new(table_name.to_string()), pk_ty: LogicalType::Integer, + value_ty: LogicalType::Integer, name: format!("{}_index", index_id), ty: IndexType::PrimaryKey { is_multiple: false }, }; @@ -810,7 +790,7 @@ mod tests { let value = Arc::new(value); let index = Index::new( index_id as u32, - slice::from_ref(&value), + &value, IndexType::PrimaryKey { is_multiple: false }, ); @@ -866,7 +846,7 @@ mod tests { let value = Arc::new(value); let index = Index::new( index_id as u32, - slice::from_ref(&value), + &value, IndexType::PrimaryKey { is_multiple: false }, ); diff --git a/src/types/evaluator/tuple.rs b/src/types/evaluator/tuple.rs index 9421420d..57cc6935 100644 --- a/src/types/evaluator/tuple.rs +++ b/src/types/evaluator/tuple.rs @@ -17,7 +17,10 @@ pub struct TupleLtBinaryEvaluator; #[derive(Debug, PartialEq, Eq, Clone, Hash, Serialize, Deserialize)] pub struct TupleLtEqBinaryEvaluator; -fn tuple_cmp(v1: &[DataValue], v2: &[DataValue]) -> Option { +fn tuple_cmp( + (v1, v1_is_upper): &(Vec, bool), + (v2, v2_is_upper): &(Vec, bool), +) -> Option { let mut order = Ordering::Equal; let mut v1_iter = v1.iter(); let mut v2_iter = v2.iter(); @@ -25,8 +28,20 @@ fn tuple_cmp(v1: &[DataValue], v2: &[DataValue]) -> Option { while order == Ordering::Equal { order = match (v1_iter.next(), v2_iter.next()) { (Some(v1), Some(v2)) => v1.partial_cmp(v2)?, - (Some(_), None) => Ordering::Greater, - (None, Some(_)) => Ordering::Less, + (Some(_), None) => { + if *v2_is_upper { + Ordering::Less + } else { + Ordering::Greater + } + } + (None, Some(_)) => { + if *v1_is_upper { + Ordering::Greater + } else { + Ordering::Less + } + } (None, None) => break, } } diff --git a/src/types/index.rs b/src/types/index.rs index bc6b6d38..18dee32a 100644 --- a/src/types/index.rs +++ b/src/types/index.rs @@ -32,6 +32,7 @@ pub struct IndexMeta { pub column_ids: Vec, pub table_name: TableName, pub pk_ty: LogicalType, + pub value_ty: LogicalType, pub name: String, pub ty: IndexType, } @@ -57,17 +58,13 @@ impl IndexMeta { #[derive(Debug, Clone)] pub struct Index<'a> { pub id: IndexId, - pub column_values: &'a [DataValue], + pub value: &'a DataValue, pub ty: IndexType, } impl<'a> Index<'a> { - pub fn new(id: IndexId, column_values: &'a [DataValue], ty: IndexType) -> Self { - Index { - id, - column_values, - ty, - } + pub fn new(id: IndexId, value: &'a DataValue, ty: IndexType) -> Self { + Index { id, value, ty } } } diff --git a/src/types/tuple_builder.rs b/src/types/tuple_builder.rs index d8ecb5a5..7c557ad0 100644 --- a/src/types/tuple_builder.rs +++ b/src/types/tuple_builder.rs @@ -49,7 +49,7 @@ impl TupleIdBuilder { } self.tmp_keys.clear(); - DataValue::Tuple(Some(primary_keys)) + DataValue::Tuple(Some((primary_keys, false))) } }) } diff --git a/src/types/value.rs b/src/types/value.rs index 71733260..77fa2fe0 100644 --- a/src/types/value.rs +++ b/src/types/value.rs @@ -1,5 +1,6 @@ use super::LogicalType; use crate::errors::DatabaseError; +use crate::storage::table_codec::{BOUND_MAX_TAG, BOUND_MIN_TAG}; use chrono::format::{DelayedFormat, StrftimeItems}; use chrono::{DateTime, Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike}; use itertools::Itertools; @@ -60,7 +61,8 @@ pub enum DataValue { Date64(Option), Time(Option), Decimal(Option), - Tuple(Option>), + /// (values, is_upper) + Tuple(Option<(Vec, bool)>), } macro_rules! generate_get_option { @@ -477,7 +479,7 @@ impl DataValue { LogicalType::Tuple(types) => { let values = types.iter().map(DataValue::init).collect_vec(); - DataValue::Tuple(Some(values)) + DataValue::Tuple(Some((values, false))) } } } @@ -722,7 +724,7 @@ impl DataValue { DataValue::Time(_) => LogicalType::Time, DataValue::Decimal(_) => LogicalType::Decimal(None, None), DataValue::Tuple(values) => { - if let Some(values) = values { + if let Some((values, _)) = values { let types = values.iter().map(|v| v.logical_type()).collect_vec(); LogicalType::Tuple(types) } else { @@ -820,10 +822,19 @@ impl DataValue { } DataValue::Null => (), DataValue::Decimal(Some(_v)) => todo!(), - DataValue::Tuple(Some(values)) => { + DataValue::Tuple(Some((values, is_upper))) => { for v in values.iter() { v.memcomparable_encode(b)?; - b.push(0u8); + if v.is_null() && *is_upper { + b.push(BOUND_MAX_TAG); + } else { + b.push(BOUND_MIN_TAG); + } + } + if *is_upper && !values.is_empty() { + if let Some(v) = b.last_mut() { + *v = BOUND_MAX_TAG + } } } value => { @@ -1385,13 +1396,13 @@ impl DataValue { _ => Err(DatabaseError::CastFail), }, DataValue::Tuple(values) => match to { - LogicalType::Tuple(types) => Ok(if let Some(mut values) = values { + LogicalType::Tuple(types) => Ok(if let Some((mut values, is_upper)) = values { for (i, value) in values.iter_mut().enumerate() { if types[i] != value.logical_type() { *value = mem::replace(value, DataValue::Null).cast(&types[i])?; } } - DataValue::Tuple(Some(values)) + DataValue::Tuple(Some((values, is_upper))) } else { DataValue::Tuple(None) }), @@ -1435,6 +1446,14 @@ impl DataValue { Some(0) } + pub(crate) fn values_to_tuple(mut values: Vec) -> Option { + if values.len() > 1 { + Some(DataValue::Tuple(Some((values, false)))) + } else { + values.pop() + } + } + fn decimal_round_i(option: &Option, decimal: &mut Decimal) { if let Some(scale) = option { let new_decimal = decimal.trunc_with_scale(*scale as u32); @@ -1628,7 +1647,7 @@ impl fmt::Display for DataValue { DataValue::Decimal(e) => format_option!(f, e.as_ref().map(DataValue::decimal_format))?, DataValue::Tuple(e) => { write!(f, "(")?; - if let Some(values) = e { + if let Some((values, _)) = e { let len = values.len(); for (i, value) in values.iter().enumerate() { @@ -1666,7 +1685,13 @@ impl fmt::Debug for DataValue { DataValue::Date64(_) => write!(f, "Date64({})", self), DataValue::Time(_) => write!(f, "Time({})", self), DataValue::Decimal(_) => write!(f, "Decimal({})", self), - DataValue::Tuple(_) => write!(f, "Tuple({})", self), + DataValue::Tuple(_) => { + write!(f, "Tuple({}", self)?; + if matches!(self, DataValue::Tuple(Some((_, true)))) { + write!(f, " [is_upper]")?; + } + write!(f, ")") + } } } } @@ -1765,28 +1790,37 @@ mod test { } #[test] - fn test_mem_comparable_tuple() -> Result<(), DatabaseError> { + fn test_mem_comparable_tuple_lower() -> Result<(), DatabaseError> { let mut key_tuple_1 = Vec::new(); let mut key_tuple_2 = Vec::new(); let mut key_tuple_3 = Vec::new(); - DataValue::Tuple(Some(vec![ - DataValue::Int8(None), - DataValue::Int8(Some(0)), - DataValue::Int8(Some(1)), - ])) + DataValue::Tuple(Some(( + vec![ + DataValue::Int8(None), + DataValue::Int8(Some(0)), + DataValue::Int8(Some(1)), + ], + false, + ))) .memcomparable_encode(&mut key_tuple_1)?; - DataValue::Tuple(Some(vec![ - DataValue::Int8(Some(0)), - DataValue::Int8(Some(0)), - DataValue::Int8(Some(1)), - ])) + DataValue::Tuple(Some(( + vec![ + DataValue::Int8(Some(0)), + DataValue::Int8(Some(0)), + DataValue::Int8(Some(1)), + ], + false, + ))) .memcomparable_encode(&mut key_tuple_2)?; - DataValue::Tuple(Some(vec![ - DataValue::Int8(Some(0)), - DataValue::Int8(Some(0)), - DataValue::Int8(Some(2)), - ])) + DataValue::Tuple(Some(( + vec![ + DataValue::Int8(Some(0)), + DataValue::Int8(Some(0)), + DataValue::Int8(Some(2)), + ], + false, + ))) .memcomparable_encode(&mut key_tuple_3)?; println!("{:?} < {:?}", key_tuple_1, key_tuple_2); @@ -1796,4 +1830,46 @@ mod test { Ok(()) } + + #[test] + fn test_mem_comparable_tuple_upper() -> Result<(), DatabaseError> { + let mut key_tuple_1 = Vec::new(); + let mut key_tuple_2 = Vec::new(); + let mut key_tuple_3 = Vec::new(); + + DataValue::Tuple(Some(( + vec![ + DataValue::Int8(None), + DataValue::Int8(Some(0)), + DataValue::Int8(Some(1)), + ], + true, + ))) + .memcomparable_encode(&mut key_tuple_1)?; + DataValue::Tuple(Some(( + vec![ + DataValue::Int8(Some(0)), + DataValue::Int8(Some(0)), + DataValue::Int8(Some(1)), + ], + true, + ))) + .memcomparable_encode(&mut key_tuple_2)?; + DataValue::Tuple(Some(( + vec![ + DataValue::Int8(Some(0)), + DataValue::Int8(Some(0)), + DataValue::Int8(Some(2)), + ], + true, + ))) + .memcomparable_encode(&mut key_tuple_3)?; + + println!("{:?} < {:?}", key_tuple_2, key_tuple_3); + println!("{:?} < {:?}", key_tuple_3, key_tuple_1); + assert!(key_tuple_2 < key_tuple_3); + assert!(key_tuple_3 < key_tuple_1); + + Ok(()) + } } diff --git a/tests/sqllogictest/Cargo.toml b/tests/sqllogictest/Cargo.toml index 5391b82f..0c21bcc3 100644 --- a/tests/sqllogictest/Cargo.toml +++ b/tests/sqllogictest/Cargo.toml @@ -4,8 +4,8 @@ version = "0.4.0" edition = "2021" [dependencies] +clap = { version = "4" } "fnck_sql" = { path = "../.." } glob = { version = "0.3" } sqllogictest = { version = "0.14" } -tempfile = { version = "3.10" } -clap = { version = "4" } \ No newline at end of file +tempfile = { version = "3.10" } \ No newline at end of file diff --git a/tpcc/src/README.md b/tpcc/src/README.md index df4a4313..3ea3f745 100644 --- a/tpcc/src/README.md +++ b/tpcc/src/README.md @@ -6,11 +6,11 @@ run `cargo run -p tpcc --release` to run tpcc - YMTC PC411-1024GB-B - Tips: TPCC currently only supports single thread ```shell -|New-Order| sc: 1182 lt: 0 fl: 13 -|Payment| sc: 1155 lt: 0 fl: 0 -|Order-Status| sc: 115 lt: 1 fl: 29 -|Delivery| sc: 114 lt: 2 fl: 0 -|Stock-Level| sc: 115 lt: 0 fl: 0 +|New-Order| sc: 80029 lt: 0 fl: 821 +|Payment| sc: 80005 lt: 0 fl: 0 +|Order-Status| sc: 8001 lt: 0 fl: 412 +|Delivery| sc: 8001 lt: 0 fl: 0 +|Stock-Level| sc: 8001 lt: 0 fl: 0 in 720 sec. (all must be [OK]) [transaction percentage] @@ -21,23 +21,24 @@ in 720 sec. [response time (at least 90%% passed)] New-Order: 100.0 [OK] Payment: 100.0 [OK] - Order-Status: 99.1 [OK] - Delivery: 98.3 [OK] + Order-Status: 100.0 [OK] + Delivery: 100.0 [OK] Stock-Level: 100.0 [OK] - New-Order Total: 1182 - Payment Total: 1155 - Order-Status Total: 116 - Delivery Total: 116 - Stock-Level Total: 115 + New-Order Total: 80029 + Payment Total: 80005 + Order-Status Total: 8001 + Delivery Total: 8001 + Stock-Level Total: 8001 <90th Percentile RT (MaxRT)> - New-Order : 0.003 (0.011) - Payment : 0.078 (0.470) -Order-Status : 0.227 (0.240) - Delivery : 5.439 (27.702) - Stock-Level : 0.001 (0.001) + New-Order : 0.003 (0.006) + Payment : 0.001 (0.003) +Order-Status : 0.062 (0.188) + Delivery : 0.022 (0.052) + Stock-Level : 0.004 (0.006) -98 Tpmc +6669 Tpmc + ``` ## Refer to diff --git a/tpcc/src/load.rs b/tpcc/src/load.rs index b1aee932..90a44d5e 100644 --- a/tpcc/src/load.rs +++ b/tpcc/src/load.rs @@ -340,15 +340,18 @@ impl Load { ol_dist_info char(24), PRIMARY KEY(ol_w_id, ol_d_id, ol_o_id, ol_number) );", )?; + let _ = + db.run("CREATE INDEX fkey_order_line_1 ON order_line (ol_o_id, ol_d_id, ol_w_id);")?; let _ = db.run("CREATE INDEX fkey_order_line_2 ON order_line (ol_supply_w_id,ol_i_id);")?; for w_id in 1..num_ware + 1 { for d_id in 1..DIST_PER_WARE + 1 { Self::load_orders(rng, db, d_id, w_id)?; } } - println!("[Analyze Table: orders & order_line]"); + println!("[Analyze Table: orders & order_line & new_order]"); let _ = db.run("analyze table orders")?; let _ = db.run("analyze table order_line")?; + let _ = db.run("analyze table new_orders")?; Ok(()) } diff --git a/tpcc/src/main.rs b/tpcc/src/main.rs index 87282036..c539dfcb 100644 --- a/tpcc/src/main.rs +++ b/tpcc/src/main.rs @@ -13,6 +13,7 @@ use fnck_sql::storage::Storage; use rand::prelude::ThreadRng; use rand::Rng; use std::time::{Duration, Instant}; +use fnck_sql::types::tuple::create_table; mod delivery; mod load; @@ -193,10 +194,10 @@ fn main() -> Result<(), TpccError> { if !is_succeed { return Err(TpccError::MaxRetry); } - if round_count != 0 && round_count % 8 == 0 { + if round_count != 0 && round_count % 100 == 0 { println!( "[TPCC CheckPoint {} on round {round_count}][{}]: 90th Percentile RT: {:.3}", - round_count / 4, + round_count / 100, tpcc_test.name(), rt_hist.hist_ckp(i) ); @@ -317,3 +318,217 @@ pub enum TpccError { #[error("maximum retries reached")] MaxRetry, } + +#[test] +fn debug_tpcc() -> Result<(), DatabaseError> { + let database = DataBaseBuilder::path("./fnck_sql_tpcc").build()?; + let mut tx = database.new_transaction()?; + + let (_, customer_tuples) = + tx.run("SELECT c_w_id, c_d_id, c_id, c_last, c_balance, c_data FROM customer limit 1")?; + let (_, district_tuples) = tx.run("SELECT d_id, d_w_id, d_next_o_id FROM district limit 1")?; + let (_, item_tuples) = tx.run("SELECT i_id FROM item limit 1")?; + let (_, stock_tuples) = tx.run("SELECT s_i_id, s_w_id, s_quantity FROM stock limit 1")?; + let (_, orders_tuples) = + tx.run("SELECT o_w_id, o_d_id, o_c_id, o_id, o_carrier_id FROM orders limit 1")?; + let (_, order_line_tuples) = + tx.run("SELECT ol_w_id, ol_d_id, ol_o_id, ol_delivery_d FROM order_line limit 1")?; + let (_, new_order_tuples) = + tx.run("SELECT no_d_id, no_w_id, no_o_id FROM new_orders limit 1")?; + + let c_w_id = customer_tuples[0].values[0].clone(); + let c_d_id = customer_tuples[0].values[1].clone(); + let c_id = customer_tuples[0].values[2].clone(); + let c_last = customer_tuples[0].values[3].clone(); + let c_balance = customer_tuples[0].values[4].clone(); + let c_data = customer_tuples[0].values[5].clone(); + + let d_id = district_tuples[0].values[0].clone(); + let d_w_id = district_tuples[0].values[1].clone(); + let d_next_o_id = district_tuples[0].values[2].clone(); + + let i_id = item_tuples[0].values[0].clone(); + + let s_i_id = stock_tuples[0].values[0].clone(); + let s_w_id = stock_tuples[0].values[1].clone(); + let s_quantity = stock_tuples[0].values[2].clone(); + + let o_w_id = orders_tuples[0].values[0].clone(); + let o_d_id = orders_tuples[0].values[1].clone(); + let o_c_id = orders_tuples[0].values[2].clone(); + let o_id = orders_tuples[0].values[3].clone(); + let o_carrier_id = orders_tuples[0].values[4].clone(); + + let ol_w_id = order_line_tuples[0].values[0].clone(); + let ol_d_id = order_line_tuples[0].values[1].clone(); + let ol_o_id = order_line_tuples[0].values[2].clone(); + let ol_delivery_d = order_line_tuples[0].values[3].clone(); + + let no_d_id = new_order_tuples[0].values[0].clone(); + let no_w_id = new_order_tuples[0].values[1].clone(); + let no_o_id = new_order_tuples[0].values[2].clone(); + // ORDER + { + println!("========Explain on Order"); + { + println!("{}", format!("explain SELECT c_discount, c_last, c_credit FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", c_w_id, c_d_id, c_id)); + let (schema, tuples) = tx.run(format!("explain SELECT c_discount, c_last, c_credit FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", c_w_id, c_d_id, c_id))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!( + "explain SELECT d_next_o_id, d_tax FROM district WHERE d_id = {} AND d_w_id = {}", + d_id, d_w_id + ))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!( + "explain UPDATE district SET d_next_o_id = {} + 1 WHERE d_id = {} AND d_w_id = {}", + d_next_o_id, d_id, d_w_id + ))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!( + "explain SELECT i_price, i_name, i_data FROM item WHERE i_id = {}", + i_id + ))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain SELECT s_quantity, s_data, s_dist_01, s_dist_02, s_dist_03, s_dist_04, s_dist_05, s_dist_06, s_dist_07, s_dist_08, s_dist_09, s_dist_10 FROM stock WHERE s_i_id = {} AND s_w_id = {}", s_i_id, s_w_id))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!( + "explain UPDATE stock SET s_quantity = {} WHERE s_i_id = {} AND s_w_id = {}", + s_quantity, s_i_id, s_w_id + ))?; + println!("{}", create_table(&schema, &tuples)); + } + } + + // Payment + { + println!("========Explain on Payment"); + { + let (schema, tuples) = tx.run(format!( + "explain UPDATE stock SET s_quantity = {} WHERE s_i_id = {} AND s_w_id = {}", + s_quantity, s_i_id, s_w_id + ))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain SELECT d_street_1, d_street_2, d_city, d_state, d_zip, d_name FROM district WHERE d_w_id = {} AND d_id = {}", d_w_id, d_id))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain SELECT count(c_id) FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_last = '{}'", c_w_id, c_d_id, c_last))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain SELECT c_id FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_last = '{}' ORDER BY c_first", c_w_id, c_d_id, c_last))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain SELECT c_first, c_middle, c_last, c_street_1, c_street_2, c_city, c_state, c_zip, c_phone, c_credit, c_credit_lim, c_discount, c_balance, c_since FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", c_w_id, c_d_id, c_id))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain SELECT c_data FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", c_w_id, c_d_id, c_id))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain UPDATE customer SET c_balance = {}, c_data = '{}' WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", c_balance, c_data, c_w_id, c_d_id, c_id))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain UPDATE customer SET c_balance = {} WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", c_balance, c_w_id, c_d_id, c_id))?; + println!("{}", create_table(&schema, &tuples)); + } + } + + // Order-Stat + { + println!("========Explain on Order-Stat"); + { + let (schema, tuples) = tx.run(format!("explain SELECT count(c_id) FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_last = '{}'", c_w_id, c_d_id, c_last))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain SELECT c_balance, c_first, c_middle, c_last FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_last = '{}' ORDER BY c_first", c_w_id, c_d_id, c_last))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain SELECT c_balance, c_first, c_middle, c_last FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", c_w_id, c_d_id, c_id))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain SELECT o_id, o_entry_d, COALESCE(o_carrier_id,0) FROM orders WHERE o_w_id = {} AND o_d_id = {} AND o_c_id = {} AND o_id = (SELECT MAX(o_id) FROM orders WHERE o_w_id = {} AND o_d_id = {} AND o_c_id = {})", o_w_id, o_d_id, o_c_id, o_w_id, o_d_id, o_c_id))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain SELECT ol_i_id, ol_supply_w_id, ol_quantity, ol_amount, ol_delivery_d FROM order_line WHERE ol_w_id = {} AND ol_d_id = {} AND ol_o_id = {}", ol_w_id, ol_d_id, ol_o_id))?; + println!("{}", create_table(&schema, &tuples)); + } + } + + // Deliver + { + println!("========Explain on Deliver"); + { + let (schema, tuples) = tx.run(format!("explain SELECT COALESCE(MIN(no_o_id),0) FROM new_orders WHERE no_d_id = {} AND no_w_id = {}", no_d_id, no_w_id))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain DELETE FROM new_orders WHERE no_o_id = {} AND no_d_id = {} AND no_w_id = {}", no_o_id, no_d_id, no_w_id))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!( + "explain SELECT o_c_id FROM orders WHERE o_id = {} AND o_d_id = {} AND o_w_id = {}", + o_id, o_d_id, o_w_id + ))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain UPDATE orders SET o_carrier_id = {} WHERE o_id = {} AND o_d_id = {} AND o_w_id = {}", o_carrier_id, o_id, o_d_id, o_w_id))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain UPDATE order_line SET ol_delivery_d = '{}' WHERE ol_o_id = {} AND ol_d_id = {} AND ol_w_id = {}", ol_delivery_d, ol_o_id, ol_d_id, ol_w_id))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain SELECT SUM(ol_amount) FROM order_line WHERE ol_o_id = {} AND ol_d_id = {} AND ol_w_id = {}", ol_o_id, ol_d_id, ol_w_id))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain UPDATE customer SET c_balance = c_balance + 1 , c_delivery_cnt = c_delivery_cnt + 1 WHERE c_id = {} AND c_d_id = {} AND c_w_id = {}", c_id, c_d_id, c_w_id))?; + println!("{}", create_table(&schema, &tuples)); + } + } + + // Stock-Level + { + println!("========Explain on Stock-Level"); + { + let (schema, tuples) = tx.run(format!( + "explain SELECT d_next_o_id FROM district WHERE d_id = {} AND d_w_id = {}", + d_id, d_w_id + ))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain SELECT DISTINCT ol_i_id FROM order_line WHERE ol_w_id = {} AND ol_d_id = {} AND ol_o_id < {} AND ol_o_id >= ({} - 20)", ol_w_id, ol_d_id, ol_o_id, ol_o_id))?; + println!("{}", create_table(&schema, &tuples)); + } + { + let (schema, tuples) = tx.run(format!("explain SELECT count(*) FROM stock WHERE s_w_id = {} AND s_i_id = {} AND s_quantity < {}", s_w_id, s_i_id, s_quantity))?; + println!("{}", create_table(&schema, &tuples)); + } + } + + Ok(()) +} From 47b107a86e780e6f657f638034a484ec1d2d634c Mon Sep 17 00:00:00 2001 From: Kould Date: Thu, 5 Dec 2024 02:37:40 +0800 Subject: [PATCH 2/2] chore: make `IndexIter` stateful to enhance readability --- src/expression/range_detacher.rs | 24 ++- .../rule/normalization/pushdown_predicates.rs | 2 +- src/storage/mod.rs | 184 ++++++++++-------- src/storage/rocksdb.rs | 12 +- src/types/value.rs | 13 +- tpcc/src/main.rs | 4 +- 6 files changed, 136 insertions(+), 103 deletions(-) diff --git a/src/expression/range_detacher.rs b/src/expression/range_detacher.rs index 135f8620..9c77c719 100644 --- a/src/expression/range_detacher.rs +++ b/src/expression/range_detacher.rs @@ -1922,7 +1922,23 @@ mod test { Range::Eq(DataValue::Int32(Some(7))), Range::Eq(DataValue::Int32(Some(10))), ]); - println!("{:#?}", range); + assert_eq!( + range, + Some(Range::Scope { + min: Bound::Included(DataValue::Tuple(Some(( + vec![ + DataValue::Int32(Some(7)), + DataValue::Int32(Some(10)), + DataValue::Int32(Some(2)) + ], + false + )))), + max: Bound::Excluded(DataValue::Tuple(Some(( + vec![DataValue::Int32(Some(7)), DataValue::Int32(Some(10))], + true + )))), + }) + ); let Range::Scope { min: Bound::Included(min), max: Bound::Excluded(max), @@ -1930,7 +1946,9 @@ mod test { else { unreachable!() }; - let evaluator = TupleLtBinaryEvaluator; - println!("{:#?}", evaluator.binary_eval(&min, &max)); + assert_eq!( + TupleLtBinaryEvaluator.binary_eval(&min, &max), + DataValue::Boolean(Some(true)) + ) } } diff --git a/src/optimizer/rule/normalization/pushdown_predicates.rs b/src/optimizer/rule/normalization/pushdown_predicates.rs index b6334776..1fcef798 100644 --- a/src/optimizer/rule/normalization/pushdown_predicates.rs +++ b/src/optimizer/rule/normalization/pushdown_predicates.rs @@ -271,7 +271,7 @@ impl PushPredicateIntoScan { match range { Range::Eq(DataValue::Tuple(Some((values, _)))) => { let min = - Bound::Included(DataValue::Tuple(Some((values.clone(), false)))); + Bound::Excluded(DataValue::Tuple(Some((values.clone(), false)))); let max = Bound::Excluded(DataValue::Tuple(Some((values, true)))); Range::Scope { min, max } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 8d697568..a9b560a3 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -16,11 +16,12 @@ use crate::types::{ColumnId, LogicalType}; use crate::utils::lru::SharedLruCache; use bytes::Bytes; use itertools::Itertools; -use std::collections::{Bound, VecDeque}; +use std::collections::Bound; use std::io::Cursor; use std::mem; use std::ops::SubAssign; use std::sync::Arc; +use std::vec::IntoIter; use ulid::Generator; pub(crate) type StatisticsMetaCache = SharedLruCache<(TableName, IndexId), StatisticsMeta>; @@ -127,8 +128,8 @@ pub trait Transaction: Sized { tx: self, }, inner, - ranges: VecDeque::from(ranges), - scope_iter: None, + ranges: ranges.into_iter(), + state: IndexIterState::Init, }) } @@ -965,8 +966,14 @@ pub struct IndexIter<'a, T: Transaction> { params: IndexImplParams<'a, T>, inner: IndexImplEnum, // for buffering data - ranges: VecDeque, - scope_iter: Option>, + ranges: IntoIter, + state: IndexIterState<'a, T>, +} + +pub enum IndexIterState<'a, T: Transaction + 'a> { + Init, + Range(T::IterType<'a>), + Over, } impl<'a, T: Transaction + 'a> IndexIter<'a, T> { @@ -985,102 +992,111 @@ impl<'a, T: Transaction + 'a> IndexIter<'a, T> { num.sub_assign(1); } } - - fn is_empty(&self) -> bool { - self.scope_iter.is_none() && self.ranges.is_empty() - } } /// expression -> index value -> tuple impl Iter for IndexIter<'_, T> { fn next_tuple(&mut self) -> Result, DatabaseError> { - if matches!(self.limit, Some(0)) || self.is_empty() { - self.scope_iter = None; - self.ranges.clear(); + if matches!(self.limit, Some(0)) { + self.state = IndexIterState::Over; return Ok(None); } - if let Some(iter) = &mut self.scope_iter { - while let Some((_, bytes)) = iter.try_next()? { - if Self::offset_move(&mut self.offset) { - continue; - } - Self::limit_sub(&mut self.limit); - let tuple = self - .inner - .index_lookup(&bytes, &mut self.id_builder, &self.params)?; - - return Ok(Some(tuple)); - } - self.scope_iter = None; - } - - if let Some(binary) = self.ranges.pop_front() { - match binary { - Range::Scope { min, max } => { - let table_name = self.params.table_name; - let index_meta = &self.params.index_meta; - let bound_encode = |bound: Bound| -> Result<_, DatabaseError> { - match bound { - Bound::Included(mut val) => { - val = self.params.try_cast(val)?; - - Ok(Bound::Included(self.inner.bound_key(&self.params, &val)?)) - } - Bound::Excluded(mut val) => { - val = self.params.try_cast(val)?; - - Ok(Bound::Excluded(self.inner.bound_key(&self.params, &val)?)) - } - Bound::Unbounded => Ok(Bound::Unbounded), - } + loop { + match &mut self.state { + IndexIterState::Init => { + let Some(binary) = self.ranges.next() else { + self.state = IndexIterState::Over; + continue; }; - let (bound_min, bound_max) = - if matches!(index_meta.ty, IndexType::PrimaryKey { .. }) { - TableCodec::tuple_bound(table_name) - } else { - TableCodec::index_bound(table_name, &index_meta.id)? - }; - let check_bound = |value: &mut Bound>, bound: Vec| { - if matches!(value, Bound::Unbounded) { - let _ = mem::replace(value, Bound::Included(bound)); + match binary { + Range::Scope { min, max } => { + let table_name = self.params.table_name; + let index_meta = &self.params.index_meta; + let bound_encode = + |bound: Bound| -> Result<_, DatabaseError> { + match bound { + Bound::Included(mut val) => { + val = self.params.try_cast(val)?; + + Ok(Bound::Included( + self.inner.bound_key(&self.params, &val)?, + )) + } + Bound::Excluded(mut val) => { + val = self.params.try_cast(val)?; + + Ok(Bound::Excluded( + self.inner.bound_key(&self.params, &val)?, + )) + } + Bound::Unbounded => Ok(Bound::Unbounded), + } + }; + let (bound_min, bound_max) = + if matches!(index_meta.ty, IndexType::PrimaryKey { .. }) { + TableCodec::tuple_bound(table_name) + } else { + TableCodec::index_bound(table_name, &index_meta.id)? + }; + let check_bound = |value: &mut Bound>, bound: Vec| { + if matches!(value, Bound::Unbounded) { + let _ = mem::replace(value, Bound::Included(bound)); + } + }; + + let mut encode_min = bound_encode(min)?; + check_bound(&mut encode_min, bound_min); + + let mut encode_max = bound_encode(max)?; + check_bound(&mut encode_max, bound_max); + + let iter = self.params.tx.range( + encode_min.as_ref().map(Vec::as_slice), + encode_max.as_ref().map(Vec::as_slice), + )?; + self.state = IndexIterState::Range(iter); } - }; - - let mut encode_min = bound_encode(min)?; - check_bound(&mut encode_min, bound_min); - - let mut encode_max = bound_encode(max)?; - check_bound(&mut encode_max, bound_max); - - let iter = self.params.tx.range( - encode_min.as_ref().map(Vec::as_slice), - encode_max.as_ref().map(Vec::as_slice), - )?; - self.scope_iter = Some(iter); - } - Range::Eq(mut val) => { - val = self.params.try_cast(val)?; - - match self - .inner - .eq_to_res(&val, &mut self.id_builder, &self.params)? - { - IndexResult::Tuple(tuple) => { - if Self::offset_move(&mut self.offset) { - return self.next_tuple(); + Range::Eq(mut val) => { + val = self.params.try_cast(val)?; + + match self + .inner + .eq_to_res(&val, &mut self.id_builder, &self.params)? + { + IndexResult::Tuple(tuple) => { + if Self::offset_move(&mut self.offset) { + continue; + } + Self::limit_sub(&mut self.limit); + return Ok(tuple); + } + IndexResult::Scope(iter) => { + self.state = IndexIterState::Range(iter); + } } - Self::limit_sub(&mut self.limit); - return Ok(tuple); } - IndexResult::Scope(iter) => self.scope_iter = Some(iter), + _ => (), + } + } + IndexIterState::Range(iter) => { + while let Some((_, bytes)) = iter.try_next()? { + if Self::offset_move(&mut self.offset) { + continue; + } + Self::limit_sub(&mut self.limit); + let tuple = + self.inner + .index_lookup(&bytes, &mut self.id_builder, &self.params)?; + + return Ok(Some(tuple)); } + self.state = IndexIterState::Init; } - _ => (), + IndexIterState::Over => return Ok(None), } } - self.next_tuple() } } diff --git a/src/storage/rocksdb.rs b/src/storage/rocksdb.rs index 7bd51e32..552142da 100644 --- a/src/storage/rocksdb.rs +++ b/src/storage/rocksdb.rs @@ -139,7 +139,8 @@ mod test { use crate::expression::range_detacher::Range; use crate::storage::rocksdb::RocksStorage; use crate::storage::{ - IndexImplEnum, IndexImplParams, IndexIter, Iter, PrimaryKeyIndexImpl, Storage, Transaction, + IndexImplEnum, IndexImplParams, IndexIter, IndexIterState, Iter, PrimaryKeyIndexImpl, + Storage, Transaction, }; use crate::types::index::{IndexMeta, IndexType}; use crate::types::tuple::Tuple; @@ -148,7 +149,7 @@ mod test { use crate::types::LogicalType; use crate::utils::lru::SharedLruCache; use itertools::Itertools; - use std::collections::{Bound, VecDeque}; + use std::collections::Bound; use std::hash::RandomState; use std::sync::Arc; use tempfile::TempDir; @@ -267,14 +268,15 @@ mod test { table_types: table.types(), tx: &transaction, }, - ranges: VecDeque::from(vec![ + ranges: vec![ Range::Eq(DataValue::Int32(Some(0))), Range::Scope { min: Bound::Included(DataValue::Int32(Some(2))), max: Bound::Included(DataValue::Int32(Some(4))), }, - ]), - scope_iter: None, + ] + .into_iter(), + state: IndexIterState::Init, inner: IndexImplEnum::PrimaryKey(PrimaryKeyIndexImpl), }; let mut result = Vec::new(); diff --git a/src/types/value.rs b/src/types/value.rs index 77fa2fe0..78ff424a 100644 --- a/src/types/value.rs +++ b/src/types/value.rs @@ -823,19 +823,16 @@ impl DataValue { DataValue::Null => (), DataValue::Decimal(Some(_v)) => todo!(), DataValue::Tuple(Some((values, is_upper))) => { - for v in values.iter() { + let last = values.len() - 1; + + for (i, v) in values.iter().enumerate() { v.memcomparable_encode(b)?; - if v.is_null() && *is_upper { + if (v.is_null() || i == last) && *is_upper { b.push(BOUND_MAX_TAG); } else { b.push(BOUND_MIN_TAG); } } - if *is_upper && !values.is_empty() { - if let Some(v) = b.last_mut() { - *v = BOUND_MAX_TAG - } - } } value => { if !value.is_null() { @@ -1688,7 +1685,7 @@ impl fmt::Debug for DataValue { DataValue::Tuple(_) => { write!(f, "Tuple({}", self)?; if matches!(self, DataValue::Tuple(Some((_, true)))) { - write!(f, " [is_upper]")?; + write!(f, " [is upper]")?; } write!(f, ")") } diff --git a/tpcc/src/main.rs b/tpcc/src/main.rs index c539dfcb..4d753364 100644 --- a/tpcc/src/main.rs +++ b/tpcc/src/main.rs @@ -10,10 +10,10 @@ use clap::Parser; use fnck_sql::db::{DBTransaction, DataBaseBuilder, Statement}; use fnck_sql::errors::DatabaseError; use fnck_sql::storage::Storage; +use fnck_sql::types::tuple::create_table; use rand::prelude::ThreadRng; use rand::Rng; use std::time::{Duration, Instant}; -use fnck_sql::types::tuple::create_table; mod delivery; mod load; @@ -320,7 +320,7 @@ pub enum TpccError { } #[test] -fn debug_tpcc() -> Result<(), DatabaseError> { +fn explain_tpcc() -> Result<(), DatabaseError> { let database = DataBaseBuilder::path("./fnck_sql_tpcc").build()?; let mut tx = database.new_transaction()?;