From a7209662db3f57170fa1252d08eb3ec68e2fec30 Mon Sep 17 00:00:00 2001 From: Kould <2435992353@qq.com> Date: Wed, 6 Mar 2024 18:21:25 +0800 Subject: [PATCH] refactor: use RangeDetacher for Range inference to reduce redundant logic and avoid more potential bugs --- src/execution/volcano/dql/index_scan.rs | 13 +- src/execution/volcano/mod.rs | 4 +- src/expression/mod.rs | 1 + src/expression/range_detacher.rs | 1280 +++++++++++++++ src/expression/simplify.rs | 1367 +---------------- src/optimizer/core/cm_sketch.rs | 24 +- src/optimizer/core/column_meta.rs | 14 +- src/optimizer/core/histogram.rs | 70 +- src/optimizer/core/memo.rs | 10 +- src/optimizer/rule/implementation/dql/scan.rs | 6 +- .../rule/normalization/pushdown_predicates.rs | 25 +- .../rule/normalization/simplification.rs | 260 +--- src/planner/operator/scan.rs | 2 +- src/storage/kip.rs | 12 +- src/storage/mod.rs | 10 +- src/types/index.rs | 17 +- tests/slt/where_by_index.slt | 2 +- 17 files changed, 1460 insertions(+), 1657 deletions(-) create mode 100644 src/expression/range_detacher.rs diff --git a/src/execution/volcano/dql/index_scan.rs b/src/execution/volcano/dql/index_scan.rs index 1f2c0399..c2c28def 100644 --- a/src/execution/volcano/dql/index_scan.rs +++ b/src/execution/volcano/dql/index_scan.rs @@ -1,6 +1,6 @@ use crate::errors::DatabaseError; use crate::execution::volcano::{BoxedExecutor, ReadExecutor}; -use crate::expression::simplify::ConstantBinary; +use crate::expression::range_detacher::Range; use crate::planner::operator::scan::ScanOperator; use crate::storage::{Iter, Transaction}; use crate::types::index::IndexMetaRef; @@ -10,11 +10,16 @@ use futures_async_stream::try_stream; pub(crate) struct IndexScan { op: ScanOperator, index_by: IndexMetaRef, - ranges: Vec, + ranges: Vec, } -impl From<(ScanOperator, IndexMetaRef, Vec)> for IndexScan { - fn from((op, index_by, ranges): (ScanOperator, IndexMetaRef, Vec)) -> Self { +impl From<(ScanOperator, IndexMetaRef, Range)> for IndexScan { + fn from((op, index_by, range): (ScanOperator, IndexMetaRef, Range)) -> Self { + let ranges = match range { + Range::SortedRanges(ranges) => ranges, + range => vec![range], + }; + IndexScan { op, index_by, diff --git a/src/execution/volcano/mod.rs b/src/execution/volcano/mod.rs index 7d736ca9..6647bc1e 100644 --- a/src/execution/volcano/mod.rs +++ b/src/execution/volcano/mod.rs @@ -84,10 +84,10 @@ pub fn build_read(plan: LogicalPlan, transaction: &T) -> BoxedEx Operator::Scan(op) => { if let Some(PhysicalOption::IndexScan(IndexInfo { meta, - ranges: Some(ranges), + range: Some(range), })) = plan.physical_option { - IndexScan::from((op, meta, ranges)).execute(transaction) + IndexScan::from((op, meta, range)).execute(transaction) } else { SeqScan::from(op).execute(transaction) } diff --git a/src/expression/mod.rs b/src/expression/mod.rs index bcad26aa..179c2d5e 100644 --- a/src/expression/mod.rs +++ b/src/expression/mod.rs @@ -16,6 +16,7 @@ use crate::types::LogicalType; pub mod agg; mod evaluator; pub mod function; +pub mod range_detacher; pub mod simplify; pub mod value_compute; diff --git a/src/expression/range_detacher.rs b/src/expression/range_detacher.rs new file mode 100644 index 00000000..49f5da7f --- /dev/null +++ b/src/expression/range_detacher.rs @@ -0,0 +1,1280 @@ +use crate::catalog::ColumnRef; +use crate::errors::DatabaseError; +use crate::expression::{BinaryOperator, ScalarExpression}; +use crate::types::value::{ValueRef, NULL_VALUE}; +use crate::types::ColumnId; +use itertools::Itertools; +use std::cmp::Ordering; +use std::collections::Bound; +use std::fmt; +use std::fmt::Formatter; + +/// Used to represent binary relationships between fields and constants +/// Tips: The NotEq case is ignored because it makes expression composition very complex +/// - [`Range::Scope`]: +#[derive(Debug, PartialEq, Eq, Clone, Hash)] +pub enum Range { + Scope { + min: Bound, + max: Bound, + }, + Eq(ValueRef), + Dummy, + SortedRanges(Vec), +} + +pub struct RangeDetacher<'a> { + table_name: &'a str, + column_id: &'a ColumnId, +} + +impl<'a> RangeDetacher<'a> { + pub(crate) fn new(table_name: &'a str, column_id: &'a ColumnId) -> Self { + Self { + table_name, + column_id, + } + } + + pub(crate) fn detach( + &mut self, + expr: &ScalarExpression, + ) -> Result, DatabaseError> { + match expr { + ScalarExpression::Binary { + left_expr, + right_expr, + op, + .. + } => match (self.detach(left_expr)?, self.detach(right_expr)?) { + (Some(left_binary), Some(right_binary)) => { + Ok(Self::merge_binary(*op, left_binary, right_binary)) + } + (None, None) => { + if let (Some(col), Some(val)) = + (left_expr.unpack_col(false), right_expr.unpack_val()) + { + return Ok(self.new_range(*op, col, val, false)); + } else if let (Some(val), Some(col)) = + (left_expr.unpack_val(), right_expr.unpack_col(false)) + { + return Ok(self.new_range(*op, col, val, true)); + } + + Ok(None) + } + (Some(binary), None) => Ok(self.check_or(right_expr, op, binary)), + (None, Some(binary)) => Ok(self.check_or(left_expr, op, binary)), + }, + ScalarExpression::Alias { expr, .. } + | ScalarExpression::TypeCast { expr, .. } + | ScalarExpression::Unary { expr, .. } + | ScalarExpression::In { expr, .. } + | ScalarExpression::Between { expr, .. } + | ScalarExpression::SubString { expr, .. } => self.detach(expr), + ScalarExpression::IsNull { expr, negated, .. } => match expr.as_ref() { + ScalarExpression::ColumnRef(column) => { + if let (Some(col_id), Some(col_table)) = (column.id(), column.table_name()) { + if &col_id == self.column_id && col_table.as_str() == self.table_name { + return Ok(if *negated { + // Range::NotEq(NULL_VALUE.clone()) + None + } else { + Some(Range::Eq(NULL_VALUE.clone())) + }); + } + } + + Ok(None) + } + ScalarExpression::Constant(_) + | ScalarExpression::Alias { .. } + | ScalarExpression::TypeCast { .. } + | ScalarExpression::IsNull { .. } + | ScalarExpression::Unary { .. } + | ScalarExpression::Binary { .. } + | ScalarExpression::AggCall { .. } + | ScalarExpression::In { .. } + | ScalarExpression::Between { .. } + | ScalarExpression::SubString { .. } + | ScalarExpression::Function(_) + | ScalarExpression::If { .. } + | ScalarExpression::IfNull { .. } + | ScalarExpression::NullIf { .. } + | ScalarExpression::Coalesce { .. } + | ScalarExpression::CaseWhen { .. } => self.detach(expr), + ScalarExpression::Tuple(_) + | ScalarExpression::Reference { .. } + | ScalarExpression::Empty => unreachable!(), + }, + ScalarExpression::Constant(_) | ScalarExpression::ColumnRef(_) => Ok(None), + // FIXME: support [RangeDetacher::_detach] + ScalarExpression::Tuple(_) + | ScalarExpression::AggCall { .. } + | ScalarExpression::Function(_) + | ScalarExpression::If { .. } + | ScalarExpression::IfNull { .. } + | ScalarExpression::NullIf { .. } + | ScalarExpression::Coalesce { .. } + | ScalarExpression::CaseWhen { .. } => Ok(None), + ScalarExpression::Reference { .. } | ScalarExpression::Empty => unreachable!(), + } + } + + fn merge_binary(op: BinaryOperator, left_binary: Range, right_binary: Range) -> Option { + fn process_exclude_bound_with_eq( + bound: Bound, + eq: &ValueRef, + op: BinaryOperator, + ) -> Bound { + match bound { + Bound::Excluded(bound_val) => { + if &bound_val == eq && op == BinaryOperator::Or { + Bound::Included(bound_val) + } else { + Bound::Excluded(bound_val) + } + } + bound => bound, + } + } + match (left_binary, right_binary) { + (Range::Dummy, binary) | (binary, Range::Dummy) => match op { + BinaryOperator::And => Some(Range::Dummy), + BinaryOperator::Or => Some(binary), + BinaryOperator::Xor => todo!(), + _ => None, + }, + // e.g. c1 > 1 ? c1 < 2 + ( + Range::Scope { + min: left_min, + max: left_max, + }, + Range::Scope { + min: right_min, + max: right_max, + }, + ) => match op { + BinaryOperator::And => Some(Self::and_scope_merge( + left_min, left_max, right_min, right_max, + )), + BinaryOperator::Or => Some(Self::or_scope_merge( + left_min, left_max, right_min, right_max, + )), + BinaryOperator::Xor => todo!(), + _ => None, + }, + // e.g. c1 > 1 ? c1 = 1 + (Range::Scope { min, max }, Range::Eq(eq)) + | (Range::Eq(eq), Range::Scope { min, max }) => { + let unpack_bound = |bound_eq: Bound| match bound_eq { + Bound::Included(val) | Bound::Excluded(val) => val, + _ => unreachable!(), + }; + match op { + BinaryOperator::And => { + let bound_eq = Bound::Included(eq); + let is_less = matches!( + Self::bound_compared(&bound_eq, &min, true).unwrap_or({ + if matches!(min, Bound::Unbounded) { + Ordering::Greater + } else { + Ordering::Less + } + }), + Ordering::Less + ); + + if is_less + || matches!( + Self::bound_compared(&bound_eq, &max, false), + Some(Ordering::Greater) + ) + { + return Some(Range::Dummy); + } + Some(Range::Eq(unpack_bound(bound_eq))) + } + BinaryOperator::Or => { + if eq.is_null() { + return Some(if matches!(min, Bound::Excluded(_)) { + Range::SortedRanges(vec![Range::Eq(eq), Range::Scope { min, max }]) + } else { + Range::Scope { min, max } + }); + } + let bound_eq = Bound::Excluded(eq); + let range = match Self::bound_compared(&bound_eq, &min, true) { + Some(Ordering::Less) => Range::SortedRanges(vec![ + Range::Eq(unpack_bound(bound_eq)), + Range::Scope { min, max }, + ]), + Some(Ordering::Equal) => Range::Scope { + min: process_exclude_bound_with_eq( + min, + &unpack_bound(bound_eq), + op, + ), + max, + }, + _ => match Self::bound_compared(&bound_eq, &max, false) { + Some(Ordering::Greater) => Range::SortedRanges(vec![ + Range::Scope { min, max }, + Range::Eq(unpack_bound(bound_eq)), + ]), + Some(Ordering::Equal) => Range::Scope { + min, + max: process_exclude_bound_with_eq( + max, + &unpack_bound(bound_eq), + op, + ), + }, + _ => Range::Scope { min, max }, + }, + }; + Some(range) + } + BinaryOperator::Xor => todo!(), + _ => None, + } + } + // e.g. c1 > 1 ? (c1 = 1 or c1 = 2) + (Range::Scope { min, max }, Range::SortedRanges(ranges)) + | (Range::SortedRanges(ranges), Range::Scope { min, max }) => { + let merged_ranges = + Self::extract_merge_ranges(op, Some(Range::Scope { min, max }), ranges, &mut 0); + + Self::ranges2range(merged_ranges) + } + // e.g. c1 = 1 ? c1 = 2 + (Range::Eq(left_val), Range::Eq(right_val)) => { + if left_val.eq(&right_val) && matches!(op, BinaryOperator::And | BinaryOperator::Or) + { + return Some(Range::Eq(left_val)); + } + match op { + BinaryOperator::And => Some(Range::Dummy), + BinaryOperator::Or => { + let mut ranges = Vec::new(); + + let (val_1, val_2) = if let Some(true) = + left_val.partial_cmp(&right_val).map(Ordering::is_gt) + { + (right_val, left_val) + } else { + (left_val, right_val) + }; + ranges.push(Range::Eq(val_1)); + ranges.push(Range::Eq(val_2)); + Some(Range::SortedRanges(ranges)) + } + BinaryOperator::Xor => todo!(), + _ => None, + } + } + // e.g. c1 = 1 ? (c1 = 1 or c1 = 2) + (Range::Eq(eq), Range::SortedRanges(ranges)) + | (Range::SortedRanges(ranges), Range::Eq(eq)) => { + let merged_ranges = + Self::extract_merge_ranges(op, Some(Range::Eq(eq)), ranges, &mut 0); + + Self::ranges2range(merged_ranges) + } + // e.g. (c1 = 1 or c1 = 2) ? (c1 = 1 or c1 = 2) + (Range::SortedRanges(left_ranges), Range::SortedRanges(mut right_ranges)) => { + let mut idx = 0; + + for left_range in left_ranges { + right_ranges = + Self::extract_merge_ranges(op, Some(left_range), right_ranges, &mut idx) + } + + Self::ranges2range(right_ranges) + } + } + } + + fn ranges2range(mut merged_ranges: Vec) -> Option { + if merged_ranges.is_empty() { + Some(Range::Dummy) + } else if merged_ranges.len() == 1 { + Some(merged_ranges.pop().unwrap()) + } else { + Some(Range::SortedRanges(merged_ranges)) + } + } + + fn extract_merge_ranges( + op: BinaryOperator, + mut binary: Option, + mut ranges: Vec, + idx: &mut usize, + ) -> Vec { + // FIXME: Lots of duplicate code + while *idx < ranges.len() { + match (&binary, &ranges[*idx]) { + ( + Some(Range::Scope { + min: l_min, + max: l_max, + }), + Range::Scope { + min: r_min, + max: r_max, + }, + ) => { + if let Some(true) = + Self::bound_compared(l_max, r_min, false).map(Ordering::is_lt) + { + ranges.insert(*idx, binary.unwrap()); + return ranges; + } else if let Some(true) = + Self::bound_compared(l_min, r_max, true).map(Ordering::is_gt) + { + *idx += 1; + continue; + } else { + binary = Self::merge_binary(op, binary.unwrap(), ranges.remove(*idx)); + } + } + ( + Some(Range::Scope { + min: l_min, + max: l_max, + }), + Range::Eq(r_val), + ) => { + let r_bound = Bound::Included(r_val.clone()); + + if let Some(true) = + Self::bound_compared(l_max, &r_bound, false).map(Ordering::is_lt) + { + ranges.insert(*idx, binary.unwrap()); + return ranges; + } else if Self::bound_compared(l_min, &r_bound, true) + .map(Ordering::is_gt) + .unwrap_or_else(|| op == BinaryOperator::Or) + { + *idx += 1; + continue; + } else if r_val.is_null() { + let _ = ranges.remove(*idx); + } else { + binary = Self::merge_binary(op, binary.unwrap(), ranges.remove(*idx)); + } + } + (Some(Range::Eq(l_val)), Range::Eq(r_val)) => { + if let Some(true) = l_val.partial_cmp(r_val).map(Ordering::is_lt) { + ranges.insert(*idx, binary.unwrap()); + return ranges; + } else if let Some(true) = l_val.partial_cmp(r_val).map(Ordering::is_gt) { + *idx += 1; + continue; + } else { + binary = Self::merge_binary(op, binary.unwrap(), ranges.remove(*idx)); + } + } + ( + Some(Range::Eq(l_val)), + Range::Scope { + min: r_min, + max: r_max, + }, + ) => { + let l_bound = Bound::Included(l_val.clone()); + + if Self::bound_compared(&l_bound, r_min, false) + .map(Ordering::is_lt) + .unwrap_or_else(|| op == BinaryOperator::Or) + { + ranges.insert(*idx, binary.unwrap()); + return ranges; + } else if let Some(true) = + Self::bound_compared(&l_bound, r_max, true).map(Ordering::is_gt) + { + *idx += 1; + continue; + } else if l_val.is_null() { + binary = Some(ranges.remove(*idx)); + } else { + binary = Self::merge_binary(op, binary.unwrap(), ranges.remove(*idx)); + } + } + (Some(Range::Dummy), _) => { + binary = match op { + BinaryOperator::And => return vec![], + BinaryOperator::Or => Some(ranges.remove(*idx)), + BinaryOperator::Xor => todo!(), + _ => None, + }; + } + (None, _) => break, + _ => unreachable!(), + } + } + if let Some(range) = binary { + ranges.push(range); + } + ranges + } + + fn or_scope_merge( + left_min: Bound, + left_max: Bound, + right_min: Bound, + right_max: Bound, + ) -> Range { + if matches!( + Self::bound_compared(&left_max, &right_min, false), + Some(Ordering::Less) + ) || matches!( + Self::bound_compared(&right_max, &left_min, false), + Some(Ordering::Less) + ) { + let (min_1, max_1, min_2, max_2) = if let Some(true) = + Self::bound_compared(&left_min, &right_min, true).map(Ordering::is_lt) + { + (left_min, left_max, right_min, right_max) + } else { + (right_min, right_max, left_min, left_max) + }; + return Range::SortedRanges(vec![ + Range::Scope { + min: min_1, + max: max_1, + }, + Range::Scope { + min: min_2, + max: max_2, + }, + ]); + } + let min = if let Some(true) = + Self::bound_compared(&left_min, &right_min, true).map(Ordering::is_lt) + { + left_min + } else { + right_min + }; + let max = if let Some(true) = + Self::bound_compared(&left_max, &right_max, false).map(Ordering::is_gt) + { + left_max + } else { + right_max + }; + match Self::bound_compared(&min, &max, matches!(min, Bound::Unbounded)) { + Some(Ordering::Equal) => match min { + Bound::Included(val) => Range::Eq(val), + Bound::Excluded(_) => Range::Dummy, + Bound::Unbounded => Range::Scope { + min: Bound::Unbounded, + max: Bound::Unbounded, + }, + }, + _ => Range::Scope { min, max }, + } + } + + fn and_scope_merge( + left_min: Bound, + left_max: Bound, + right_min: Bound, + right_max: Bound, + ) -> Range { + let min = if let Some(true) = + Self::bound_compared(&left_min, &right_min, true).map(Ordering::is_gt) + { + left_min + } else { + right_min + }; + let max = if let Some(true) = + Self::bound_compared(&left_max, &right_max, false).map(Ordering::is_lt) + { + left_max + } else { + right_max + }; + match Self::bound_compared(&min, &max, matches!(min, Bound::Unbounded)) { + Some(Ordering::Greater) => Range::Dummy, + Some(Ordering::Equal) => match min { + Bound::Included(val) => Range::Eq(val), + Bound::Excluded(_) => Range::Dummy, + Bound::Unbounded => Range::Scope { + min: Bound::Unbounded, + max: Bound::Unbounded, + }, + }, + _ => Range::Scope { min, max }, + } + } + + fn _is_belong(table_name: &str, col: &ColumnRef) -> bool { + matches!( + col.table_name().map(|name| table_name == name.as_str()), + Some(true) + ) + } + + fn bound_compared( + left_bound: &Bound, + right_bound: &Bound, + is_min: bool, + ) -> Option { + fn is_min_then_reverse(is_min: bool, order: Ordering) -> Ordering { + if is_min { + order + } else { + order.reverse() + } + } + match (left_bound, right_bound) { + (Bound::Unbounded, Bound::Unbounded) => Some(Ordering::Equal), + (Bound::Unbounded, _) => Some(is_min_then_reverse(is_min, Ordering::Less)), + (_, Bound::Unbounded) => Some(is_min_then_reverse(is_min, Ordering::Greater)), + (Bound::Included(left), Bound::Included(right)) => left.partial_cmp(right), + (Bound::Included(left), Bound::Excluded(right)) => left + .partial_cmp(right) + .map(|order| order.then(is_min_then_reverse(is_min, Ordering::Less))), + (Bound::Excluded(left), Bound::Excluded(right)) => left.partial_cmp(right), + (Bound::Excluded(left), Bound::Included(right)) => left + .partial_cmp(right) + .map(|order| order.then(is_min_then_reverse(is_min, Ordering::Greater))), + } + } + + fn new_range( + &mut self, + mut op: BinaryOperator, + col: ColumnRef, + val: ValueRef, + is_flip: bool, + ) -> Option { + if !Self::_is_belong(self.table_name, &col) || col.id() != Some(*self.column_id) { + return None; + } + if is_flip { + op = match op { + BinaryOperator::Gt => BinaryOperator::Lt, + BinaryOperator::Lt => BinaryOperator::Gt, + BinaryOperator::GtEq => BinaryOperator::LtEq, + BinaryOperator::LtEq => BinaryOperator::GtEq, + source_op => source_op, + }; + } + match op { + BinaryOperator::Gt => Some(Range::Scope { + min: Bound::Excluded(val.clone()), + max: Bound::Unbounded, + }), + BinaryOperator::Lt => Some(Range::Scope { + min: Bound::Unbounded, + max: Bound::Excluded(val.clone()), + }), + BinaryOperator::GtEq => Some(Range::Scope { + min: Bound::Included(val.clone()), + max: Bound::Unbounded, + }), + BinaryOperator::LtEq => Some(Range::Scope { + min: Bound::Unbounded, + max: Bound::Included(val.clone()), + }), + BinaryOperator::Eq | BinaryOperator::Spaceship => Some(Range::Eq(val.clone())), + _ => None, + } + } + + /// check if: c1 > c2 or c1 > 1 + /// this case it makes no sense to just extract c1 > 1 + fn check_or( + &mut self, + right_expr: &ScalarExpression, + op: &BinaryOperator, + binary: Range, + ) -> Option { + if matches!(op, BinaryOperator::Or) + && right_expr.exist_column(self.table_name, self.column_id) + { + return None; + } + + Some(binary) + } +} + +impl fmt::Display for Range { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + Range::Scope { min, max } => { + match min { + Bound::Unbounded => write!(f, "(-∞")?, + Bound::Included(value) => write!(f, "[{}", value)?, + Bound::Excluded(value) => write!(f, "({}", value)?, + } + + write!(f, ", ")?; + + match max { + Bound::Unbounded => write!(f, "+∞)")?, + Bound::Included(value) => write!(f, "{}]", value)?, + Bound::Excluded(value) => write!(f, "{})", value)?, + } + + Ok(()) + } + Range::Eq(value) => write!(f, "{}", value), + Range::Dummy => write!(f, "Dummy"), + Range::SortedRanges(ranges) => { + let ranges_str = ranges.iter().map(|range| format!("{}", range)).join(", "); + write!(f, "{}", ranges_str) + } + } + } +} + +#[cfg(test)] +mod test { + use crate::binder::test::select_sql_run; + use crate::errors::DatabaseError; + use crate::expression::range_detacher::{Range, RangeDetacher}; + use crate::optimizer::heuristic::batch::HepBatchStrategy; + use crate::optimizer::heuristic::optimizer::HepOptimizer; + use crate::optimizer::rule::normalization::NormalizationRuleImpl; + use crate::planner::operator::filter::FilterOperator; + use crate::planner::operator::Operator; + use crate::planner::LogicalPlan; + use crate::storage::kip::KipTransaction; + use crate::types::value::DataValue; + use std::ops::Bound; + use std::sync::Arc; + + fn plan_filter(plan: LogicalPlan) -> Result, DatabaseError> { + let best_plan = HepOptimizer::new(plan.clone()) + .batch( + "test_simplify_filter".to_string(), + HepBatchStrategy::once_topdown(), + vec![NormalizationRuleImpl::SimplifyFilter], + ) + .find_best::(None)?; + if let Operator::Filter(filter_op) = best_plan.childrens[0].clone().operator { + Ok(Some(filter_op)) + } else { + Ok(None) + } + } + + #[tokio::test] + async fn test_detach_ideal_cases() -> Result<(), DatabaseError> { + { + let plan = select_sql_run("select * from t1 where c1 = 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 = 1 => {}", range); + assert_eq!(range, Range::Eq(Arc::new(DataValue::Int32(Some(1))))) + } + { + let plan = select_sql_run("select * from t1 where c1 != 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?; + println!("c1 != 1 => {:#?}", range); + assert_eq!(range, None) + } + { + let plan = select_sql_run("select * from t1 where c1 > 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 > 1 => c1: {}", range); + assert_eq!( + range, + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Unbounded, + } + ) + } + { + let plan = select_sql_run("select * from t1 where c1 >= 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 >= 1 => c1: {}", range); + assert_eq!( + range, + Range::Scope { + min: Bound::Included(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Unbounded, + } + ) + } + { + let plan = select_sql_run("select * from t1 where c1 < 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 < 1 => c1: {}", range); + assert_eq!( + range, + Range::Scope { + min: Bound::Unbounded, + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(1)))), + } + ) + } + { + let plan = select_sql_run("select * from t1 where c1 <= 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 <= 1 => c1: {}", range); + assert_eq!( + range, + Range::Scope { + min: Bound::Unbounded, + max: Bound::Included(Arc::new(DataValue::Int32(Some(1)))), + } + ) + } + // empty + { + let plan = select_sql_run("select * from t1 where true").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?; + println!("empty => c1: {:#?}", range); + assert_eq!(range, None) + } + // other column + { + let plan = select_sql_run("select * from t1 where c2 = 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?; + println!("c2 = 1 => c1: {:#?}", range); + assert_eq!(range, None) + } + + { + let plan = select_sql_run("select * from t1 where c1 < 1 and c1 >= 0").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 < 1 and c1 >= 0 => c1: {}", range); + assert_eq!( + range, + Range::Scope { + min: Bound::Included(Arc::new(DataValue::Int32(Some(0)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(1)))), + } + ) + } + { + let plan = select_sql_run("select * from t1 where c1 < 1 or c1 >= 0").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 < 1 or c1 >= 0 => c1: {}", range); + assert_eq!( + range, + Range::Scope { + min: Bound::Unbounded, + max: Bound::Unbounded, + } + ) + } + + { + let plan = select_sql_run("select * from t1 where c1 = 1 and c1 = 0").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 = 1 and c1 = 0 => c1: {}", range); + assert_eq!(range, Range::Dummy) + } + { + let plan = select_sql_run("select * from t1 where c1 = 1 or c1 = 0").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 = 1 or c1 = 0 => c1: {}", range); + assert_eq!( + range, + Range::SortedRanges(vec![ + Range::Eq(Arc::new(DataValue::Int32(Some(0)))), + Range::Eq(Arc::new(DataValue::Int32(Some(1)))), + ]) + ) + } + { + let plan = select_sql_run("select * from t1 where c1 = 1 and c1 = 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 = 1 and c1 = 1 => c1: {}", range); + assert_eq!(range, Range::Eq(Arc::new(DataValue::Int32(Some(1))))) + } + { + let plan = select_sql_run("select * from t1 where c1 = 1 or c1 = 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 = 1 or c1 = 1 => c1: {}", range); + assert_eq!(range, Range::Eq(Arc::new(DataValue::Int32(Some(1))))) + } + + { + let plan = select_sql_run("select * from t1 where c1 > 1 and c1 = 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 > 1 and c1 = 1 => c1: {}", range); + assert_eq!(range, Range::Dummy) + } + { + let plan = select_sql_run("select * from t1 where c1 >= 1 and c1 = 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 >= 1 and c1 = 1 => c1: {}", range); + assert_eq!(range, Range::Eq(Arc::new(DataValue::Int32(Some(1))))) + } + { + let plan = select_sql_run("select * from t1 where c1 > 1 or c1 = 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 > 1 or c1 = 1 => c1: {}", range); + assert_eq!( + range, + Range::Scope { + min: Bound::Included(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Unbounded, + } + ) + } + { + let plan = select_sql_run("select * from t1 where c1 >= 1 or c1 = 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 >= 1 or c1 = 1 => c1: {}", range); + assert_eq!( + range, + Range::Scope { + min: Bound::Included(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Unbounded, + } + ) + } + + // scope + { + let plan = select_sql_run( + "select * from t1 where (c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)", + ) + .await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!( + "(c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4) => c1: {}", + range + ); + assert_eq!( + range, + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(3)))), + } + ) + } + { + let plan = + select_sql_run("select * from t1 where (c1 > 0 and c1 < 3) or (c1 > 1 and c1 < 4)") + .await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!( + "(c1 > 0 and c1 < 3) or (c1 > 1 and c1 < 4) => c1: {}", + range + ); + assert_eq!( + range, + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(0)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(4)))), + } + ) + } + + { + let plan = select_sql_run( + "select * from t1 where ((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0", + ) + .await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!( + "((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0 => c1: {}", + range + ); + assert_eq!(range, Range::Dummy) + } + { + let plan = select_sql_run( + "select * from t1 where ((c1 > 0 and c1 < 3) or (c1 > 1 and c1 < 4)) and c1 = 0", + ) + .await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!( + "((c1 > 0 and c1 < 3) or (c1 > 1 and c1 < 4)) and c1 = 0 => c1: {}", + range + ); + assert_eq!(range, Range::Dummy) + } + { + let plan = select_sql_run( + "select * from t1 where ((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) or c1 = 0", + ) + .await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!( + "((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) or c1 = 0 => c1: {}", + range + ); + assert_eq!( + range, + Range::SortedRanges(vec![ + Range::Eq(Arc::new(DataValue::Int32(Some(0)))), + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(3)))), + } + ]) + ) + } + { + let plan = select_sql_run( + "select * from t1 where ((c1 > 0 and c1 < 3) or (c1 > 1 and c1 < 4)) or c1 = 0", + ) + .await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!( + "((c1 > 0 and c1 < 3) or (c1 > 1 and c1 < 4)) or c1 = 0 => c1: {}", + range + ); + assert_eq!( + range, + Range::Scope { + min: Bound::Included(Arc::new(DataValue::Int32(Some(0)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(4)))), + } + ) + } + + { + let plan = select_sql_run("select * from t1 where (((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0) and (c1 >= 0 and c1 <= 2)").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("(((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0) and (c1 >= 0 and c1 <= 2) => c1: {}", range); + assert_eq!(range, Range::Dummy) + } + { + let plan = select_sql_run("select * from t1 where (((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0) or (c1 >= 0 and c1 <= 2)").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("(((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0) or (c1 >= 0 and c1 <= 2) => c1: {}", range); + assert_eq!( + range, + Range::Scope { + min: Bound::Included(Arc::new(DataValue::Int32(Some(0)))), + max: Bound::Included(Arc::new(DataValue::Int32(Some(2)))), + } + ) + } + // ranges and ranges + { + let plan = select_sql_run("select * from t1 where ((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); + assert_eq!( + range, + Range::SortedRanges(vec![ + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(2)))), + }, + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(5)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(6)))), + }, + ]) + ) + } + { + let plan = select_sql_run("select * from t1 where ((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); + assert_eq!( + range, + Range::SortedRanges(vec![ + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(0)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(3)))), + }, + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(4)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(7)))), + }, + ]) + ) + } + // case 1 + { + let plan = select_sql_run( + "select * from t1 where c1 = 5 or (c1 > 5 and (c1 > 6 or c1 < 8) and c1 < 12)", + ) + .await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!( + "c1 = 5 or (c1 > 5 and (c1 > 6 or c1 < 8) and c1 < 12) => c1: {}", + range + ); + assert_eq!( + range, + Range::Scope { + min: Bound::Included(Arc::new(DataValue::Int32(Some(5)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(12)))), + } + ) + } + // case 2 + { + let plan = select_sql_run( + "select * from t1 where ((c2 >= -8 and -4 >= c1) or (c1 >= 0 and 5 > c2)) and ((c2 > 0 and c1 <= 1) or (c1 > -8 and c2 < -6))", + ) + .await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!( + "((c2 >= -8 and -4 >= c1) or (c1 >= 0 and 5 > c2)) and ((c2 > 0 and c1 <= 1) or (c1 > -8 and c2 < -6)) => c1: {}", + range + ); + assert_eq!( + range, + Range::SortedRanges(vec![ + Range::Scope { + min: Bound::Unbounded, + max: Bound::Included(Arc::new(DataValue::Int32(Some(-4)))), + }, + Range::Scope { + min: Bound::Included(Arc::new(DataValue::Int32(Some(0)))), + max: Bound::Unbounded, + } + ]) + ) + } + + Ok(()) + } + + // Tips: `null` should be First + #[tokio::test] + async fn test_detach_null_cases() -> Result<(), DatabaseError> { + // eq + { + let plan = select_sql_run("select * from t1 where c1 = null").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 = null => c1: {}", range); + assert_eq!(range, Range::Eq(Arc::new(DataValue::Int32(None)))) + } + { + let plan = select_sql_run("select * from t1 where c1 = null or c1 = 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 = null or c1 = 1 => c1: {}", range); + assert_eq!( + range, + Range::SortedRanges(vec![ + Range::Eq(Arc::new(DataValue::Int32(None))), + Range::Eq(Arc::new(DataValue::Int32(Some(1)))) + ]) + ) + } + { + let plan = select_sql_run("select * from t1 where c1 = null or c1 < 5").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 = null or c1 < 5 => c1: {}", range); + assert_eq!( + range, + Range::Scope { + min: Bound::Unbounded, + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(5)))), + } + ) + } + { + let plan = + select_sql_run("select * from t1 where c1 = null or (c1 > 1 and c1 < 5)").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 = null or (c1 > 1 and c1 < 5) => c1: {}", range); + assert_eq!( + range, + Range::SortedRanges(vec![ + Range::Eq(Arc::new(DataValue::Int32(None))), + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(5)))), + }, + ]) + ) + } + { + let plan = select_sql_run("select * from t1 where c1 = null and c1 < 5").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 = null and c1 < 5 => c1: {}", range); + assert_eq!(range, Range::Eq(Arc::new(DataValue::Int32(None)))) + } + { + let plan = + select_sql_run("select * from t1 where c1 = null and (c1 > 1 and c1 < 5)").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 = null and (c1 > 1 and c1 < 5) => c1: {}", range); + assert_eq!(range, Range::Dummy) + } + // noteq + { + let plan = select_sql_run("select * from t1 where c1 != null").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?; + println!("c1 != null => c1: {:#?}", range); + assert_eq!(range, None) + } + { + let plan = select_sql_run("select * from t1 where c1 = null or c1 != 1").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?; + println!("c1 = null or c1 != 1 => c1: {:#?}", range); + assert_eq!(range, None) + } + { + let plan = select_sql_run("select * from t1 where c1 != null or c1 < 5").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?; + println!("c1 != null or c1 < 5 => c1: {:#?}", range); + assert_eq!(range, None) + } + { + let plan = + select_sql_run("select * from t1 where c1 != null or (c1 > 1 and c1 < 5)").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?; + println!("c1 != null or (c1 > 1 and c1 < 5) => c1: {:#?}", range); + assert_eq!(range, None) + } + { + let plan = select_sql_run("select * from t1 where c1 != null and c1 < 5").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 != null and c1 < 5 => c1: {}", range); + assert_eq!( + range, + Range::Scope { + min: Bound::Unbounded, + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(5)))), + } + ) + } + { + let plan = + select_sql_run("select * from t1 where c1 != null and (c1 > 1 and c1 < 5)").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("c1 != null and (c1 > 1 and c1 < 5) => c1: {}", range); + assert_eq!( + range, + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(5)))), + } + ) + } + { + let plan = select_sql_run("select * from t1 where (c1 = null or (c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("(c1 = null or (c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); + assert_eq!( + range, + Range::SortedRanges(vec![ + Range::Eq(Arc::new(DataValue::Int32(None))), + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(0)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(3)))), + }, + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(4)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(7)))), + } + ]) + ) + } + { + let plan = select_sql_run("select * from t1 where ((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or (c1 = null or (c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or (c1 = null or (c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); + assert_eq!( + range, + Range::SortedRanges(vec![ + Range::Eq(Arc::new(DataValue::Int32(None))), + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(0)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(3)))), + }, + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(4)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(7)))), + } + ]) + ) + } + { + let plan = select_sql_run("select * from t1 where (c1 = null or (c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("(c1 = null or (c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); + assert_eq!( + range, + Range::SortedRanges(vec![ + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(2)))), + }, + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(5)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(6)))), + } + ]) + ) + } + { + let plan = select_sql_run("select * from t1 where ((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and (c1 = null or (c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))").await?; + let op = plan_filter(plan)?.unwrap(); + let range = RangeDetacher::new("t1", &0).detach(&op.predicate)?.unwrap(); + println!("((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and (c1 = null or (c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); + assert_eq!( + range, + Range::SortedRanges(vec![ + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(2)))), + }, + Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(5)))), + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(6)))), + } + ]) + ) + } + + Ok(()) + } +} diff --git a/src/expression/simplify.rs b/src/expression/simplify.rs index 24779e1d..9f3e9698 100644 --- a/src/expression/simplify.rs +++ b/src/expression/simplify.rs @@ -2,433 +2,10 @@ use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::expression::function::ScalarFunction; use crate::expression::{BinaryOperator, ScalarExpression, UnaryOperator}; -use crate::types::value::{DataValue, ValueRef, NULL_VALUE}; +use crate::types::value::{DataValue, ValueRef}; use crate::types::{ColumnId, LogicalType}; -use ahash::RandomState; -use itertools::Itertools; -use std::cmp::Ordering; -use std::collections::{Bound, HashSet}; -use std::fmt::Formatter; +use std::mem; use std::sync::Arc; -use std::{fmt, mem}; - -#[derive(Debug, PartialEq, Eq, Clone, Hash)] -pub enum ConstantBinary { - Scope { - min: Bound, - max: Bound, - }, - Eq(ValueRef), - NotEq(ValueRef), - - And(Vec), - Or(Vec), -} - -impl ConstantBinary { - #[allow(dead_code)] - fn is_null(&self) -> Result { - match self { - ConstantBinary::Scope { min, max } => { - let op = |bound: &Bound| { - if let Bound::Included(val) | Bound::Excluded(val) = bound { - val.is_null() - } else { - false - } - }; - if op(min) || op(max) { - return Ok(true); - } - - Ok(matches!((min, max), (Bound::Unbounded, Bound::Unbounded))) - } - ConstantBinary::Eq(val) | ConstantBinary::NotEq(val) => Ok(val.is_null()), - _ => Err(DatabaseError::InvalidType), - } - } - - pub fn rearrange(self) -> Result, DatabaseError> { - match self { - ConstantBinary::Or(binaries) => { - if binaries.is_empty() { - return Ok(vec![]); - } - if binaries.len() == 1 - && matches!( - binaries[0], - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Unbounded - } - ) - { - return Ok(binaries); - } - - let mut condition_binaries = Vec::new(); - - for binary in binaries { - match binary { - ConstantBinary::Or(_) => return Err(DatabaseError::InvalidType), - ConstantBinary::And(mut and_binaries) => { - condition_binaries.append(&mut and_binaries); - } - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Unbounded, - } => (), - source => condition_binaries.push(source), - } - } - // Sort - condition_binaries.sort_by(|a, b| { - let op = |binary: &ConstantBinary| match binary { - ConstantBinary::Scope { min, .. } => min.clone(), - ConstantBinary::Eq(val) => Bound::Included(val.clone()), - ConstantBinary::NotEq(val) => Bound::Excluded(val.clone()), - _ => unreachable!(), - }; - - Self::bound_compared(&op(a), &op(b), true).unwrap_or(Ordering::Equal) - }); - - let mut merged_binaries: Vec = Vec::new(); - - for condition in condition_binaries { - let op = |binary: &ConstantBinary| match binary { - ConstantBinary::Scope { min, max } => (min.clone(), max.clone()), - ConstantBinary::Eq(val) => (Bound::Unbounded, Bound::Included(val.clone())), - ConstantBinary::NotEq(val) => { - (Bound::Unbounded, Bound::Excluded(val.clone())) - } - _ => unreachable!(), - }; - let mut is_push = merged_binaries.is_empty(); - - for binary in merged_binaries.iter_mut().rev() { - match binary { - ConstantBinary::Scope { max, .. } => { - let (condition_min, condition_max) = op(&condition); - let is_lt_min = Self::bound_compared(max, &condition_min, false) - .unwrap_or(Ordering::Equal) - .is_lt(); - let is_lt_max = Self::bound_compared(max, &condition_max, false) - .unwrap_or(Ordering::Equal) - .is_lt(); - - if !is_lt_min && is_lt_max { - let _ = mem::replace(max, condition_max); - } else if !matches!(condition, ConstantBinary::Scope { .. }) { - is_push = is_lt_max; - } else if is_lt_min && is_lt_max { - is_push = true - } - - break; - } - ConstantBinary::Eq(_) => is_push = true, - _ => (), - } - } - - if is_push { - merged_binaries.push(condition); - } - } - - Ok(merged_binaries) - } - ConstantBinary::And(binaries) => Ok(binaries), - source => Ok(vec![source]), - } - } - - pub fn scope_aggregation(&mut self) -> Result<(), DatabaseError> { - // Tips: Only single-level `And` and `Or` - match self { - // `Or` is allowed to contain `And`, `Scope`, `Eq/NotEq` - ConstantBinary::Or(binaries) => { - for mut binary in mem::take(binaries) { - binary.scope_aggregation()?; - match binary { - ConstantBinary::And(mut and_binaries) => binaries.append(&mut and_binaries), - ConstantBinary::Or(_) => unreachable!("`Or` does not allow nested `Or`"), - binary => binaries.push(binary), - } - } - Self::or_scope_aggregation(binaries); - } - // `And` is allowed to contain `Or`, Scope, `Eq/NotEq` - ConstantBinary::And(binaries) => { - for mut binary in mem::take(binaries) { - binary.scope_aggregation()?; - match binary { - ConstantBinary::And(_) => unreachable!("`And` does not allow nested `And`"), - ConstantBinary::Or(or_binaries) => { - binaries.append(&mut ConstantBinary::Or(or_binaries).rearrange()?); - } - binary => binaries.push(binary), - } - } - Self::and_scope_aggregation(binaries)?; - } - _ => (), - } - - Ok(()) - } - - fn bound_compared( - left_bound: &Bound, - right_bound: &Bound, - is_min: bool, - ) -> Option { - let op = |is_min, order: Ordering| { - if is_min { - order - } else { - order.reverse() - } - }; - - match (left_bound, right_bound) { - (Bound::Unbounded, Bound::Unbounded) => Some(Ordering::Equal), - (Bound::Unbounded, _) => Some(op(is_min, Ordering::Less)), - (_, Bound::Unbounded) => Some(op(is_min, Ordering::Greater)), - (Bound::Included(left), Bound::Included(right)) => left.partial_cmp(right), - (Bound::Included(left), Bound::Excluded(right)) => left - .partial_cmp(right) - .map(|order| order.then(op(is_min, Ordering::Less))), - (Bound::Excluded(left), Bound::Excluded(right)) => left.partial_cmp(right), - (Bound::Excluded(left), Bound::Included(right)) => left - .partial_cmp(right) - .map(|order| order.then(op(is_min, Ordering::Greater))), - } - } - - // Tips: It only makes sense if the condition is and aggregation - fn and_scope_aggregation(binaries: &mut Vec) -> Result<(), DatabaseError> { - if binaries.is_empty() { - return Ok(()); - } - - let mut scope_min = Bound::Unbounded; - let mut scope_max = Bound::Unbounded; - let mut eq_set = HashSet::with_hasher(RandomState::new()); - - let sort_op = |binary: &&ConstantBinary| match binary { - ConstantBinary::Scope { .. } => 3, - ConstantBinary::NotEq(_) => 2, - ConstantBinary::Eq(_) => 1, - ConstantBinary::And(_) | ConstantBinary::Or(_) => 0, - }; - - // Aggregate various ranges to get the minimum range - for binary in binaries.iter().sorted_by_key(sort_op) { - match binary { - ConstantBinary::Scope { min, max } => { - if eq_set.len() == 1 { - break; - } - - if let Some(order) = Self::bound_compared(&scope_min, min, true) { - if order.is_lt() { - scope_min = min.clone(); - } - } - - if let Some(order) = Self::bound_compared(&scope_max, max, false) { - if order.is_gt() { - scope_max = max.clone(); - } - } - } - ConstantBinary::Eq(val) => { - let _ = eq_set.insert(val.clone()); - - // when there are multiple inconsistent eq conditions for the same field in And, - // then no row can meet the conditions. - // e.g. `select * from t1 where c1 = 0 and c1 = 1` no data can be both 0 and 1 at the same time - if eq_set.len() > 1 { - binaries.clear(); - return Ok(()); - } - } - ConstantBinary::NotEq(val) => { - if eq_set.contains(val) { - binaries.clear(); - return Ok(()); - } - } - ConstantBinary::Or(_) | ConstantBinary::And(_) => { - unreachable!() - } - } - } - - binaries.clear(); - if eq_set.len() == 1 { - let eq = eq_set.into_iter().next().map(ConstantBinary::Eq).unwrap(); - - binaries.push(eq); - } else if !matches!( - (&scope_min, &scope_max), - (Bound::Unbounded, Bound::Unbounded) - ) { - // When there is something like `select * from t1 where c1 between 1 and null`, - // None will be returned - if matches!( - Self::bound_compared(&scope_min, &scope_max, true).map(Ordering::is_le), - Some(true) - ) { - binaries.push(ConstantBinary::Scope { - min: scope_min, - max: scope_max, - }); - } - } else if eq_set.is_empty() { - binaries.push(ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Unbounded, - }); - } - - Ok(()) - } - - // Tips: It only makes sense if the condition is or aggregation - fn or_scope_aggregation(binaries: &mut Vec) { - if binaries.is_empty() { - return; - } - let mut scopes = Vec::new(); - let mut eqs = HashSet::new(); - - let mut scope_margin = None; - - let sort_op = |binary: &&ConstantBinary| match binary { - ConstantBinary::NotEq(_) => 2, - ConstantBinary::Eq(_) => 1, - _ => 3, - }; - for binary in binaries.iter().sorted_by_key(sort_op) { - if matches!(scope_margin, Some((Bound::Unbounded, Bound::Unbounded))) { - break; - } - match binary { - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Unbounded, - } => { - scope_margin = Some((Bound::Unbounded, Bound::Unbounded)); - break; - } - ConstantBinary::Scope { min, max } => { - if let Some((scope_min, scope_max)) = &mut scope_margin { - if matches!( - Self::bound_compared(scope_min, min, true).map(Ordering::is_gt), - Some(true) - ) { - let _ = mem::replace(scope_min, min.clone()); - } - if matches!( - Self::bound_compared(scope_max, max, false).map(Ordering::is_lt), - Some(true) - ) { - let _ = mem::replace(scope_max, max.clone()); - } - } else { - scope_margin = Some((min.clone(), max.clone())) - } - - scopes.push((min, max)) - } - ConstantBinary::Eq(val) => { - let _ = eqs.insert(val.clone()); - } - ConstantBinary::NotEq(val) => { - let _ = eqs.remove(val); - } - _ => (), - } - } - if matches!( - scope_margin, - Some((Bound::Unbounded, Bound::Unbounded)) | None - ) { - binaries.clear(); - if eqs.is_empty() { - binaries.push(ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Unbounded, - }); - } else { - for val in eqs { - binaries.push(ConstantBinary::Eq(val)); - } - } - return; - } - - let mut merge_scopes: Vec<(Bound, Bound)> = Vec::new(); - - match scope_margin { - Some((Bound::Unbounded, _)) => { - if let Some((_, max)) = scopes.iter().max_by(|(_, max_a), (_, max_b)| { - Self::bound_compared(max_a, max_b, false).unwrap() - }) { - merge_scopes.push((Bound::Unbounded, (**max).clone())) - } - } - Some((_, Bound::Unbounded)) => { - if let Some((min, _)) = scopes.iter().min_by(|(min_a, _), (min_b, _)| { - Self::bound_compared(min_a, min_b, true).unwrap() - }) { - merge_scopes.push(((**min).clone(), Bound::Unbounded)) - } - } - _ => { - scopes.sort_by(|(min_a, _), (min_b, _)| { - Self::bound_compared(min_a, min_b, true).unwrap() - }); - - for (min, max) in scopes { - if merge_scopes.is_empty() { - merge_scopes.push((min.clone(), max.clone())); - continue; - } - - let last_pos = merge_scopes.len() - 1; - let last_scope: &mut _ = &mut merge_scopes[last_pos]; - if Self::bound_compared(&last_scope.0, min, true) - .unwrap() - .is_gt() - { - merge_scopes.push((min.clone(), max.clone())); - } else if Self::bound_compared(&last_scope.1, max, false) - .unwrap() - .is_lt() - { - last_scope.1 = max.clone(); - } - } - } - } - *binaries = merge_scopes - .into_iter() - .map(|(min, max)| ConstantBinary::Scope { min, max }) - .chain(eqs.into_iter().map(ConstantBinary::Eq)) - .collect_vec(); - } - - fn join_write(f: &mut Formatter, binaries: &[ConstantBinary], op: &str) -> fmt::Result { - let binaries = binaries.iter().map(|binary| format!("{}", binary)).join(op); - write!(f, " {} ", binaries)?; - - Ok(()) - } -} #[derive(Debug)] enum Replace { @@ -556,7 +133,7 @@ impl ScalarExpression { } } - fn unpack_val(&self) -> Option { + pub(crate) fn unpack_val(&self) -> Option { match self { ScalarExpression::Constant(val) => Some(val.clone()), ScalarExpression::Alias { expr, .. } => expr.unpack_val(), @@ -588,7 +165,7 @@ impl ScalarExpression { } } - fn unpack_col(&self, is_deep: bool) -> Option { + pub(crate) fn unpack_col(&self, is_deep: bool) -> Option { match self { ScalarExpression::ColumnRef(col) => Some(col.clone()), ScalarExpression::Alias { expr, .. } => expr.unpack_col(is_deep), @@ -959,230 +536,6 @@ impl ScalarExpression { ); } - /// Tips: The definition of `Or` is not the `Or` in the Where condition. - /// The And and Or of ConstantBinary are concerned with the data range that needs to be aggregated. - /// - [`ConstantBinary::And`]: Aggregate the minimum range of all conditions in and - /// - [`ConstantBinary::Or`]: Rearrange and sort the range of each OR data - pub fn convert_binary( - &self, - table_name: &str, - id: &ColumnId, - ) -> Result, DatabaseError> { - match self { - ScalarExpression::Binary { - left_expr, - right_expr, - op, - .. - } => { - match ( - left_expr.convert_binary(table_name, id)?, - right_expr.convert_binary(table_name, id)?, - ) { - (Some(left_binary), Some(right_binary)) => match (left_binary, right_binary) { - (ConstantBinary::And(mut left), ConstantBinary::And(mut right)) => match op - { - BinaryOperator::And => { - left.append(&mut right); - - Ok(Some(ConstantBinary::And(left))) - } - BinaryOperator::Or => Ok(Some(ConstantBinary::Or(vec![ - ConstantBinary::And(left), - ConstantBinary::And(right), - ]))), - BinaryOperator::Xor => todo!(), - _ => unreachable!(), - }, - (ConstantBinary::Or(mut left), ConstantBinary::Or(mut right)) => match op { - BinaryOperator::And | BinaryOperator::Or => { - left.append(&mut right); - - Ok(Some(ConstantBinary::Or(left))) - } - BinaryOperator::Xor => todo!(), - _ => unreachable!(), - }, - (ConstantBinary::And(mut left), ConstantBinary::Or(mut right)) => { - right.append(&mut left); - - Ok(Some(ConstantBinary::Or(right))) - } - (ConstantBinary::Or(mut left), ConstantBinary::And(mut right)) => { - left.append(&mut right); - - Ok(Some(ConstantBinary::Or(left))) - } - (ConstantBinary::And(mut binaries), binary) - | (binary, ConstantBinary::And(mut binaries)) => { - if op == &BinaryOperator::Or { - Ok(Some(ConstantBinary::Or(vec![ - binary, - ConstantBinary::And(binaries), - ]))) - } else { - binaries.push(binary); - - Ok(Some(ConstantBinary::And(binaries))) - } - } - (ConstantBinary::Or(mut binaries), binary) - | (binary, ConstantBinary::Or(mut binaries)) => { - if op == &BinaryOperator::And { - Ok(Some(ConstantBinary::And(vec![ - binary, - ConstantBinary::Or(binaries), - ]))) - } else { - binaries.push(binary); - - Ok(Some(ConstantBinary::Or(binaries))) - } - } - (left, right) => match op { - BinaryOperator::And => Ok(Some(ConstantBinary::And(vec![left, right]))), - BinaryOperator::Or => Ok(Some(ConstantBinary::Or(vec![left, right]))), - BinaryOperator::Xor => todo!(), - _ => Ok(None), - }, - }, - (None, None) => { - if let (Some(col), Some(val)) = - (left_expr.unpack_col(false), right_expr.unpack_val()) - { - return Ok(Self::new_binary(table_name, id, *op, col, val, false)); - } - if let (Some(val), Some(col)) = - (left_expr.unpack_val(), right_expr.unpack_col(false)) - { - return Ok(Self::new_binary(table_name, id, *op, col, val, true)); - } - - Ok(None) - } - (Some(binary), None) => { - Ok(Self::check_or(table_name, id, right_expr, op, binary)) - } - (None, Some(binary)) => { - Ok(Self::check_or(table_name, id, left_expr, op, binary)) - } - } - } - ScalarExpression::Alias { expr, .. } - | ScalarExpression::TypeCast { expr, .. } - | ScalarExpression::Unary { expr, .. } - | ScalarExpression::In { expr, .. } - | ScalarExpression::Between { expr, .. } - | ScalarExpression::SubString { expr, .. } => expr.convert_binary(table_name, id), - ScalarExpression::IsNull { expr, negated, .. } => match expr.as_ref() { - ScalarExpression::ColumnRef(column) => { - if let (Some(col_id), Some(col_table)) = (column.id(), column.table_name()) { - if id == &col_id && col_table.as_str() == table_name { - return Ok(Some(if *negated { - ConstantBinary::NotEq(NULL_VALUE.clone()) - } else { - ConstantBinary::Eq(NULL_VALUE.clone()) - })); - } - } - - Ok(None) - } - ScalarExpression::Constant(_) - | ScalarExpression::Alias { .. } - | ScalarExpression::TypeCast { .. } - | ScalarExpression::IsNull { .. } - | ScalarExpression::Unary { .. } - | ScalarExpression::Binary { .. } - | ScalarExpression::AggCall { .. } - | ScalarExpression::In { .. } - | ScalarExpression::Between { .. } - | ScalarExpression::SubString { .. } - | ScalarExpression::Function(_) - | ScalarExpression::If { .. } - | ScalarExpression::IfNull { .. } - | ScalarExpression::NullIf { .. } - | ScalarExpression::Coalesce { .. } - | ScalarExpression::CaseWhen { .. } => expr.convert_binary(table_name, id), - ScalarExpression::Tuple(_) - | ScalarExpression::Reference { .. } - | ScalarExpression::Empty => unreachable!(), - }, - ScalarExpression::Constant(_) | ScalarExpression::ColumnRef(_) => Ok(None), - // FIXME: support `convert_binary` - ScalarExpression::Tuple(_) - | ScalarExpression::AggCall { .. } - | ScalarExpression::Function(_) - | ScalarExpression::If { .. } - | ScalarExpression::IfNull { .. } - | ScalarExpression::NullIf { .. } - | ScalarExpression::Coalesce { .. } - | ScalarExpression::CaseWhen { .. } => Ok(None), - ScalarExpression::Reference { .. } | ScalarExpression::Empty => unreachable!(), - } - } - - /// check if: c1 > c2 or c1 > 1 - /// this case it makes no sense to just extract c1 > 1 - fn check_or( - table_name: &str, - col_id: &ColumnId, - right_expr: &ScalarExpression, - op: &BinaryOperator, - binary: ConstantBinary, - ) -> Option { - if matches!(op, BinaryOperator::Or) && right_expr.exist_column(table_name, col_id) { - return None; - } - - Some(binary) - } - - fn new_binary( - table_name: &str, - col_id: &ColumnId, - mut op: BinaryOperator, - col: ColumnRef, - val: ValueRef, - is_flip: bool, - ) -> Option { - if !Self::_is_belong(table_name, &col) || col.id() != Some(*col_id) { - return None; - } - - if is_flip { - op = match op { - BinaryOperator::Gt => BinaryOperator::Lt, - BinaryOperator::Lt => BinaryOperator::Gt, - BinaryOperator::GtEq => BinaryOperator::LtEq, - BinaryOperator::LtEq => BinaryOperator::GtEq, - source_op => source_op, - }; - } - - match op { - BinaryOperator::Gt => Some(ConstantBinary::Scope { - min: Bound::Excluded(val.clone()), - max: Bound::Unbounded, - }), - BinaryOperator::Lt => Some(ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Excluded(val.clone()), - }), - BinaryOperator::GtEq => Some(ConstantBinary::Scope { - min: Bound::Included(val.clone()), - max: Bound::Unbounded, - }), - BinaryOperator::LtEq => Some(ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Included(val.clone()), - }), - BinaryOperator::Eq | BinaryOperator::Spaceship => Some(ConstantBinary::Eq(val.clone())), - BinaryOperator::NotEq => Some(ConstantBinary::NotEq(val.clone())), - _ => None, - } - } - fn _is_belong(table_name: &str, col: &ColumnRef) -> bool { matches!( col.table_name().map(|name| table_name == name.as_str()), @@ -1190,715 +543,3 @@ impl ScalarExpression { ) } } - -impl fmt::Display for ConstantBinary { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match self { - ConstantBinary::Scope { min, max } => { - match min { - Bound::Unbounded => write!(f, "-∞")?, - Bound::Included(value) => write!(f, "[{}", value)?, - Bound::Excluded(value) => write!(f, "({}", value)?, - } - - write!(f, ", ")?; - - match max { - Bound::Unbounded => write!(f, "+∞")?, - Bound::Included(value) => write!(f, "{}]", value)?, - Bound::Excluded(value) => write!(f, "{})", value)?, - } - - Ok(()) - } - ConstantBinary::Eq(value) => write!(f, "{}", value), - ConstantBinary::NotEq(value) => write!(f, "!{}", value), - ConstantBinary::And(binaries) => Self::join_write(f, binaries, " AND "), - ConstantBinary::Or(binaries) => Self::join_write(f, binaries, " OR "), - } - } -} - -#[cfg(test)] -mod test { - use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnSummary}; - use crate::errors::DatabaseError; - use crate::expression::simplify::ConstantBinary; - use crate::expression::{BinaryOperator, ScalarExpression}; - use crate::types::value::DataValue; - use crate::types::LogicalType; - use std::collections::Bound; - use std::sync::Arc; - - #[test] - fn test_convert_binary_simple() -> Result<(), DatabaseError> { - let col_1 = Arc::new(ColumnCatalog { - summary: ColumnSummary { - id: Some(0), - name: "c1".to_string(), - table_name: Some(Arc::new("t1".to_string())), - }, - nullable: false, - desc: ColumnDesc { - column_datatype: LogicalType::Integer, - is_primary: false, - is_unique: false, - default: None, - }, - }); - let val_1 = Arc::new(DataValue::Int32(Some(1))); - - let binary_eq = ScalarExpression::Binary { - op: BinaryOperator::Eq, - left_expr: Box::new(ScalarExpression::Constant(val_1.clone())), - right_expr: Box::new(ScalarExpression::ColumnRef(col_1.clone())), - ty: LogicalType::Boolean, - } - .convert_binary("t1", &0)? - .unwrap(); - - assert_eq!(binary_eq, ConstantBinary::Eq(val_1.clone())); - - let binary_not_eq = ScalarExpression::Binary { - op: BinaryOperator::NotEq, - left_expr: Box::new(ScalarExpression::Constant(val_1.clone())), - right_expr: Box::new(ScalarExpression::ColumnRef(col_1.clone())), - ty: LogicalType::Boolean, - } - .convert_binary("t1", &0)? - .unwrap(); - - assert_eq!(binary_not_eq, ConstantBinary::NotEq(val_1.clone())); - - let binary_lt = ScalarExpression::Binary { - op: BinaryOperator::Lt, - left_expr: Box::new(ScalarExpression::ColumnRef(col_1.clone())), - right_expr: Box::new(ScalarExpression::Constant(val_1.clone())), - ty: LogicalType::Boolean, - } - .convert_binary("t1", &0)? - .unwrap(); - - assert_eq!( - binary_lt, - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Excluded(val_1.clone()) - } - ); - - let binary_lteq = ScalarExpression::Binary { - op: BinaryOperator::LtEq, - left_expr: Box::new(ScalarExpression::ColumnRef(col_1.clone())), - right_expr: Box::new(ScalarExpression::Constant(val_1.clone())), - ty: LogicalType::Boolean, - } - .convert_binary("t1", &0)? - .unwrap(); - - assert_eq!( - binary_lteq, - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Included(val_1.clone()) - } - ); - - let binary_gt = ScalarExpression::Binary { - op: BinaryOperator::Gt, - left_expr: Box::new(ScalarExpression::ColumnRef(col_1.clone())), - right_expr: Box::new(ScalarExpression::Constant(val_1.clone())), - ty: LogicalType::Boolean, - } - .convert_binary("t1", &0)? - .unwrap(); - - assert_eq!( - binary_gt, - ConstantBinary::Scope { - min: Bound::Excluded(val_1.clone()), - max: Bound::Unbounded - } - ); - - let binary_gteq = ScalarExpression::Binary { - op: BinaryOperator::GtEq, - left_expr: Box::new(ScalarExpression::ColumnRef(col_1.clone())), - right_expr: Box::new(ScalarExpression::Constant(val_1.clone())), - ty: LogicalType::Boolean, - } - .convert_binary("t1", &0)? - .unwrap(); - - assert_eq!( - binary_gteq, - ConstantBinary::Scope { - min: Bound::Included(val_1.clone()), - max: Bound::Unbounded - } - ); - - Ok(()) - } - - #[test] - fn test_scope_aggregation_eq_noteq() -> Result<(), DatabaseError> { - let val_0 = Arc::new(DataValue::Int32(Some(0))); - let val_1 = Arc::new(DataValue::Int32(Some(1))); - let val_2 = Arc::new(DataValue::Int32(Some(2))); - let val_3 = Arc::new(DataValue::Int32(Some(3))); - - let mut binary = ConstantBinary::And(vec![ - ConstantBinary::Eq(val_0.clone()), - ConstantBinary::NotEq(val_1.clone()), - ConstantBinary::Eq(val_2.clone()), - ConstantBinary::NotEq(val_3.clone()), - ]); - - binary.scope_aggregation()?; - - assert_eq!(binary, ConstantBinary::And(vec![])); - - Ok(()) - } - - #[test] - fn test_scope_aggregation_and_eq_noteq_cover() -> Result<(), DatabaseError> { - let val_0 = Arc::new(DataValue::Int32(Some(0))); - let val_1 = Arc::new(DataValue::Int32(Some(1))); - let val_2 = Arc::new(DataValue::Int32(Some(2))); - let val_3 = Arc::new(DataValue::Int32(Some(3))); - - let mut binary = ConstantBinary::And(vec![ - ConstantBinary::Eq(val_0.clone()), - ConstantBinary::NotEq(val_1.clone()), - ConstantBinary::Eq(val_2.clone()), - ConstantBinary::NotEq(val_3.clone()), - ConstantBinary::NotEq(val_0.clone()), - ConstantBinary::NotEq(val_1.clone()), - ConstantBinary::NotEq(val_2.clone()), - ConstantBinary::NotEq(val_3.clone()), - ]); - - binary.scope_aggregation()?; - - assert_eq!(binary, ConstantBinary::And(vec![])); - - Ok(()) - } - - #[test] - fn test_scope_aggregation_scope() -> Result<(), DatabaseError> { - let val_0 = Arc::new(DataValue::Int32(Some(0))); - let val_1 = Arc::new(DataValue::Int32(Some(1))); - let val_2 = Arc::new(DataValue::Int32(Some(2))); - let val_3 = Arc::new(DataValue::Int32(Some(3))); - - let mut binary = ConstantBinary::And(vec![ - ConstantBinary::Scope { - min: Bound::Excluded(val_0.clone()), - max: Bound::Included(val_3.clone()), - }, - ConstantBinary::Scope { - min: Bound::Included(val_1.clone()), - max: Bound::Excluded(val_2.clone()), - }, - ConstantBinary::Scope { - min: Bound::Excluded(val_1.clone()), - max: Bound::Included(val_2.clone()), - }, - ConstantBinary::Scope { - min: Bound::Included(val_0.clone()), - max: Bound::Excluded(val_3.clone()), - }, - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Unbounded, - }, - ]); - - binary.scope_aggregation()?; - - assert_eq!( - binary, - ConstantBinary::And(vec![ConstantBinary::Scope { - min: Bound::Excluded(val_1.clone()), - max: Bound::Excluded(val_2.clone()), - }]) - ); - - Ok(()) - } - - #[test] - fn test_scope_aggregation_and_mixed() -> Result<(), DatabaseError> { - let val_0 = Arc::new(DataValue::Int32(Some(0))); - let val_1 = Arc::new(DataValue::Int32(Some(1))); - let val_2 = Arc::new(DataValue::Int32(Some(2))); - let val_3 = Arc::new(DataValue::Int32(Some(3))); - - let mut binary = ConstantBinary::And(vec![ - ConstantBinary::Scope { - min: Bound::Excluded(val_0.clone()), - max: Bound::Included(val_3.clone()), - }, - ConstantBinary::Scope { - min: Bound::Included(val_1.clone()), - max: Bound::Excluded(val_2.clone()), - }, - ConstantBinary::Scope { - min: Bound::Excluded(val_1.clone()), - max: Bound::Included(val_2.clone()), - }, - ConstantBinary::Scope { - min: Bound::Included(val_0.clone()), - max: Bound::Excluded(val_3.clone()), - }, - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Unbounded, - }, - ConstantBinary::Eq(val_1.clone()), - ConstantBinary::Eq(val_0.clone()), - ConstantBinary::NotEq(val_1.clone()), - ]); - - binary.scope_aggregation()?; - - assert_eq!(binary, ConstantBinary::And(vec![])); - - Ok(()) - } - - #[test] - fn test_scope_aggregation_or() -> Result<(), DatabaseError> { - let val_0 = Arc::new(DataValue::Int32(Some(0))); - let val_1 = Arc::new(DataValue::Int32(Some(1))); - let val_2 = Arc::new(DataValue::Int32(Some(2))); - let val_3 = Arc::new(DataValue::Int32(Some(3))); - - let mut binary = ConstantBinary::Or(vec![ - ConstantBinary::Scope { - min: Bound::Excluded(val_0.clone()), - max: Bound::Included(val_3.clone()), - }, - ConstantBinary::Scope { - min: Bound::Included(val_1.clone()), - max: Bound::Excluded(val_2.clone()), - }, - ConstantBinary::Scope { - min: Bound::Excluded(val_1.clone()), - max: Bound::Included(val_2.clone()), - }, - ConstantBinary::Scope { - min: Bound::Included(val_0.clone()), - max: Bound::Excluded(val_3.clone()), - }, - ]); - - binary.scope_aggregation()?; - - assert_eq!( - binary, - ConstantBinary::Or(vec![ConstantBinary::Scope { - min: Bound::Included(val_0.clone()), - max: Bound::Included(val_3.clone()), - }]) - ); - - Ok(()) - } - - #[test] - fn test_scope_aggregation_or_unbounded() -> Result<(), DatabaseError> { - let val_0 = Arc::new(DataValue::Int32(Some(0))); - let val_1 = Arc::new(DataValue::Int32(Some(1))); - let val_2 = Arc::new(DataValue::Int32(Some(2))); - let val_3 = Arc::new(DataValue::Int32(Some(3))); - - let mut binary = ConstantBinary::Or(vec![ - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Included(val_3.clone()), - }, - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Excluded(val_2.clone()), - }, - ConstantBinary::Scope { - min: Bound::Excluded(val_1.clone()), - max: Bound::Unbounded, - }, - ConstantBinary::Scope { - min: Bound::Included(val_0.clone()), - max: Bound::Unbounded, - }, - ]); - - binary.scope_aggregation()?; - - assert_eq!( - binary, - ConstantBinary::Or(vec![ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Unbounded - }]) - ); - - Ok(()) - } - - #[test] - fn test_scope_aggregation_or_eq_noteq_cover() -> Result<(), DatabaseError> { - let val_0 = Arc::new(DataValue::Int32(Some(0))); - let val_1 = Arc::new(DataValue::Int32(Some(1))); - let val_2 = Arc::new(DataValue::Int32(Some(2))); - let val_3 = Arc::new(DataValue::Int32(Some(3))); - - let mut binary = ConstantBinary::Or(vec![ - ConstantBinary::Eq(val_0.clone()), - ConstantBinary::NotEq(val_1.clone()), - ConstantBinary::Eq(val_2.clone()), - ConstantBinary::NotEq(val_3.clone()), - ConstantBinary::NotEq(val_0.clone()), - ConstantBinary::NotEq(val_1.clone()), - ConstantBinary::NotEq(val_2.clone()), - ConstantBinary::NotEq(val_3.clone()), - ]); - - binary.scope_aggregation()?; - - assert_eq!( - binary, - ConstantBinary::Or(vec![ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Unbounded - }]) - ); - - Ok(()) - } - - #[test] - fn test_scope_aggregation_or_and_mixed() -> Result<(), DatabaseError> { - let val_0 = Arc::new(DataValue::Int32(Some(0))); - let val_1 = Arc::new(DataValue::Int32(Some(1))); - let val_2 = Arc::new(DataValue::Int32(Some(2))); - let val_3 = Arc::new(DataValue::Int32(Some(3))); - - let mut binary = ConstantBinary::Or(vec![ - ConstantBinary::And(vec![ - ConstantBinary::Scope { - min: Bound::Excluded(val_0.clone()), - max: Bound::Included(val_2.clone()), - }, - ConstantBinary::Scope { - min: Bound::Included(val_0.clone()), - max: Bound::Excluded(val_2.clone()), - }, - ]), - ConstantBinary::And(vec![ - ConstantBinary::Scope { - min: Bound::Excluded(val_1.clone()), - max: Bound::Included(val_3.clone()), - }, - ConstantBinary::Scope { - min: Bound::Included(val_1.clone()), - max: Bound::Excluded(val_3.clone()), - }, - ]), - ]); - - binary.scope_aggregation()?; - - assert_eq!( - binary, - ConstantBinary::Or(vec![ConstantBinary::Scope { - min: Bound::Excluded(val_0.clone()), - max: Bound::Excluded(val_3.clone()), - },]) - ); - - Ok(()) - } - - #[test] - fn test_scope_aggregation_and_converse() -> Result<(), DatabaseError> { - let val_0 = Arc::new(DataValue::Int32(Some(0))); - let val_3 = Arc::new(DataValue::Int32(Some(3))); - - let mut binary = ConstantBinary::And(vec![ - ConstantBinary::Scope { - min: Bound::Included(val_3.clone()), - max: Bound::Unbounded, - }, - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Included(val_0.clone()), - }, - ]); - - binary.scope_aggregation()?; - - assert_eq!(binary, ConstantBinary::And(vec![])); - - Ok(()) - } - - #[test] - fn test_scope_aggregation_or_converse() -> Result<(), DatabaseError> { - let val_0 = Arc::new(DataValue::Int32(Some(0))); - let val_3 = Arc::new(DataValue::Int32(Some(3))); - - let mut binary = ConstantBinary::Or(vec![ - ConstantBinary::Scope { - min: Bound::Included(val_3.clone()), - max: Bound::Unbounded, - }, - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Included(val_0.clone()), - }, - ]); - - binary.scope_aggregation()?; - - assert_eq!( - binary, - ConstantBinary::Or(vec![ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Unbounded - }]) - ); - - Ok(()) - } - - #[test] - fn test_scope_aggregation_or_scopes() -> Result<(), DatabaseError> { - let val_0 = Arc::new(DataValue::Int32(Some(0))); - let val_1 = Arc::new(DataValue::Int32(Some(1))); - let val_2 = Arc::new(DataValue::Int32(Some(2))); - let val_3 = Arc::new(DataValue::Int32(Some(3))); - - let mut binary = ConstantBinary::Or(vec![ - ConstantBinary::Scope { - min: Bound::Excluded(val_0.clone()), - max: Bound::Included(val_2.clone()), - }, - ConstantBinary::Scope { - min: Bound::Included(val_0.clone()), - max: Bound::Excluded(val_2.clone()), - }, - ConstantBinary::Scope { - min: Bound::Excluded(val_1.clone()), - max: Bound::Included(val_3.clone()), - }, - ConstantBinary::Scope { - min: Bound::Included(val_1.clone()), - max: Bound::Excluded(val_3.clone()), - }, - ]); - - binary.scope_aggregation()?; - - assert_eq!( - binary, - ConstantBinary::Or(vec![ConstantBinary::Scope { - min: Bound::Included(val_0.clone()), - max: Bound::Included(val_3.clone()), - },]) - ); - - Ok(()) - } - - #[test] - fn test_scope_aggregation_or_and_mixed_1() -> Result<(), DatabaseError> { - let val_5 = Arc::new(DataValue::Int32(Some(5))); - let val_6 = Arc::new(DataValue::Int32(Some(6))); - let val_8 = Arc::new(DataValue::Int32(Some(8))); - let val_12 = Arc::new(DataValue::Int32(Some(12))); - - let mut binary = ConstantBinary::Or(vec![ - ConstantBinary::Eq(val_5.clone()), - ConstantBinary::And(vec![ - ConstantBinary::Scope { - min: Bound::Excluded(val_5.clone()), - max: Bound::Unbounded, - }, - ConstantBinary::Or(vec![ - ConstantBinary::Scope { - min: Bound::Excluded(val_6.clone()), - max: Bound::Unbounded, - }, - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Excluded(val_8.clone()), - }, - ]), - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Excluded(val_12.clone()), - }, - ]), - ]); - - binary.scope_aggregation()?; - - assert_eq!( - binary, - ConstantBinary::Or(vec![ - ConstantBinary::Scope { - min: Bound::Excluded(val_5.clone()), - max: Bound::Excluded(val_12.clone()), - }, - ConstantBinary::Eq(val_5) - ]) - ); - - Ok(()) - } - - #[test] - fn test_scope_aggregation_or_lower_unbounded() -> Result<(), DatabaseError> { - let val_0 = Arc::new(DataValue::Int32(Some(2))); - let val_1 = Arc::new(DataValue::Int32(Some(3))); - - let mut binary = ConstantBinary::Or(vec![ - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Excluded(val_0.clone()), - }, - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Included(val_0.clone()), - }, - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Excluded(val_1.clone()), - }, - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Included(val_1.clone()), - }, - ]); - - binary.scope_aggregation()?; - - assert_eq!( - binary, - ConstantBinary::Or(vec![ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Included(val_1.clone()), - }]) - ); - - Ok(()) - } - - #[test] - fn test_scope_aggregation_or_upper_unbounded() -> Result<(), DatabaseError> { - let val_0 = Arc::new(DataValue::Int32(Some(2))); - let val_1 = Arc::new(DataValue::Int32(Some(3))); - - let mut binary = ConstantBinary::Or(vec![ - ConstantBinary::Scope { - min: Bound::Excluded(val_0.clone()), - max: Bound::Unbounded, - }, - ConstantBinary::Scope { - min: Bound::Included(val_0.clone()), - max: Bound::Unbounded, - }, - ConstantBinary::Scope { - min: Bound::Excluded(val_1.clone()), - max: Bound::Unbounded, - }, - ConstantBinary::Scope { - min: Bound::Included(val_1.clone()), - max: Bound::Unbounded, - }, - ]); - - binary.scope_aggregation()?; - - assert_eq!( - binary, - ConstantBinary::Or(vec![ConstantBinary::Scope { - min: Bound::Included(val_0.clone()), - max: Bound::Unbounded, - }]) - ); - - Ok(()) - } - - #[test] - fn test_rearrange() -> Result<(), DatabaseError> { - let val_0 = Arc::new(DataValue::Int32(Some(0))); - let val_1 = Arc::new(DataValue::Int32(Some(1))); - let val_2 = Arc::new(DataValue::Int32(Some(2))); - let val_3 = Arc::new(DataValue::Int32(Some(3))); - - let val_5 = Arc::new(DataValue::Int32(Some(5))); - - let val_6 = Arc::new(DataValue::Int32(Some(6))); - let val_7 = Arc::new(DataValue::Int32(Some(7))); - let val_8 = Arc::new(DataValue::Int32(Some(8))); - - let val_10 = Arc::new(DataValue::Int32(Some(10))); - - let binary = ConstantBinary::Or(vec![ - ConstantBinary::Scope { - min: Bound::Excluded(val_6.clone()), - max: Bound::Included(val_10.clone()), - }, - ConstantBinary::Scope { - min: Bound::Excluded(val_0.clone()), - max: Bound::Included(val_3.clone()), - }, - ConstantBinary::Scope { - min: Bound::Included(val_1.clone()), - max: Bound::Excluded(val_2.clone()), - }, - ConstantBinary::Scope { - min: Bound::Excluded(val_1.clone()), - max: Bound::Included(val_2.clone()), - }, - ConstantBinary::Scope { - min: Bound::Included(val_0.clone()), - max: Bound::Excluded(val_3.clone()), - }, - ConstantBinary::Scope { - min: Bound::Included(val_6.clone()), - max: Bound::Included(val_7.clone()), - }, - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Unbounded, - }, - ConstantBinary::NotEq(val_8.clone()), - ConstantBinary::Eq(val_5.clone()), - ConstantBinary::Eq(val_0.clone()), - ConstantBinary::Eq(val_1.clone()), - ]); - - assert_eq!( - binary.rearrange()?, - vec![ - ConstantBinary::Scope { - min: Bound::Included(val_0.clone()), - max: Bound::Included(val_3.clone()), - }, - ConstantBinary::Eq(val_5.clone()), - ConstantBinary::Scope { - min: Bound::Included(val_6.clone()), - max: Bound::Included(val_10.clone()), - } - ] - ); - - Ok(()) - } -} diff --git a/src/optimizer/core/cm_sketch.rs b/src/optimizer/core/cm_sketch.rs index b8df970d..c232bbfe 100644 --- a/src/optimizer/core/cm_sketch.rs +++ b/src/optimizer/core/cm_sketch.rs @@ -1,4 +1,4 @@ -use crate::expression::simplify::ConstantBinary; +use crate::expression::range_detacher::Range; use crate::types::value::DataValue; use rand::RngCore; use serde::{Deserialize, Serialize}; @@ -22,19 +22,13 @@ pub struct CountMinSketch { } impl CountMinSketch { - /// Tips: - /// - binaries must be used `ConstantBinary::scope_aggregation` and `ConstantBinary::rearrange` - /// - just count with `ConstantBinary::Eq` - pub fn collect_count(&self, binaries: &[ConstantBinary]) -> usize { + pub fn collect_count(&self, ranges: &[Range]) -> usize { let mut count = 0; - for binary in binaries { - count += match binary { - ConstantBinary::Eq(value) => self.estimate(value), - ConstantBinary::NotEq(_) | ConstantBinary::Scope { .. } => 0, - ConstantBinary::And(binaries) | ConstantBinary::Or(binaries) => { - self.collect_count(binaries) - } + for range in ranges { + count += match range { + Range::Eq(value) => self.estimate(value), + _ => 0, } } @@ -165,7 +159,7 @@ impl CountMinSketch { #[cfg(test)] mod tests { - use crate::expression::simplify::ConstantBinary; + use crate::expression::range_detacher::Range; use crate::optimizer::core::cm_sketch::CountMinSketch; use crate::types::value::DataValue; use std::collections::Bound; @@ -199,8 +193,8 @@ mod tests { } assert_eq!( cms.collect_count(&vec![ - ConstantBinary::Eq(Arc::new(DataValue::Int32(Some(300)))), - ConstantBinary::Scope { + Range::Eq(Arc::new(DataValue::Int32(Some(300)))), + Range::Scope { min: Bound::Unbounded, max: Bound::Unbounded, } diff --git a/src/optimizer/core/column_meta.rs b/src/optimizer/core/column_meta.rs index 20ed2dea..2039f6dd 100644 --- a/src/optimizer/core/column_meta.rs +++ b/src/optimizer/core/column_meta.rs @@ -1,6 +1,6 @@ use crate::catalog::TableName; use crate::errors::DatabaseError; -use crate::expression::simplify::ConstantBinary; +use crate::expression::range_detacher::Range; use crate::optimizer::core::cm_sketch::CountMinSketch; use crate::optimizer::core::histogram::Histogram; use crate::storage::Transaction; @@ -11,6 +11,7 @@ use serde::{Deserialize, Serialize}; use std::fs::OpenOptions; use std::io::{Read, Write}; use std::path::Path; +use std::slice; pub struct ColumnMetaLoader<'a, T: Transaction> { cache: &'a ShardingLruCache>, @@ -72,12 +73,15 @@ impl ColumnMeta { &self.histogram } - /// Tips: - /// - binaries must be used `ConstantBinary::scope_aggregation` and `ConstantBinary::rearrange` - pub fn collect_count(&self, binaries: &[ConstantBinary]) -> usize { + pub fn collect_count(&self, range: &Range) -> usize { let mut count = 0; - count += self.histogram.collect_count(binaries, &self.cm_sketch); + let ranges = if let Range::SortedRanges(ranges) = range { + ranges.as_slice() + } else { + slice::from_ref(range) + }; + count += self.histogram.collect_count(ranges, &self.cm_sketch); count } diff --git a/src/optimizer/core/histogram.rs b/src/optimizer/core/histogram.rs index 9eb517cf..dac70bda 100644 --- a/src/optimizer/core/histogram.rs +++ b/src/optimizer/core/histogram.rs @@ -1,7 +1,7 @@ use crate::catalog::ColumnCatalog; use crate::errors::DatabaseError; use crate::execution::volcano::dql::sort::radix_sort; -use crate::expression::simplify::ConstantBinary; +use crate::expression::range_detacher::Range; use crate::optimizer::core::cm_sketch::CountMinSketch; use crate::types::value::{DataValue, ValueRef}; use crate::types::{ColumnId, LogicalType}; @@ -212,13 +212,8 @@ impl Histogram { self.values_len } - /// Tips: binaries must be used `ConstantBinary::scope_aggregation` and `ConstantBinary::rearrange` - pub fn collect_count( - &self, - binaries: &[ConstantBinary], - sketch: &CountMinSketch, - ) -> usize { - if self.buckets.is_empty() || binaries.is_empty() { + pub fn collect_count(&self, ranges: &[Range], sketch: &CountMinSketch) -> usize { + if self.buckets.is_empty() || ranges.is_empty() { return 0; } @@ -227,15 +222,18 @@ impl Histogram { let mut bucket_i = 0; let mut bucket_idxs = Vec::new(); - while bucket_i < self.buckets.len() && binary_i < binaries.len() { - self._collect_count( - binaries, + while bucket_i < self.buckets.len() && binary_i < ranges.len() { + let is_dummy = self._collect_count( + ranges, &mut binary_i, &mut bucket_i, &mut bucket_idxs, &mut count, sketch, ); + if is_dummy { + return 0; + } } bucket_idxs @@ -247,13 +245,13 @@ impl Histogram { fn _collect_count( &self, - binaries: &[ConstantBinary], + ranges: &[Range], binary_i: &mut usize, bucket_i: &mut usize, bucket_idxs: &mut Vec, count: &mut usize, sketch: &CountMinSketch, - ) { + ) -> bool { let float_value = |value: &DataValue, prefix_len: usize| { match value.logical_type() { LogicalType::Varchar(_) => match value { @@ -320,8 +318,8 @@ impl Histogram { let distinct_1 = OrderedFloat(1.0 / self.number_of_distinct_value as f64); - match &binaries[*binary_i] { - ConstantBinary::Scope { min, max } => { + match &ranges[*binary_i] { + Range::Scope { min, max } => { let bucket = &self.buckets[*bucket_i]; let mut temp_count = 0; @@ -406,21 +404,15 @@ impl Histogram { } *count += cmp::max(temp_count, 0); } - ConstantBinary::Eq(value) => { + Range::Eq(value) => { *count += sketch.estimate(value); *binary_i += 1 } - ConstantBinary::NotEq(_) => (), - ConstantBinary::And(inner_binaries) | ConstantBinary::Or(inner_binaries) => self - ._collect_count( - inner_binaries, - binary_i, - bucket_i, - bucket_idxs, - count, - sketch, - ), + Range::Dummy => return true, + Range::SortedRanges(_) => unreachable!(), } + + false } } @@ -440,7 +432,7 @@ impl Bucket { mod tests { use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnSummary}; use crate::errors::DatabaseError; - use crate::expression::simplify::ConstantBinary; + use crate::expression::range_detacher::Range; use crate::optimizer::core::histogram::{Bucket, HistogramBuilder}; use crate::types::value::DataValue; use crate::types::LogicalType; @@ -690,8 +682,8 @@ mod tests { let count_1 = histogram.collect_count( &vec![ - ConstantBinary::Eq(Arc::new(DataValue::Int32(Some(2)))), - ConstantBinary::Scope { + Range::Eq(Arc::new(DataValue::Int32(Some(2)))), + Range::Scope { min: Bound::Included(Arc::new(DataValue::Int32(Some(4)))), max: Bound::Excluded(Arc::new(DataValue::Int32(Some(12)))), }, @@ -702,7 +694,7 @@ mod tests { assert_eq!(count_1, 9); let count_2 = histogram.collect_count( - &vec![ConstantBinary::Scope { + &vec![Range::Scope { min: Bound::Included(Arc::new(DataValue::Int32(Some(4)))), max: Bound::Unbounded, }], @@ -712,7 +704,7 @@ mod tests { assert_eq!(count_2, 11); let count_3 = histogram.collect_count( - &vec![ConstantBinary::Scope { + &vec![Range::Scope { min: Bound::Excluded(Arc::new(DataValue::Int32(Some(7)))), max: Bound::Unbounded, }], @@ -722,7 +714,7 @@ mod tests { assert_eq!(count_3, 7); let count_4 = histogram.collect_count( - &vec![ConstantBinary::Scope { + &vec![Range::Scope { min: Bound::Unbounded, max: Bound::Included(Arc::new(DataValue::Int32(Some(11)))), }], @@ -732,7 +724,7 @@ mod tests { assert_eq!(count_4, 12); let count_5 = histogram.collect_count( - &vec![ConstantBinary::Scope { + &vec![Range::Scope { min: Bound::Unbounded, max: Bound::Excluded(Arc::new(DataValue::Int32(Some(8)))), }], @@ -742,7 +734,7 @@ mod tests { assert_eq!(count_5, 8); let count_6 = histogram.collect_count( - &vec![ConstantBinary::Scope { + &vec![Range::Scope { min: Bound::Included(Arc::new(DataValue::Int32(Some(2)))), max: Bound::Unbounded, }], @@ -752,7 +744,7 @@ mod tests { assert_eq!(count_6, 13); let count_7 = histogram.collect_count( - &vec![ConstantBinary::Scope { + &vec![Range::Scope { min: Bound::Excluded(Arc::new(DataValue::Int32(Some(1)))), max: Bound::Unbounded, }], @@ -762,7 +754,7 @@ mod tests { assert_eq!(count_7, 13); let count_8 = histogram.collect_count( - &vec![ConstantBinary::Scope { + &vec![Range::Scope { min: Bound::Unbounded, max: Bound::Included(Arc::new(DataValue::Int32(Some(12)))), }], @@ -772,7 +764,7 @@ mod tests { assert_eq!(count_8, 13); let count_9 = histogram.collect_count( - &vec![ConstantBinary::Scope { + &vec![Range::Scope { min: Bound::Unbounded, max: Bound::Excluded(Arc::new(DataValue::Int32(Some(13)))), }], @@ -782,7 +774,7 @@ mod tests { assert_eq!(count_9, 13); let count_10 = histogram.collect_count( - &vec![ConstantBinary::Scope { + &vec![Range::Scope { min: Bound::Excluded(Arc::new(DataValue::Int32(Some(0)))), max: Bound::Excluded(Arc::new(DataValue::Int32(Some(3)))), }], @@ -792,7 +784,7 @@ mod tests { assert_eq!(count_10, 2); let count_11 = histogram.collect_count( - &vec![ConstantBinary::Scope { + &vec![Range::Scope { min: Bound::Included(Arc::new(DataValue::Int32(Some(1)))), max: Bound::Included(Arc::new(DataValue::Int32(Some(2)))), }], diff --git a/src/optimizer/core/memo.rs b/src/optimizer/core/memo.rs index 2e2a729f..059bbff0 100644 --- a/src/optimizer/core/memo.rs +++ b/src/optimizer/core/memo.rs @@ -83,7 +83,7 @@ mod tests { use crate::binder::{Binder, BinderContext}; use crate::db::DataBaseBuilder; use crate::errors::DatabaseError; - use crate::expression::simplify::ConstantBinary; + use crate::expression::range_detacher::Range; use crate::optimizer::core::memo::Memo; use crate::optimizer::heuristic::batch::HepBatchStrategy; use crate::optimizer::heuristic::graph::HepGraph; @@ -172,13 +172,13 @@ mod tests { is_unique: false, is_primary: true, }), - ranges: Some(vec![ - ConstantBinary::Eq(Arc::new(DataValue::Int32(Some(2)))), - ConstantBinary::Scope { + range: Some(Range::SortedRanges(vec![ + Range::Eq(Arc::new(DataValue::Int32(Some(2)))), + Range::Scope { min: Bound::Excluded(Arc::new(DataValue::Int32(Some(40)))), max: Bound::Unbounded, } - ]), + ])), })) ); diff --git a/src/optimizer/rule/implementation/dql/scan.rs b/src/optimizer/rule/implementation/dql/scan.rs index ce624679..ddc034dc 100644 --- a/src/optimizer/rule/implementation/dql/scan.rs +++ b/src/optimizer/rule/implementation/dql/scan.rs @@ -71,17 +71,17 @@ impl ImplementationRule for IndexScanImplementation { if let Operator::Scan(scan_op) = op { let column_metas = loader.load(scan_op.table_name.clone())?; for index_info in scan_op.index_infos.iter() { - if index_info.ranges.is_none() { + if index_info.range.is_none() { continue; } let mut cost = None; - if let Some(binaries) = &index_info.ranges { + if let Some(range) = &index_info.range { // FIXME: Only UniqueIndex if let Some(column_meta) = find_column_meta(column_metas, &index_info.meta.column_ids[0]) { - let mut row_count = column_meta.collect_count(binaries); + let mut row_count = column_meta.collect_count(range); if !index_info.meta.is_primary { // need to return table query(non-covering index) diff --git a/src/optimizer/rule/normalization/pushdown_predicates.rs b/src/optimizer/rule/normalization/pushdown_predicates.rs index 2e9420e6..0e680c6c 100644 --- a/src/optimizer/rule/normalization/pushdown_predicates.rs +++ b/src/optimizer/rule/normalization/pushdown_predicates.rs @@ -1,5 +1,6 @@ use crate::catalog::ColumnRef; use crate::errors::DatabaseError; +use crate::expression::range_detacher::RangeDetacher; use crate::expression::{BinaryOperator, ScalarExpression}; use crate::optimizer::core::pattern::Pattern; use crate::optimizer::core::pattern::PatternChildrenPredicate; @@ -218,19 +219,9 @@ impl NormalizationRule for PushPredicateIntoScan { if let Some(child_id) = graph.eldest_child_at(node_id) { if let Operator::Scan(child_op) = graph.operator_mut(child_id) { //FIXME: now only support `unique` and `primary key` - for IndexInfo { meta, ranges } in &mut child_op.index_infos { - let mut option = op - .predicate - .convert_binary(meta.table_name.as_str(), &meta.column_ids[0])?; - - if let Some(mut binary) = option.take() { - binary.scope_aggregation()?; - let rearrange_ranges = binary.rearrange()?; - - let _ = ranges.replace(rearrange_ranges); - - return Ok(()); - } + for IndexInfo { meta, range } in &mut child_op.index_infos { + *range = RangeDetacher::new(meta.table_name.as_str(), &meta.column_ids[0]) + .detach(&op.predicate)?; } } } @@ -244,7 +235,7 @@ impl NormalizationRule for PushPredicateIntoScan { mod tests { use crate::binder::test::select_sql_run; use crate::errors::DatabaseError; - use crate::expression::simplify::ConstantBinary::Scope; + use crate::expression::range_detacher::Range; use crate::expression::{BinaryOperator, ScalarExpression}; use crate::optimizer::heuristic::batch::HepBatchStrategy; use crate::optimizer::heuristic::optimizer::HepOptimizer; @@ -275,12 +266,12 @@ mod tests { .find_best::(None)?; if let Operator::Scan(op) = &best_plan.childrens[0].childrens[0].operator { - let mock_binaries = vec![Scope { + let mock_range = Range::Scope { min: Bound::Excluded(Arc::new(DataValue::Int32(Some(1)))), max: Bound::Unbounded, - }]; + }; - assert_eq!(op.index_infos[1].ranges, Some(mock_binaries)); + assert_eq!(op.index_infos[1].range, Some(mock_range)); } else { unreachable!("Should be a filter operator") } diff --git a/src/optimizer/rule/normalization/simplification.rs b/src/optimizer/rule/normalization/simplification.rs index 6162da14..c4fbf138 100644 --- a/src/optimizer/rule/normalization/simplification.rs +++ b/src/optimizer/rule/normalization/simplification.rs @@ -114,17 +114,16 @@ mod test { use crate::binder::test::select_sql_run; use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnSummary}; use crate::errors::DatabaseError; - use crate::expression::simplify::ConstantBinary; + use crate::expression::range_detacher::{Range, RangeDetacher}; use crate::expression::{BinaryOperator, ScalarExpression, UnaryOperator}; use crate::optimizer::heuristic::batch::HepBatchStrategy; use crate::optimizer::heuristic::optimizer::HepOptimizer; use crate::optimizer::rule::normalization::NormalizationRuleImpl; - use crate::planner::operator::filter::FilterOperator; use crate::planner::operator::Operator; use crate::planner::LogicalPlan; use crate::storage::kip::KipTransaction; use crate::types::value::DataValue; - use crate::types::LogicalType; + use crate::types::{ColumnId, LogicalType}; use std::collections::Bound; use std::sync::Arc; @@ -157,12 +156,16 @@ mod test { unreachable!(); } if let Operator::Filter(filter_op) = best_plan.childrens[0].clone().operator { - let column_binary = filter_op.predicate.convert_binary("t1", &0).unwrap(); - let final_binary = ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2)))), - }; - assert_eq!(column_binary, Some(final_binary)); + let range = RangeDetacher::new("t1", &0) + .detach(&filter_op.predicate)? + .unwrap(); + assert_eq!( + range, + Range::Scope { + min: Bound::Unbounded, + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2)))), + } + ); } else { unreachable!(); } @@ -196,7 +199,7 @@ mod test { // c1 < 24 let plan_10 = select_sql_run("select * from t1 where 24 < (-1 - c1) + 1").await?; - let op = |plan: LogicalPlan, expr: &str| -> Result, DatabaseError> { + let op = |plan: LogicalPlan| -> Result, DatabaseError> { let best_plan = HepOptimizer::new(plan.clone()) .batch( "test_simplify_filter".to_string(), @@ -205,22 +208,17 @@ mod test { ) .find_best::(None)?; if let Operator::Filter(filter_op) = best_plan.childrens[0].clone().operator { - println!( - "{expr}: {:#?}", - filter_op.predicate.convert_binary("t1", &0).unwrap() - ); - - Ok(filter_op.predicate.convert_binary("t1", &0).unwrap()) + Ok(RangeDetacher::new("t1", &0).detach(&filter_op.predicate)?) } else { Ok(None) } }; - let op_1 = op(plan_1, "-(c1 + 1) > 1")?; - let op_2 = op(plan_2, "-(1 - c1) > 1")?; - let op_3 = op(plan_3, "-c1 > 1")?; - let op_4 = op(plan_4, "c1 + 1 > 1")?; - let op_5 = op(plan_9, "(-1 - c1) + 1 > 24")?; + let op_1 = op(plan_1)?; + let op_2 = op(plan_2)?; + let op_3 = op(plan_3)?; + let op_4 = op(plan_4)?; + let op_5 = op(plan_9)?; assert!(op_1.is_some()); assert!(op_2.is_some()); @@ -228,11 +226,11 @@ mod test { assert!(op_4.is_some()); assert!(op_5.is_some()); - assert_eq!(op_1, op(plan_5, "1 < -(c1 + 1)")?); - assert_eq!(op_2, op(plan_6, "1 < -(1 - c1)")?); - assert_eq!(op_3, op(plan_7, "1 < -c1")?); - assert_eq!(op_4, op(plan_8, "1 < c1 + 1")?); - assert_eq!(op_5, op(plan_10, "24 < (-1 - c1) + 1")?); + assert_eq!(op_1, op(plan_5)?); + assert_eq!(op_2, op(plan_6)?); + assert_eq!(op_3, op(plan_7)?); + assert_eq!(op_4, op(plan_8)?); + assert_eq!(op_5, op(plan_10)?); Ok(()) } @@ -306,7 +304,10 @@ mod test { Ok(()) } - fn plan_filter(plan: LogicalPlan, expr: &str) -> Result, DatabaseError> { + fn plan_filter( + plan: &LogicalPlan, + column_id: &ColumnId, + ) -> Result, DatabaseError> { let best_plan = HepOptimizer::new(plan.clone()) .batch( "test_simplify_filter".to_string(), @@ -315,9 +316,7 @@ mod test { ) .find_best::(None)?; if let Operator::Filter(filter_op) = best_plan.childrens[0].clone().operator { - println!("{expr}: {:#?}", filter_op); - - Ok(Some(filter_op)) + Ok(RangeDetacher::new("t1", &column_id).detach(&filter_op.predicate)?) } else { Ok(None) } @@ -336,89 +335,73 @@ mod test { // c1 > 0 let plan_4 = select_sql_run("select * from t1 where c1 + 1 > 1 and -c2 > 1").await?; - let op_1 = plan_filter(plan_1, "-(c1 + 1) > 1 and -(1 - c2) > 1")?.unwrap(); - let op_2 = plan_filter(plan_2, "-(1 - c1) > 1 and -(c2 + 1) > 1")?.unwrap(); - let op_3 = plan_filter(plan_3, "-c1 > 1 and c2 + 1 > 1")?.unwrap(); - let op_4 = plan_filter(plan_4, "c1 + 1 > 1 and -c2 > 1")?.unwrap(); + let range_1_c1 = plan_filter(&plan_1, &0)?.unwrap(); + let range_1_c2 = plan_filter(&plan_1, &1)?.unwrap(); + + let range_2_c1 = plan_filter(&plan_2, &0)?.unwrap(); + let range_2_c2 = plan_filter(&plan_2, &1)?.unwrap(); + + let range_3_c1 = plan_filter(&plan_3, &0)?.unwrap(); + let range_3_c2 = plan_filter(&plan_3, &1)?.unwrap(); + + let range_4_c1 = plan_filter(&plan_4, &0)?.unwrap(); + let range_4_c2 = plan_filter(&plan_4, &1)?.unwrap(); - let cb_1_c1 = op_1.predicate.convert_binary("t1", &0).unwrap(); - println!("op_1 => c1: {:#?}", cb_1_c1); assert_eq!( - cb_1_c1, - Some(ConstantBinary::Scope { + range_1_c1, + Range::Scope { min: Bound::Unbounded, max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2)))) - }) + } ); - - let cb_1_c2 = op_1.predicate.convert_binary("t1", &1).unwrap(); - println!("op_1 => c2: {:#?}", cb_1_c2); assert_eq!( - cb_1_c2, - Some(ConstantBinary::Scope { + range_1_c2, + Range::Scope { min: Bound::Excluded(Arc::new(DataValue::Int32(Some(2)))), max: Bound::Unbounded - }) + } ); - - let cb_2_c1 = op_2.predicate.convert_binary("t1", &0).unwrap(); - println!("op_2 => c1: {:#?}", cb_2_c1); assert_eq!( - cb_2_c1, - Some(ConstantBinary::Scope { + range_2_c1, + Range::Scope { min: Bound::Excluded(Arc::new(DataValue::Int32(Some(2)))), max: Bound::Unbounded - }) + } ); - - let cb_2_c2 = op_2.predicate.convert_binary("t1", &1).unwrap(); - println!("op_2 => c2: {:#?}", cb_2_c2); assert_eq!( - cb_1_c1, - Some(ConstantBinary::Scope { + range_2_c2, + Range::Scope { min: Bound::Unbounded, max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2)))) - }) + } ); - - let cb_3_c1 = op_3.predicate.convert_binary("t1", &0).unwrap(); - println!("op_3 => c1: {:#?}", cb_3_c1); assert_eq!( - cb_3_c1, - Some(ConstantBinary::Scope { + range_3_c1, + Range::Scope { min: Bound::Unbounded, max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-1)))) - }) + } ); - - let cb_3_c2 = op_3.predicate.convert_binary("t1", &1).unwrap(); - println!("op_3 => c2: {:#?}", cb_3_c2); assert_eq!( - cb_3_c2, - Some(ConstantBinary::Scope { + range_3_c2, + Range::Scope { min: Bound::Excluded(Arc::new(DataValue::Int32(Some(0)))), max: Bound::Unbounded - }) + } ); - - let cb_4_c1 = op_4.predicate.convert_binary("t1", &0).unwrap(); - println!("op_4 => c1: {:#?}", cb_4_c1); assert_eq!( - cb_4_c1, - Some(ConstantBinary::Scope { + range_4_c1, + Range::Scope { min: Bound::Excluded(Arc::new(DataValue::Int32(Some(0)))), max: Bound::Unbounded - }) + } ); - - let cb_4_c2 = op_4.predicate.convert_binary("t1", &1).unwrap(); - println!("op_4 => c2: {:#?}", cb_4_c2); assert_eq!( - cb_4_c2, - Some(ConstantBinary::Scope { + range_4_c2, + Range::Scope { min: Bound::Unbounded, max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-1)))) - }) + } ); Ok(()) @@ -426,14 +409,10 @@ mod test { #[tokio::test] async fn test_simplify_filter_multiple_column_in_or() -> Result<(), DatabaseError> { - // c1 + 1 < -1 => c1 < -2 + // c1 > c2 or c1 > 1 let plan_1 = select_sql_run("select * from t1 where c1 > c2 or c1 > 1").await?; - let op_1 = plan_filter(plan_1, "c1 > c2 or c1 > 1")?.unwrap(); - - let cb_1_c1 = op_1.predicate.convert_binary("t1", &0).unwrap(); - println!("op_1 => c1: {:#?}", cb_1_c1); - assert_eq!(cb_1_c1, None); + assert_eq!(plan_filter(&plan_1, &0)?, None); Ok(()) } @@ -443,64 +422,12 @@ mod test { { let plan_1 = select_sql_run("select * from t1 where c1 = 4 and c1 > c2 or c1 > 1").await?; - let op_1 = plan_filter(plan_1, "c1 = 4 and c2 > c1 or c1 > 1")?.unwrap(); - - let cb_1_c1 = op_1.predicate.convert_binary("t1", &0).unwrap(); - println!("op_1 => c1: {:#?}", cb_1_c1); - assert_eq!( - cb_1_c1, - Some(ConstantBinary::Or(vec![ - ConstantBinary::Eq(Arc::new(DataValue::Int32(Some(4)))), - ConstantBinary::Scope { - min: Bound::Excluded(Arc::new(DataValue::Int32(Some(1)))), - max: Bound::Unbounded - } - ])) - ); - - Ok(()) - } - - #[tokio::test] - async fn test_simplify_filter_and_or_mixed() -> Result<(), DatabaseError> { - let plan_1 = select_sql_run( - "select * from t1 where c1 = 5 or (c1 > 5 and (c1 > 6 or c1 < 8) and c1 < 12)", - ) - .await?; - - let op_1 = plan_filter( - plan_1, - "c1 = 5 or (c1 > 5 and (c1 > 6 or c1 < 8) and c1 < 12)", - )? - .unwrap(); - - let cb_1_c1 = op_1.predicate.convert_binary("t1", &0).unwrap(); - println!("op_1 => c1: {:#?}", cb_1_c1); assert_eq!( - cb_1_c1, - Some(ConstantBinary::Or(vec![ - ConstantBinary::Eq(Arc::new(DataValue::Int32(Some(5)))), - ConstantBinary::And(vec![ - ConstantBinary::Scope { - min: Bound::Excluded(Arc::new(DataValue::Int32(Some(5)))), - max: Bound::Unbounded, - }, - ConstantBinary::Or(vec![ - ConstantBinary::Scope { - min: Bound::Excluded(Arc::new(DataValue::Int32(Some(6)))), - max: Bound::Unbounded, - }, - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Excluded(Arc::new(DataValue::Int32(Some(8)))), - } - ]), - ConstantBinary::Scope { - min: Bound::Unbounded, - max: Bound::Excluded(Arc::new(DataValue::Int32(Some(12)))), - } - ]) - ])) + plan_filter(&plan_1, &0)?, + Some(Range::Scope { + min: Bound::Excluded(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Unbounded, + }) ); Ok(()) @@ -510,11 +437,10 @@ mod test { async fn test_simplify_filter_column_is_null() -> Result<(), DatabaseError> { let plan_1 = select_sql_run("select * from t1 where c1 is null").await?; - let op_1 = plan_filter(plan_1, "c1 is null")?.unwrap(); - - let cb_1_c1 = op_1.predicate.convert_binary("t1", &0).unwrap(); - println!("op_1 => c1: {:#?}", cb_1_c1); - assert_eq!(cb_1_c1, Some(ConstantBinary::Eq(Arc::new(DataValue::Null)))); + assert_eq!( + plan_filter(&plan_1, &0)?, + Some(Range::Eq(Arc::new(DataValue::Null))) + ); Ok(()) } @@ -523,14 +449,7 @@ mod test { async fn test_simplify_filter_column_is_not_null() -> Result<(), DatabaseError> { let plan_1 = select_sql_run("select * from t1 where c1 is not null").await?; - let op_1 = plan_filter(plan_1, "c1 is not null")?.unwrap(); - - let cb_1_c1 = op_1.predicate.convert_binary("t1", &0).unwrap(); - println!("op_1 => c1: {:#?}", cb_1_c1); - assert_eq!( - cb_1_c1, - Some(ConstantBinary::NotEq(Arc::new(DataValue::Null))) - ); + assert_eq!(plan_filter(&plan_1, &0)?, None); Ok(()) } @@ -539,16 +458,12 @@ mod test { async fn test_simplify_filter_column_in() -> Result<(), DatabaseError> { let plan_1 = select_sql_run("select * from t1 where c1 in (1, 2, 3)").await?; - let op_1 = plan_filter(plan_1, "c1 in (1, 2, 3)")?.unwrap(); - - let cb_1_c1 = op_1.predicate.convert_binary("t1", &0).unwrap(); - println!("op_1 => c1: {:#?}", cb_1_c1); assert_eq!( - cb_1_c1, - Some(ConstantBinary::Or(vec![ - ConstantBinary::Eq(Arc::new(DataValue::Int32(Some(2)))), - ConstantBinary::Eq(Arc::new(DataValue::Int32(Some(1)))), - ConstantBinary::Eq(Arc::new(DataValue::Int32(Some(3)))), + plan_filter(&plan_1, &0)?, + Some(Range::SortedRanges(vec![ + Range::Eq(Arc::new(DataValue::Int32(Some(1)))), + Range::Eq(Arc::new(DataValue::Int32(Some(2)))), + Range::Eq(Arc::new(DataValue::Int32(Some(3)))), ])) ); @@ -559,18 +474,7 @@ mod test { async fn test_simplify_filter_column_not_in() -> Result<(), DatabaseError> { let plan_1 = select_sql_run("select * from t1 where c1 not in (1, 2, 3)").await?; - let op_1 = plan_filter(plan_1, "c1 not in (1, 2, 3)")?.unwrap(); - - let cb_1_c1 = op_1.predicate.convert_binary("t1", &0).unwrap(); - println!("op_1 => c1: {:#?}", cb_1_c1); - assert_eq!( - cb_1_c1, - Some(ConstantBinary::And(vec![ - ConstantBinary::NotEq(Arc::new(DataValue::Int32(Some(2)))), - ConstantBinary::NotEq(Arc::new(DataValue::Int32(Some(1)))), - ConstantBinary::NotEq(Arc::new(DataValue::Int32(Some(3)))), - ])) - ); + assert_eq!(plan_filter(&plan_1, &0)?, None); Ok(()) } diff --git a/src/planner/operator/scan.rs b/src/planner/operator/scan.rs index d731b013..8eb40b61 100644 --- a/src/planner/operator/scan.rs +++ b/src/planner/operator/scan.rs @@ -41,7 +41,7 @@ impl ScanOperator { .iter() .map(|meta| IndexInfo { meta: meta.clone(), - ranges: None, + range: None, }) .collect_vec(); diff --git a/src/storage/kip.rs b/src/storage/kip.rs index 6ddcc0c0..88060874 100644 --- a/src/storage/kip.rs +++ b/src/storage/kip.rs @@ -1,6 +1,6 @@ use crate::catalog::{ColumnCatalog, ColumnRef, TableCatalog, TableMeta, TableName}; use crate::errors::DatabaseError; -use crate::expression::simplify::ConstantBinary; +use crate::expression::range_detacher::Range; use crate::optimizer::core::column_meta::{ColumnMeta, ColumnMetaLoader}; use crate::storage::table_codec::TableCodec; use crate::storage::{Bounds, IndexIter, Iter, Storage, Transaction}; @@ -105,7 +105,7 @@ impl Transaction for KipTransaction { (offset_option, limit_option): Bounds, columns: Vec<(usize, ColumnRef)>, index_meta: IndexMetaRef, - ranges: Vec, + ranges: Vec, ) -> Result, DatabaseError> { assert!(columns.is_sorted_by_key(|(i, _)| i)); assert!(columns.iter().map(|(i, _)| i).all_unique()); @@ -516,7 +516,7 @@ mod test { use crate::catalog::{ColumnCatalog, ColumnDesc}; use crate::db::DataBaseBuilder; use crate::errors::DatabaseError; - use crate::expression::simplify::ConstantBinary; + use crate::expression::range_detacher::Range; use crate::storage::kip::KipStorage; use crate::storage::{IndexIter, Iter, Storage, Transaction}; use crate::types::index::IndexMeta; @@ -634,8 +634,8 @@ mod test { }), table: &table, ranges: VecDeque::from(vec![ - ConstantBinary::Eq(Arc::new(DataValue::Int32(Some(0)))), - ConstantBinary::Scope { + Range::Eq(Arc::new(DataValue::Int32(Some(0)))), + Range::Scope { min: Bound::Included(Arc::new(DataValue::Int32(Some(2)))), max: Bound::Included(Arc::new(DataValue::Int32(Some(4)))), }, @@ -679,7 +679,7 @@ mod test { (Some(0), Some(1)), columns, table.indexes[0].clone(), - vec![ConstantBinary::Scope { + vec![Range::Scope { min: Bound::Excluded(Arc::new(DataValue::Int32(Some(0)))), max: Bound::Unbounded, }], diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 5c311429..bd34bf60 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -3,7 +3,7 @@ mod table_codec; use crate::catalog::{ColumnCatalog, ColumnRef, TableCatalog, TableMeta, TableName}; use crate::errors::DatabaseError; -use crate::expression::simplify::ConstantBinary; +use crate::expression::range_detacher::Range; use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::storage::table_codec::TableCodec; use crate::types::index::{Index, IndexMetaRef}; @@ -46,7 +46,7 @@ pub trait Transaction: Sync + Send + 'static { bounds: Bounds, columns: Vec<(usize, ColumnRef)>, index_meta: IndexMetaRef, - ranges: Vec, + ranges: Vec, ) -> Result, DatabaseError>; fn add_index( @@ -117,7 +117,7 @@ pub struct IndexIter<'a> { // for buffering data index_values: VecDeque, - ranges: VecDeque, + ranges: VecDeque, scope_iter: Option>, } @@ -224,7 +224,7 @@ impl Iter for IndexIter<'_> { // 4. When `scope_iter` and `index_values` do not have a value, use the next expression to iterate if let Some(binary) = self.ranges.pop_front() { match binary { - ConstantBinary::Scope { min, max } => { + Range::Scope { min, max } => { let table_name = &self.table.name; let index_meta = &self.index_meta; @@ -258,7 +258,7 @@ impl Iter for IndexIter<'_> { )?; self.scope_iter = Some(iter); } - ConstantBinary::Eq(val) => { + Range::Eq(val) => { let key = self.val_to_key(val)?; if let Some(bytes) = self.tx.get(&key)? { let index = if self.index_meta.is_unique { diff --git a/src/types/index.rs b/src/types/index.rs index ef61d89e..5adc85ee 100644 --- a/src/types/index.rs +++ b/src/types/index.rs @@ -1,8 +1,7 @@ use crate::catalog::TableName; -use crate::expression::simplify::ConstantBinary; +use crate::expression::range_detacher::Range; use crate::types::value::ValueRef; use crate::types::{ColumnId, LogicalType}; -use itertools::Itertools; use serde::{Deserialize, Serialize}; use std::fmt; use std::fmt::Formatter; @@ -14,7 +13,7 @@ pub type IndexMetaRef = Arc; #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct IndexInfo { pub(crate) meta: IndexMetaRef, - pub(crate) ranges: Option>, + pub(crate) range: Option, } #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)] @@ -44,16 +43,8 @@ impl fmt::Display for IndexInfo { write!(f, "{}", self.meta)?; write!(f, " => ")?; - if let Some(binaries) = &self.ranges { - if binaries.is_empty() { - write!(f, "DUMMY")?; - return Ok(()); - } - let binaries = binaries - .iter() - .map(|binary| format!("{}", binary)) - .join(", "); - write!(f, "{}", binaries)?; + if let Some(range) = &self.range { + write!(f, "{}", range)?; } else { write!(f, "EMPTY")?; } diff --git a/tests/slt/where_by_index.slt b/tests/slt/where_by_index.slt index 3afa8160..d144532f 100644 --- a/tests/slt/where_by_index.slt +++ b/tests/slt/where_by_index.slt @@ -123,7 +123,7 @@ select * from t1 where (id >= 0 or id <= 3) and (id >= 9 or id <= 12) limit 10; 27 28 29 query IIT -select * from t1 where id = 5 or (id > 5 and (id > 6 or id < 8) and id < 12); +select * from t1 where id = 5 or (id > 5 and (id > 6 or id < 8) and id < 12); ---- 6 7 8 9 10 11