From 6bfa14ae1d562d762d10bc0538ec22d69fefbaa5 Mon Sep 17 00:00:00 2001 From: Kould <2435992353@qq.com> Date: Tue, 12 Mar 2024 00:18:40 +0800 Subject: [PATCH] feat: support `CreateIndex` and `IndexType::Normal` & `IndexType::Composite` (#154) * feat: implement `DataValue::Tuple` `>,>=,<,<=` and supports combining an arbitrary range with a vec of preceding eq ranges * feat: support `CreateIndex` and `IndexType::Normal` & `IndexType::Composite` * test: add `create_index.slt` for `CreateIndex` and add more case for `IndexType::Composite` on `where_by_index.slt` * test: add null case for `IndexType::Composite` on `where_by_index.slt` * fix: `find_statistics_meta` wrong parameters * fix: index out of bounds for `delete.rs` * fix: display for range's unbounded --- Cargo.toml | 2 +- README.md | 2 +- src/binder/alter_table.rs | 124 +++--- src/binder/analyze.rs | 8 +- src/binder/create_index.rs | 60 +++ src/binder/create_table.rs | 5 +- src/binder/mod.rs | 9 + src/catalog/column.rs | 4 - src/catalog/table.rs | 20 +- src/errors.rs | 2 + src/execution/volcano/ddl/add_column.rs | 10 +- src/execution/volcano/ddl/create_index.rs | 81 ++++ src/execution/volcano/ddl/mod.rs | 1 + src/execution/volcano/dml/analyze.rs | 64 ++-- src/execution/volcano/dml/delete.rs | 109 +++--- src/execution/volcano/dml/insert.rs | 44 +-- src/execution/volcano/dml/update.rs | 38 +- src/execution/volcano/dql/projection.rs | 22 +- src/execution/volcano/mod.rs | 6 + src/expression/range_detacher.rs | 358 +++++++++++++++++- src/expression/value_compute.rs | 34 ++ src/optimizer/core/histogram.rs | 253 +++++++------ src/optimizer/core/memo.rs | 9 +- src/optimizer/core/mod.rs | 2 +- src/optimizer/core/rule.rs | 4 +- .../{column_meta.rs => statistics_meta.rs} | 97 ++--- src/optimizer/heuristic/optimizer.rs | 4 +- .../rule/implementation/ddl/add_column.rs | 2 +- .../rule/implementation/ddl/create_table.rs | 2 +- .../rule/implementation/ddl/drop_column.rs | 2 +- .../rule/implementation/ddl/drop_table.rs | 2 +- .../rule/implementation/ddl/truncate.rs | 2 +- .../rule/implementation/dml/analyze.rs | 2 +- .../rule/implementation/dml/copy_from_file.rs | 2 +- .../rule/implementation/dml/copy_to_file.rs | 2 +- .../rule/implementation/dml/delete.rs | 2 +- .../rule/implementation/dml/insert.rs | 2 +- .../rule/implementation/dml/update.rs | 2 +- .../rule/implementation/dql/aggregate.rs | 2 +- .../rule/implementation/dql/dummy.rs | 2 +- .../rule/implementation/dql/filter.rs | 2 +- src/optimizer/rule/implementation/dql/join.rs | 2 +- .../rule/implementation/dql/limit.rs | 2 +- .../rule/implementation/dql/projection.rs | 2 +- src/optimizer/rule/implementation/dql/scan.rs | 42 +- src/optimizer/rule/implementation/dql/sort.rs | 2 +- .../rule/implementation/dql/values.rs | 2 +- src/optimizer/rule/implementation/marcos.rs | 2 +- src/optimizer/rule/implementation/mod.rs | 4 +- .../rule/normalization/column_pruning.rs | 1 + .../rule/normalization/expression_remapper.rs | 1 + .../rule/normalization/pushdown_predicates.rs | 37 +- src/planner/mod.rs | 7 +- src/planner/operator/analyze.rs | 5 +- src/planner/operator/create_index.rs | 32 ++ src/planner/operator/mod.rs | 8 +- src/storage/kip.rs | 89 +++-- src/storage/mod.rs | 142 +++++-- src/storage/table_codec.rs | 85 +++-- src/types/index.rs | 52 ++- src/types/value.rs | 5 + tests/slt/create_index.slt | 28 ++ tests/slt/dummy.slt | 20 + tests/slt/where_by_index.slt | 65 ++++ 64 files changed, 1450 insertions(+), 585 deletions(-) create mode 100644 src/binder/create_index.rs create mode 100644 src/execution/volcano/ddl/create_index.rs rename src/optimizer/core/{column_meta.rs => statistics_meta.rs} (62%) create mode 100644 src/planner/operator/create_index.rs create mode 100644 tests/slt/create_index.slt diff --git a/Cargo.toml b/Cargo.toml index 74230488..e6794e08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ [package] name = "fnck_sql" -version = "0.0.1-alpha.11.fix3" +version = "0.0.1-alpha.12" edition = "2021" authors = ["Kould ", "Xwg "] description = "Fast Insert OLTP SQL DBMS" diff --git a/README.md b/README.md index a4804689..20608e6a 100755 --- a/README.md +++ b/README.md @@ -128,7 +128,7 @@ let fnck_sql = DataBaseBuilder::path("./data") - Rollback (Server only) - Create - [x] Table - - [ ] Index + - [x] Index: Unique\Normal\Composite - Drop - [x] Table - [ ] Index diff --git a/src/binder/alter_table.rs b/src/binder/alter_table.rs index 27edd6f7..c40dc122 100644 --- a/src/binder/alter_table.rs +++ b/src/binder/alter_table.rs @@ -19,73 +19,69 @@ impl<'a, T: Transaction> Binder<'a, T> { operation: &AlterTableOperation, ) -> Result { let table_name: Arc = Arc::new(lower_case_name(name)?); + let table = self + .context + .table(table_name.clone()) + .ok_or(DatabaseError::TableNotFound)?; + let plan = match operation { + AlterTableOperation::AddColumn { + column_keyword: _, + if_not_exists, + column_def, + } => { + let plan = ScanOperator::build(table_name.clone(), table); + let column = self.bind_column(column_def)?; - if let Some(table) = self.context.table(table_name.clone()) { - let plan = match operation { - AlterTableOperation::AddColumn { - column_keyword: _, - if_not_exists, - column_def, - } => { - let plan = ScanOperator::build(table_name.clone(), table); - let column = self.bind_column(column_def)?; - - if !is_valid_identifier(column.name()) { - return Err(DatabaseError::InvalidColumn( - "illegal column naming".to_string(), - )); - } - LogicalPlan::new( - Operator::AddColumn(AddColumnOperator { - table_name, - if_not_exists: *if_not_exists, - column, - }), - vec![plan], - ) + if !is_valid_identifier(column.name()) { + return Err(DatabaseError::InvalidColumn( + "illegal column naming".to_string(), + )); } - AlterTableOperation::DropColumn { - column_name, - if_exists, - .. - } => { - let plan = ScanOperator::build(table_name.clone(), table); - let column_name = column_name.value.clone(); + LogicalPlan::new( + Operator::AddColumn(AddColumnOperator { + table_name, + if_not_exists: *if_not_exists, + column, + }), + vec![plan], + ) + } + AlterTableOperation::DropColumn { + column_name, + if_exists, + .. + } => { + let plan = ScanOperator::build(table_name.clone(), table); + let column_name = column_name.value.clone(); - LogicalPlan::new( - Operator::DropColumn(DropColumnOperator { - table_name, - if_exists: *if_exists, - column_name, - }), - vec![plan], - ) - } - AlterTableOperation::DropPrimaryKey => todo!(), - AlterTableOperation::RenameColumn { - old_column_name: _, - new_column_name: _, - } => todo!(), - AlterTableOperation::RenameTable { table_name: _ } => todo!(), - AlterTableOperation::ChangeColumn { - old_name: _, - new_name: _, - data_type: _, - options: _, - } => todo!(), - AlterTableOperation::AlterColumn { - column_name: _, - op: _, - } => todo!(), - _ => todo!(), - }; + LogicalPlan::new( + Operator::DropColumn(DropColumnOperator { + table_name, + if_exists: *if_exists, + column_name, + }), + vec![plan], + ) + } + AlterTableOperation::DropPrimaryKey => todo!(), + AlterTableOperation::RenameColumn { + old_column_name: _, + new_column_name: _, + } => todo!(), + AlterTableOperation::RenameTable { table_name: _ } => todo!(), + AlterTableOperation::ChangeColumn { + old_name: _, + new_name: _, + data_type: _, + options: _, + } => todo!(), + AlterTableOperation::AlterColumn { + column_name: _, + op: _, + } => todo!(), + _ => todo!(), + }; - Ok(plan) - } else { - Err(DatabaseError::InvalidTable(format!( - "not found table {}", - table_name - ))) - } + Ok(plan) } } diff --git a/src/binder/analyze.rs b/src/binder/analyze.rs index 5bf5e4ed..4d8f0a02 100644 --- a/src/binder/analyze.rs +++ b/src/binder/analyze.rs @@ -5,7 +5,6 @@ use crate::planner::operator::scan::ScanOperator; use crate::planner::operator::Operator; use crate::planner::LogicalPlan; use crate::storage::Transaction; -use itertools::Itertools; use sqlparser::ast::ObjectName; use std::sync::Arc; @@ -14,16 +13,13 @@ impl<'a, T: Transaction> Binder<'a, T> { let table_name = Arc::new(lower_case_name(name)?); let table_catalog = self.context.table_and_bind(table_name.clone(), None)?; - let columns = table_catalog - .columns() - .filter_map(|column| column.desc.is_index().then_some(column.clone())) - .collect_vec(); + let index_metas = table_catalog.indexes.clone(); let scan_op = ScanOperator::build(table_name.clone(), table_catalog); Ok(LogicalPlan::new( Operator::Analyze(AnalyzeOperator { table_name, - columns, + index_metas, }), vec![scan_op], )) diff --git a/src/binder/create_index.rs b/src/binder/create_index.rs new file mode 100644 index 00000000..ffb7fe4e --- /dev/null +++ b/src/binder/create_index.rs @@ -0,0 +1,60 @@ +use crate::binder::{lower_case_name, Binder}; +use crate::errors::DatabaseError; +use crate::expression::ScalarExpression; +use crate::planner::operator::create_index::CreateIndexOperator; +use crate::planner::operator::scan::ScanOperator; +use crate::planner::operator::Operator; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use crate::types::index::IndexType; +use sqlparser::ast::{ObjectName, OrderByExpr}; +use std::sync::Arc; + +impl<'a, T: Transaction> Binder<'a, T> { + pub(crate) fn bind_create_index( + &mut self, + table_name: &ObjectName, + name: &ObjectName, + exprs: &[OrderByExpr], + if_not_exists: bool, + is_unique: bool, + ) -> Result { + let table_name = Arc::new(lower_case_name(table_name)?); + let index_name = lower_case_name(name)?; + let ty = if is_unique { + IndexType::Unique + } else if exprs.len() == 1 { + IndexType::Normal + } else { + IndexType::Composite + }; + + let table = self.context.table_and_bind(table_name.clone(), None)?; + let plan = ScanOperator::build(table_name.clone(), table); + let mut columns = Vec::with_capacity(exprs.len()); + + for expr in exprs { + // TODO: Expression Index + match self.bind_expr(&expr.expr)? { + ScalarExpression::ColumnRef(column) => columns.push(column), + expr => { + return Err(DatabaseError::UnsupportedStmt(format!( + "create index on {}", + expr + ))) + } + } + } + + Ok(LogicalPlan::new( + Operator::CreateIndex(CreateIndexOperator { + table_name, + columns, + index_name, + if_not_exists, + ty, + }), + vec![plan], + )) + } +} diff --git a/src/binder/create_table.rs b/src/binder/create_table.rs index 2208d0ba..c5789903 100644 --- a/src/binder/create_table.rs +++ b/src/binder/create_table.rs @@ -80,15 +80,14 @@ impl<'a, T: Transaction> Binder<'a, T> { )); } - let plan = LogicalPlan::new( + Ok(LogicalPlan::new( Operator::CreateTable(CreateTableOperator { table_name, columns, if_not_exists, }), vec![], - ); - Ok(plan) + )) } pub fn bind_column(&mut self, column_def: &ColumnDef) -> Result { diff --git a/src/binder/mod.rs b/src/binder/mod.rs index 1b777859..226bce7e 100644 --- a/src/binder/mod.rs +++ b/src/binder/mod.rs @@ -2,6 +2,7 @@ pub mod aggregate; mod alter_table; mod analyze; pub mod copy; +mod create_index; mod create_table; mod delete; mod describe; @@ -248,6 +249,14 @@ impl<'a, T: Transaction> Binder<'a, T> { describe_alias: true, table_name, } => self.bind_describe(table_name)?, + Statement::CreateIndex { + table_name, + name, + columns, + if_not_exists, + unique, + .. + } => self.bind_create_index(table_name, name, columns, *if_not_exists, *unique)?, _ => return Err(DatabaseError::UnsupportedStmt(stmt.to_string())), }; Ok(plan) diff --git a/src/catalog/column.rs b/src/catalog/column.rs index eb69ecee..8533180d 100644 --- a/src/catalog/column.rs +++ b/src/catalog/column.rs @@ -115,8 +115,4 @@ impl ColumnDesc { default, } } - - pub(crate) fn is_index(&self) -> bool { - self.is_unique || self.is_primary - } } diff --git a/src/catalog/table.rs b/src/catalog/table.rs index ead2e16b..50da4067 100644 --- a/src/catalog/table.rs +++ b/src/catalog/table.rs @@ -6,7 +6,7 @@ use std::{slice, vec}; use crate::catalog::{ColumnCatalog, ColumnRef}; use crate::errors::DatabaseError; -use crate::types::index::{IndexMeta, IndexMetaRef}; +use crate::types::index::{IndexMeta, IndexMetaRef, IndexType}; use crate::types::tuple::SchemaRef; use crate::types::{ColumnId, LogicalType}; @@ -33,7 +33,7 @@ impl TableCatalog { pub(crate) fn get_unique_index(&self, col_id: &ColumnId) -> Option<&IndexMetaRef> { self.indexes .iter() - .find(|meta| meta.is_unique && &meta.column_ids[0] == col_id) + .find(|meta| matches!(meta.ty, IndexType::Unique) && &meta.column_ids[0] == col_id) } #[allow(dead_code)] @@ -60,6 +60,10 @@ impl TableCatalog { self.schema_ref.iter() } + pub(crate) fn indexes(&self) -> slice::Iter<'_, IndexMetaRef> { + self.indexes.iter() + } + pub(crate) fn schema_ref(&self) -> &SchemaRef { &self.schema_ref } @@ -113,9 +117,14 @@ impl TableCatalog { &mut self, name: String, column_ids: Vec, - is_unique: bool, - is_primary: bool, + ty: IndexType, ) -> Result<&IndexMeta, DatabaseError> { + for index in self.indexes.iter() { + if index.name == name { + return Err(DatabaseError::DuplicateIndex(name)); + } + } + let index_id = self.indexes.last().map(|index| index.id + 1).unwrap_or(0); let pk_ty = *self.primary_key()?.1.datatype(); let index = IndexMeta { @@ -124,8 +133,7 @@ impl TableCatalog { table_name: self.name.clone(), pk_ty, name, - is_unique, - is_primary, + ty, }; self.indexes.push(Arc::new(index)); Ok(self.indexes.last().unwrap()) diff --git a/src/errors.rs b/src/errors.rs index 19966f73..4a1dc556 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -101,6 +101,8 @@ pub enum DatabaseError { TableNotFound, #[error("the some column: {0} already exists")] DuplicateColumn(String), + #[error("the some index: {0} already exists")] + DuplicateIndex(String), #[error("add column must be nullable or specify a default value")] NeedNullAbleOrDefault, #[error("the table already exists")] diff --git a/src/execution/volcano/ddl/add_column.rs b/src/execution/volcano/ddl/add_column.rs index a2fa63f4..67afa14e 100644 --- a/src/execution/volcano/ddl/add_column.rs +++ b/src/execution/volcano/ddl/add_column.rs @@ -4,10 +4,11 @@ use crate::types::tuple::Tuple; use crate::types::tuple_builder::TupleBuilder; use crate::types::value::DataValue; use futures_async_stream::try_stream; +use std::slice; use std::sync::Arc; use crate::planner::LogicalPlan; -use crate::types::index::Index; +use crate::types::index::{Index, IndexType}; use crate::{planner::operator::alter_table::add_column::AddColumnOperator, storage::Transaction}; pub struct AddColumn { @@ -66,11 +67,8 @@ impl AddColumn { .cloned(), ) { for (tuple_id, value) in unique_values { - let index = Index { - id: unique_meta.id, - column_values: vec![value], - }; - transaction.add_index(table_name, index, &tuple_id, true)?; + let index = Index::new(unique_meta.id, slice::from_ref(&value), IndexType::Unique); + transaction.add_index(table_name, index, &tuple_id)?; } } diff --git a/src/execution/volcano/ddl/create_index.rs b/src/execution/volcano/ddl/create_index.rs new file mode 100644 index 00000000..c5395121 --- /dev/null +++ b/src/execution/volcano/ddl/create_index.rs @@ -0,0 +1,81 @@ +use crate::execution::volcano::dql::projection::Projection; +use crate::execution::volcano::DatabaseError; +use crate::execution::volcano::{build_read, BoxedExecutor, WriteExecutor}; +use crate::expression::ScalarExpression; +use crate::planner::operator::create_index::CreateIndexOperator; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use crate::types::index::Index; +use crate::types::tuple::Tuple; +use crate::types::ColumnId; +use futures_async_stream::try_stream; + +pub struct CreateIndex { + op: CreateIndexOperator, + input: LogicalPlan, +} + +impl From<(CreateIndexOperator, LogicalPlan)> for CreateIndex { + fn from((op, input): (CreateIndexOperator, LogicalPlan)) -> Self { + Self { op, input } + } +} + +impl WriteExecutor for CreateIndex { + fn execute_mut(self, transaction: &mut T) -> BoxedExecutor { + self._execute(transaction) + } +} + +impl CreateIndex { + #[try_stream(boxed, ok = Tuple, error = DatabaseError)] + async fn _execute(mut self, transaction: &mut T) { + let CreateIndexOperator { + table_name, + index_name, + columns, + if_not_exists, + ty, + } = self.op; + let (column_ids, column_exprs): (Vec, Vec) = columns + .into_iter() + .filter_map(|column| { + column + .id() + .map(|id| (id, ScalarExpression::ColumnRef(column))) + }) + .unzip(); + let schema = self.input.output_schema().clone(); + let index_id = match transaction.add_index_meta(&table_name, index_name, column_ids, ty) { + Ok(index_id) => index_id, + Err(DatabaseError::DuplicateIndex(index_name)) => { + return if if_not_exists { + Ok(()) + } else { + Err(DatabaseError::DuplicateIndex(index_name)) + } + } + err => err?, + }; + let mut index_values = Vec::new(); + + #[for_await] + for tuple in build_read(self.input, transaction) { + let mut tuple: Tuple = tuple?; + + let tuple_id = if let Some(tuple_id) = tuple.id.take() { + tuple_id + } else { + continue; + }; + index_values.push(( + tuple_id, + Projection::projection(&tuple, &column_exprs, &schema)?, + )); + } + for (tuple_id, values) in index_values { + let index = Index::new(index_id, &values, ty); + transaction.add_index(table_name.as_str(), index, &tuple_id)?; + } + } +} diff --git a/src/execution/volcano/ddl/mod.rs b/src/execution/volcano/ddl/mod.rs index 9faa1f6e..384bd2d7 100644 --- a/src/execution/volcano/ddl/mod.rs +++ b/src/execution/volcano/ddl/mod.rs @@ -1,4 +1,5 @@ pub mod add_column; +pub(crate) mod create_index; pub(crate) mod create_table; pub mod drop_column; pub(crate) mod drop_table; diff --git a/src/execution/volcano/dml/analyze.rs b/src/execution/volcano/dml/analyze.rs index ceb56ecb..1561fdd2 100644 --- a/src/execution/volcano/dml/analyze.rs +++ b/src/execution/volcano/dml/analyze.rs @@ -1,28 +1,29 @@ -use crate::catalog::{ColumnRef, TableMeta, TableName}; +use crate::catalog::{TableMeta, TableName}; use crate::errors::DatabaseError; +use crate::execution::volcano::dql::projection::Projection; use crate::execution::volcano::{build_read, BoxedExecutor, WriteExecutor}; -use crate::optimizer::core::column_meta::ColumnMeta; use crate::optimizer::core::histogram::HistogramBuilder; +use crate::optimizer::core::statistics_meta::StatisticsMeta; use crate::planner::operator::analyze::AnalyzeOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; +use crate::types::index::IndexMetaRef; use crate::types::tuple::Tuple; use crate::types::value::DataValue; use futures_async_stream::try_stream; use itertools::Itertools; -use std::collections::HashMap; use std::fmt::Formatter; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; use std::{fmt, fs}; const DEFAULT_NUM_OF_BUCKETS: usize = 100; -const DEFAULT_COLUMN_METAS_PATH: &str = "fnck_sql_column_metas"; +const DEFAULT_STATISTICS_META_PATH: &str = "fnck_sql_statistics_metas"; pub struct Analyze { table_name: TableName, input: LogicalPlan, - columns: Vec, + index_metas: Vec, } impl From<(AnalyzeOperator, LogicalPlan)> for Analyze { @@ -30,7 +31,7 @@ impl From<(AnalyzeOperator, LogicalPlan)> for Analyze { ( AnalyzeOperator { table_name, - columns, + index_metas, }, input, ): (AnalyzeOperator, LogicalPlan), @@ -38,7 +39,7 @@ impl From<(AnalyzeOperator, LogicalPlan)> for Analyze { Analyze { table_name, input, - columns, + index_metas, } } } @@ -55,27 +56,35 @@ impl Analyze { let Analyze { table_name, mut input, - columns, + index_metas, } = self; let schema = input.output_schema().clone(); - let mut builders = HashMap::with_capacity(columns.len()); - - for column in &columns { - builders.insert(column.id(), HistogramBuilder::new(column, None)?); + let mut builders = Vec::with_capacity(index_metas.len()); + let table = transaction + .table(table_name.clone()) + .cloned() + .ok_or(DatabaseError::TableNotFound)?; + + for index in table.indexes() { + builders.push(( + index.id, + index.column_exprs(&table)?, + HistogramBuilder::new(index, None)?, + )); } #[for_await] for tuple in build_read(input, transaction) { - let Tuple { values, .. } = tuple?; + let tuple = tuple?; - for (i, column) in schema.iter().enumerate() { - if !column.desc.is_index() { - continue; - } + for (_, exprs, builder) in builders.iter_mut() { + let values = Projection::projection(&tuple, exprs, &schema)?; - if let Some(builder) = builders.get_mut(&column.id()) { - builder.append(&values[i])? + if values.len() == 1 { + builder.append(&values[0])?; + } else { + builder.append(&Arc::new(DataValue::Tuple(Some(values))))?; } } } @@ -85,19 +94,18 @@ impl Analyze { .as_secs(); let dir_path = dirs::config_dir() .expect("Your system does not have a Config directory!") - .join(DEFAULT_COLUMN_METAS_PATH) + .join(DEFAULT_STATISTICS_META_PATH) .join(table_name.as_str()) .join(ts.to_string()); fs::create_dir_all(&dir_path)?; let mut meta = TableMeta::empty(table_name.clone()); - for (column_id, builder) in builders { - let path = dir_path.join(column_id.unwrap().to_string()); + for (index_id, _, builder) in builders { + let path = dir_path.join(index_id.to_string()); let (histogram, sketch) = builder.build(DEFAULT_NUM_OF_BUCKETS)?; - ColumnMeta::new(histogram, sketch).to_file(&path)?; - + StatisticsMeta::new(histogram, sketch).to_file(&path)?; meta.colum_meta_paths.push(path.to_string_lossy().into()); } transaction.save_table_meta(&meta)?; @@ -114,13 +122,9 @@ impl Analyze { impl fmt::Display for AnalyzeOperator { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - let columns = self - .columns - .iter() - .map(|column| column.name().to_string()) - .join(", "); + let indexes = self.index_metas.iter().map(|index| &index.name).join(", "); - write!(f, "Analyze {} -> [{}]", self.table_name, columns)?; + write!(f, "Analyze {} -> [{}]", self.table_name, indexes)?; Ok(()) } diff --git a/src/execution/volcano/dml/delete.rs b/src/execution/volcano/dml/delete.rs index 275e8b7f..36ae51cc 100644 --- a/src/execution/volcano/dml/delete.rs +++ b/src/execution/volcano/dml/delete.rs @@ -1,13 +1,16 @@ use crate::catalog::TableName; use crate::errors::DatabaseError; +use crate::execution::volcano::dql::projection::Projection; use crate::execution::volcano::{build_read, BoxedExecutor, WriteExecutor}; +use crate::expression::ScalarExpression; use crate::planner::operator::delete::DeleteOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; -use crate::types::index::Index; +use crate::types::index::{Index, IndexId, IndexType}; use crate::types::tuple::Tuple; +use crate::types::value::ValueRef; use futures_async_stream::try_stream; -use itertools::Itertools; +use std::collections::HashMap; pub struct Delete { table_name: TableName, @@ -26,60 +29,72 @@ impl WriteExecutor for Delete { } } +struct Value { + exprs: Vec, + value_rows: Vec>, + index_ty: IndexType, +} + impl Delete { #[try_stream(boxed, ok = Tuple, error = DatabaseError)] async fn _execute(self, transaction: &mut T) { - let Delete { table_name, input } = self; - let option_index_metas = transaction.table(table_name.clone()).map(|table_catalog| { - table_catalog - .columns() - .enumerate() - .filter_map(|(i, col)| { - col.desc - .is_unique - .then(|| { - col.id().and_then(|col_id| { - table_catalog - .get_unique_index(&col_id) - .map(|index_meta| (i, index_meta.clone())) - }) - }) - .flatten() - }) - .collect_vec() - }); - - if let Some(index_metas) = option_index_metas { - let mut tuple_ids = Vec::new(); - let mut indexes = Vec::new(); + let Delete { + table_name, + mut input, + } = self; + let schema = input.output_schema().clone(); + let table = transaction + .table(table_name.clone()) + .cloned() + .ok_or(DatabaseError::TableNotFound)?; + let mut tuple_ids = Vec::new(); + let mut indexes: HashMap = HashMap::new(); - #[for_await] - for tuple in build_read(input, transaction) { - let tuple: Tuple = tuple?; + #[for_await] + for tuple in build_read(input, transaction) { + let tuple: Tuple = tuple?; - for (i, index_meta) in index_metas.iter() { - let value = &tuple.values[*i]; + for index_meta in table.indexes() { + if let Some(Value { + exprs, value_rows, .. + }) = indexes.get_mut(&index_meta.id) + { + value_rows.push(Projection::projection(&tuple, exprs, &schema)?); + } else { + let exprs = index_meta.column_exprs(&table)?; + let values = Projection::projection(&tuple, &exprs, &schema)?; - if !value.is_null() { - let index = Index { - id: index_meta.id, - column_values: vec![value.clone()], - }; - - indexes.push(index); - } + indexes.insert( + index_meta.id, + Value { + exprs, + value_rows: vec![values], + index_ty: index_meta.ty, + }, + ); } - - if let Some(tuple_id) = tuple.id { - tuple_ids.push(tuple_id); - } - } - for index in indexes { - transaction.del_index(&table_name, &index)?; } - for tuple_id in tuple_ids { - transaction.delete(&table_name, tuple_id)?; + tuple_ids.push(tuple.id.unwrap()); + } + for ( + index_id, + Value { + value_rows, + index_ty, + .. + }, + ) in indexes + { + for (i, values) in value_rows.into_iter().enumerate() { + transaction.del_index( + &table_name, + &Index::new(index_id, &values, index_ty), + Some(&tuple_ids[i]), + )?; } } + for tuple_id in tuple_ids { + transaction.delete(&table_name, tuple_id)?; + } } } diff --git a/src/execution/volcano/dml/insert.rs b/src/execution/volcano/dml/insert.rs index 77b8b05b..3be35465 100644 --- a/src/execution/volcano/dml/insert.rs +++ b/src/execution/volcano/dml/insert.rs @@ -1,5 +1,6 @@ use crate::catalog::TableName; use crate::errors::DatabaseError; +use crate::execution::volcano::dql::projection::Projection; use crate::execution::volcano::{build_read, BoxedExecutor, WriteExecutor}; use crate::planner::operator::insert::InsertOperator; use crate::planner::LogicalPlan; @@ -49,8 +50,7 @@ impl Insert { mut input, is_overwrite, } = self; - let mut unique_values = HashMap::new(); - let mut tuple_values = Vec::new(); + let mut tuples = Vec::new(); let schema = input.output_schema().clone(); let pk_index = schema @@ -79,43 +79,27 @@ impl Insert { .remove(&col.id()) .or_else(|| col.default_value()) .unwrap_or_else(|| Arc::new(DataValue::none(col.datatype()))); - - if col.desc.is_unique && !value.is_null() { - unique_values - .entry(col.id()) - .or_insert_with(Vec::new) - .push((tuple_id.clone(), value.clone())) - } if value.is_null() && !col.nullable { return Err(DatabaseError::NotNull); } values.push(value) } - tuple_values.push((tuple_id, values)); + tuples.push(Tuple { + id: Some(tuple_id), + values, + }); } + for index_meta in table_catalog.indexes() { + let exprs = index_meta.column_exprs(&table_catalog)?; - // Unique Index - for (col_id, values) in unique_values { - if let Some(index_meta) = table_catalog.get_unique_index(&col_id.unwrap()) { - for (tuple_id, value) in values { - let index = Index { - id: index_meta.id, - column_values: vec![value], - }; - - transaction.add_index(&table_name, index, &tuple_id, true)?; - } + for tuple in tuples.iter() { + let values = Projection::projection(tuple, &exprs, &schema)?; + let index = Index::new(index_meta.id, &values, index_meta.ty); + transaction.add_index(&table_name, index, tuple.id.as_ref().unwrap())?; } } - for (tuple_id, values) in tuple_values { - transaction.append( - &table_name, - Tuple { - id: Some(tuple_id), - values, - }, - is_overwrite, - )?; + for tuple in tuples { + transaction.append(&table_name, tuple, is_overwrite)?; } } } diff --git a/src/execution/volcano/dml/update.rs b/src/execution/volcano/dml/update.rs index 87428ad3..3fca6469 100644 --- a/src/execution/volcano/dml/update.rs +++ b/src/execution/volcano/dml/update.rs @@ -1,5 +1,6 @@ use crate::catalog::TableName; use crate::errors::DatabaseError; +use crate::execution::volcano::dql::projection::Projection; use crate::execution::volcano::{build_read, BoxedExecutor, WriteExecutor}; use crate::planner::operator::update::UpdateOperator; use crate::planner::LogicalPlan; @@ -62,7 +63,17 @@ impl Update { tuples.push(tuple); } + let mut index_metas = Vec::new(); + for index_meta in table_catalog.indexes() { + let exprs = index_meta.column_exprs(&table_catalog)?; + for tuple in tuples.iter() { + let values = Projection::projection(tuple, &exprs, &input_schema)?; + let index = Index::new(index_meta.id, &values, index_meta.ty); + transaction.del_index(&table_name, &index, Some(tuple.id.as_ref().unwrap()))?; + } + index_metas.push((index_meta, exprs)); + } for mut tuple in tuples { let mut is_overwrite = true; @@ -74,31 +85,14 @@ impl Update { transaction.delete(&table_name, old_key)?; is_overwrite = false; } - if column.desc.is_unique && value != &tuple.values[i] { - if let Some(index_meta) = - table_catalog.get_unique_index(&column.id().unwrap()) - { - let mut index = Index { - id: index_meta.id, - column_values: vec![tuple.values[i].clone()], - }; - transaction.del_index(&table_name, &index)?; - - if !value.is_null() { - index.column_values[0] = value.clone(); - transaction.add_index( - &table_name, - index, - tuple.id.as_ref().unwrap(), - true, - )?; - } - } - } - tuple.values[i] = value.clone(); } } + for (index_meta, exprs) in index_metas.iter() { + let values = Projection::projection(&tuple, exprs, &input_schema)?; + let index = Index::new(index_meta.id, &values, index_meta.ty); + transaction.add_index(&table_name, index, tuple.id.as_ref().unwrap())?; + } transaction.append(&table_name, tuple, is_overwrite)?; } diff --git a/src/execution/volcano/dql/projection.rs b/src/execution/volcano/dql/projection.rs index 06a858e1..f0b84320 100644 --- a/src/execution/volcano/dql/projection.rs +++ b/src/execution/volcano/dql/projection.rs @@ -1,3 +1,4 @@ +use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::execution::volcano::{build_read, BoxedExecutor, ReadExecutor}; use crate::expression::ScalarExpression; @@ -5,6 +6,7 @@ use crate::planner::operator::project::ProjectOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::tuple::Tuple; +use crate::types::value::ValueRef; use futures_async_stream::try_stream; pub struct Projection { @@ -25,6 +27,19 @@ impl ReadExecutor for Projection { } impl Projection { + pub fn projection( + tuple: &Tuple, + exprs: &[ScalarExpression], + schmea: &[ColumnRef], + ) -> Result, DatabaseError> { + let mut values = Vec::with_capacity(exprs.len()); + + for expr in exprs.iter() { + values.push(expr.eval(tuple, schmea)?); + } + Ok(values) + } + #[try_stream(boxed, ok = Tuple, error = DatabaseError)] pub async fn _execute(self, transaction: &T) { let Projection { exprs, mut input } = self; @@ -33,13 +48,8 @@ impl Projection { #[for_await] for tuple in build_read(input, transaction) { let mut tuple = tuple?; - let mut values = Vec::with_capacity(exprs.len()); - - for expr in exprs.iter() { - values.push(expr.eval(&tuple, &schema)?); - } - tuple.values = values; + tuple.values = Self::projection(&tuple, &exprs, &schema)?; yield tuple; } } diff --git a/src/execution/volcano/mod.rs b/src/execution/volcano/mod.rs index 6647bc1e..b7b4ed46 100644 --- a/src/execution/volcano/mod.rs +++ b/src/execution/volcano/mod.rs @@ -3,6 +3,7 @@ pub(crate) mod dml; pub(crate) mod dql; use crate::errors::DatabaseError; +use crate::execution::volcano::ddl::create_index::CreateIndex; use crate::execution::volcano::ddl::create_table::CreateTable; use crate::execution::volcano::ddl::drop_column::DropColumn; use crate::execution::volcano::ddl::drop_table::DropTable; @@ -154,6 +155,11 @@ pub fn build_write(plan: LogicalPlan, transaction: &mut T) -> Bo DropColumn::from((op, input)).execute_mut(transaction) } Operator::CreateTable(op) => CreateTable::from(op).execute_mut(transaction), + Operator::CreateIndex(op) => { + let input = childrens.pop().unwrap(); + + CreateIndex::from((op, input)).execute_mut(transaction) + } Operator::DropTable(op) => DropTable::from(op).execute_mut(transaction), Operator::Truncate(op) => Truncate::from(op).execute_mut(transaction), Operator::CopyFromFile(op) => CopyFromFile::from(op).execute_mut(transaction), diff --git a/src/expression/range_detacher.rs b/src/expression/range_detacher.rs index eaf86b78..a57113f4 100644 --- a/src/expression/range_detacher.rs +++ b/src/expression/range_detacher.rs @@ -1,13 +1,14 @@ use crate::catalog::ColumnRef; use crate::errors::DatabaseError; use crate::expression::{BinaryOperator, ScalarExpression}; -use crate::types::value::{ValueRef, NULL_VALUE}; +use crate::types::value::{DataValue, ValueRef, NULL_VALUE}; use crate::types::ColumnId; use itertools::Itertools; use std::cmp::Ordering; use std::collections::Bound; -use std::fmt; use std::fmt::Formatter; +use std::sync::Arc; +use std::{fmt, mem}; /// Used to represent binary relationships between fields and constants /// Tips: The NotEq case is ignored because it makes expression composition very complex @@ -23,6 +24,156 @@ pub enum Range { SortedRanges(Vec), } +struct TreeNode { + value: Option, + children: Vec>, +} + +impl TreeNode { + fn new(value: Option) -> Self { + TreeNode { + value, + children: Vec::new(), + } + } + + fn add_child(&mut self, child: TreeNode) { + self.children.push(child); + } +} + +impl TreeNode { + fn enumeration(self, path: &mut Vec, combinations: &mut Vec>) { + if self.value.is_none() && self.children.is_empty() { + combinations.push(path.clone()); + } + for mut child in self.children { + if let Some(val) = child.value.take() { + path.push(val); + Self::enumeration(child, path, combinations); + let _ = path.pop(); + } else { + Self::enumeration(child, path, combinations); + } + } + } +} + +fn build_tree(ranges: &[Range], current_level: usize) -> Option> { + fn build_subtree<'a>( + ranges: &'a [Range], + range: &'a Range, + current_level: usize, + ) -> Option> { + let value = match range { + Range::Eq(value) => value, + _ => return None, + }; + let mut child = TreeNode::new(Some(value)); + let subtree = build_tree(ranges, current_level + 1)?; + + if !subtree.children.is_empty() || current_level == ranges.len() - 1 { + child.add_child(subtree); + } + Some(child) + } + + let mut root = TreeNode::new(None); + + if current_level < ranges.len() { + match &ranges[current_level] { + Range::SortedRanges(child_ranges) => { + for range in child_ranges.iter() { + root.children + .push(build_subtree(ranges, range, current_level)?); + } + } + range => { + root.children + .push(build_subtree(ranges, range, current_level)?); + } + } + } + Some(root) +} + +impl Range { + pub(crate) fn only_eq(&self) -> bool { + match self { + Range::Eq(_) => true, + Range::SortedRanges(ranges) => ranges.iter().all(|range| range.only_eq()), + _ => false, + } + } + + pub(crate) fn combining_eqs(&self, eqs: &[Range]) -> Option { + #[allow(clippy::map_clone)] + fn merge_value(tuple: &[&ValueRef], value: ValueRef) -> ValueRef { + let mut merge_tuple = Vec::with_capacity(tuple.len() + 1); + merge_tuple.extend(tuple.iter().map(|v| Arc::clone(v))); + merge_tuple.push(value); + + Arc::new(DataValue::Tuple(Some(merge_tuple))) + } + fn _to_tuple_range(tuple: &[&ValueRef], range: Range) -> Range { + fn merge_value_on_bound( + tuple: &[&ValueRef], + bound: Bound, + ) -> Bound { + match bound { + Bound::Included(v) => Bound::Included(merge_value(tuple, v)), + Bound::Excluded(v) => Bound::Excluded(merge_value(tuple, v)), + Bound::Unbounded => Bound::Unbounded, + } + } + + match range { + Range::Scope { min, max } => Range::Scope { + min: merge_value_on_bound(tuple, min), + max: merge_value_on_bound(tuple, max), + }, + Range::Eq(v) => Range::Eq(merge_value(tuple, v)), + Range::Dummy => Range::Dummy, + Range::SortedRanges(mut ranges) => { + for range in &mut ranges { + *range = _to_tuple_range(tuple, mem::replace(range, Range::Dummy)); + } + Range::SortedRanges(ranges) + } + } + } + + let node = build_tree(eqs, 0)?; + let mut combinations = Vec::new(); + + node.enumeration(&mut Vec::new(), &mut combinations); + + if let Some(combination) = match self { + Range::Scope { + min: Bound::Unbounded, + .. + } => combinations.last(), + Range::Scope { + max: Bound::Unbounded, + .. + } => combinations.first(), + _ => None, + } { + return Some(_to_tuple_range(combination, self.clone())); + } + + let mut ranges = Vec::new(); + + for tuple in combinations { + match _to_tuple_range(&tuple, self.clone()) { + Range::SortedRanges(mut res_ranges) => ranges.append(&mut res_ranges), + range => ranges.push(range), + } + } + Some(RangeDetacher::ranges2range(ranges)) + } +} + pub struct RangeDetacher<'a> { table_name: &'a str, column_id: &'a ColumnId, @@ -246,7 +397,7 @@ impl<'a> RangeDetacher<'a> { let merged_ranges = Self::extract_merge_ranges(op, Some(Range::Scope { min, max }), ranges, &mut 0); - Self::ranges2range(merged_ranges) + Some(Self::ranges2range(merged_ranges)) } // e.g. c1 = 1 ? c1 = 2 (Range::Eq(left_val), Range::Eq(right_val)) => { @@ -280,7 +431,7 @@ impl<'a> RangeDetacher<'a> { let merged_ranges = Self::extract_merge_ranges(op, Some(Range::Eq(eq)), ranges, &mut 0); - Self::ranges2range(merged_ranges) + Some(Self::ranges2range(merged_ranges)) } // e.g. (c1 = 1 or c1 = 2) ? (c1 = 1 or c1 = 2) (Range::SortedRanges(left_ranges), Range::SortedRanges(mut right_ranges)) => { @@ -291,18 +442,18 @@ impl<'a> RangeDetacher<'a> { Self::extract_merge_ranges(op, Some(left_range), right_ranges, &mut idx) } - Self::ranges2range(right_ranges) + Some(Self::ranges2range(right_ranges)) } } } - fn ranges2range(mut merged_ranges: Vec) -> Option { + fn ranges2range(mut merged_ranges: Vec) -> Range { if merged_ranges.is_empty() { - Some(Range::Dummy) + Range::Dummy } else if merged_ranges.len() == 1 { - Some(merged_ranges.pop().unwrap()) + merged_ranges.pop().unwrap() } else { - Some(Range::SortedRanges(merged_ranges)) + Range::SortedRanges(merged_ranges) } } @@ -603,7 +754,7 @@ impl fmt::Display for Range { match self { Range::Scope { min, max } => { match min { - Bound::Unbounded => write!(f, "(-∞")?, + Bound::Unbounded => write!(f, "(-inf")?, Bound::Included(value) => write!(f, "[{}", value)?, Bound::Excluded(value) => write!(f, "({}", value)?, } @@ -611,7 +762,7 @@ impl fmt::Display for Range { write!(f, ", ")?; match max { - Bound::Unbounded => write!(f, "+∞)")?, + Bound::Unbounded => write!(f, "+inf)")?, Bound::Included(value) => write!(f, "{}]", value)?, Bound::Excluded(value) => write!(f, "{})", value)?, } @@ -1282,4 +1433,189 @@ mod test { Ok(()) } + + #[test] + fn test_to_tuple_range_some() { + let eqs_ranges = vec![ + Range::Eq(Arc::new(DataValue::Int32(Some(1)))), + Range::SortedRanges(vec![ + Range::Eq(Arc::new(DataValue::Int32(None))), + Range::Eq(Arc::new(DataValue::Int32(Some(1)))), + Range::Eq(Arc::new(DataValue::Int32(Some(2)))), + ]), + Range::SortedRanges(vec![ + Range::Eq(Arc::new(DataValue::Int32(Some(1)))), + Range::Eq(Arc::new(DataValue::Int32(Some(2)))), + ]), + ]; + + let range = Range::Scope { + min: Bound::Included(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Unbounded, + } + .combining_eqs(&eqs_ranges); + + assert_eq!( + range, + Some(Range::Scope { + min: Bound::Included(Arc::new(DataValue::Tuple(Some(vec![ + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(None)), + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(1))), + ])))), + max: Bound::Unbounded, + }) + ); + + let range = Range::Scope { + min: Bound::Unbounded, + max: Bound::Included(Arc::new(DataValue::Int32(Some(1)))), + } + .combining_eqs(&eqs_ranges); + + assert_eq!( + range, + Some(Range::Scope { + min: Bound::Unbounded, + max: Bound::Included(Arc::new(DataValue::Tuple(Some(vec![ + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(2))), + Arc::new(DataValue::Int32(Some(2))), + Arc::new(DataValue::Int32(Some(1))), + ])))), + }) + ); + + let range = Range::Scope { + min: Bound::Included(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Included(Arc::new(DataValue::Int32(Some(2)))), + } + .combining_eqs(&eqs_ranges); + + assert_eq!( + range, + Some(Range::SortedRanges(vec![ + Range::Scope { + min: Bound::Included(Arc::new(DataValue::Tuple(Some(vec![ + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(None)), + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(1))), + ])))), + max: Bound::Included(Arc::new(DataValue::Tuple(Some(vec![ + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(None)), + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(2))), + ])))), + }, + Range::Scope { + min: Bound::Included(Arc::new(DataValue::Tuple(Some(vec![ + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(None)), + Arc::new(DataValue::Int32(Some(2))), + Arc::new(DataValue::Int32(Some(1))), + ])))), + max: Bound::Included(Arc::new(DataValue::Tuple(Some(vec![ + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(None)), + Arc::new(DataValue::Int32(Some(2))), + Arc::new(DataValue::Int32(Some(2))), + ])))), + }, + Range::Scope { + min: Bound::Included(Arc::new(DataValue::Tuple(Some(vec![ + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(1))), + ])))), + max: Bound::Included(Arc::new(DataValue::Tuple(Some(vec![ + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(2))), + ])))), + }, + Range::Scope { + min: Bound::Included(Arc::new(DataValue::Tuple(Some(vec![ + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(2))), + Arc::new(DataValue::Int32(Some(1))), + ])))), + max: Bound::Included(Arc::new(DataValue::Tuple(Some(vec![ + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(2))), + Arc::new(DataValue::Int32(Some(2))), + ])))), + }, + Range::Scope { + min: Bound::Included(Arc::new(DataValue::Tuple(Some(vec![ + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(2))), + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(1))), + ])))), + max: Bound::Included(Arc::new(DataValue::Tuple(Some(vec![ + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(2))), + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(2))), + ])))), + }, + Range::Scope { + min: Bound::Included(Arc::new(DataValue::Tuple(Some(vec![ + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(2))), + Arc::new(DataValue::Int32(Some(2))), + Arc::new(DataValue::Int32(Some(1))), + ])))), + max: Bound::Included(Arc::new(DataValue::Tuple(Some(vec![ + Arc::new(DataValue::Int32(Some(1))), + Arc::new(DataValue::Int32(Some(2))), + Arc::new(DataValue::Int32(Some(2))), + Arc::new(DataValue::Int32(Some(2))), + ])))), + }, + ])) + ) + } + + #[test] + fn test_to_tuple_range_none() { + let eqs_ranges_1 = vec![ + Range::Eq(Arc::new(DataValue::Int32(Some(1)))), + Range::SortedRanges(vec![ + Range::Eq(Arc::new(DataValue::Int32(Some(1)))), + Range::Scope { + min: Bound::Unbounded, + max: Bound::Unbounded, + }, + ]), + ]; + let eqs_ranges_2 = vec![ + Range::Eq(Arc::new(DataValue::Int32(Some(1)))), + Range::Scope { + min: Bound::Unbounded, + max: Bound::Unbounded, + }, + ]; + + let range_1 = Range::Scope { + min: Bound::Included(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Unbounded, + } + .combining_eqs(&eqs_ranges_1); + let range_2 = Range::Scope { + min: Bound::Included(Arc::new(DataValue::Int32(Some(1)))), + max: Bound::Unbounded, + } + .combining_eqs(&eqs_ranges_2); + + assert_eq!(range_1, None); + assert_eq!(range_2, None); + } } diff --git a/src/expression/value_compute.rs b/src/expression/value_compute.rs index 0f29e3a9..e018cb09 100644 --- a/src/expression/value_compute.rs +++ b/src/expression/value_compute.rs @@ -3,6 +3,7 @@ use crate::expression::{BinaryOperator, UnaryOperator}; use crate::types::value::{DataValue, ValueRef}; use crate::types::LogicalType; use regex::Regex; +use std::cmp::Ordering; fn unpack_bool(value: DataValue) -> Option { match value { @@ -592,6 +593,23 @@ impl DataValue { DataValue::Boolean(value) } + BinaryOperator::Gt + | BinaryOperator::GtEq + | BinaryOperator::Lt + | BinaryOperator::LtEq => { + let value = match (left_value, right_value) { + (Some(v1), Some(v2)) => Self::tuple_cmp(v1, v2).map(|order| match op { + BinaryOperator::Gt => order.is_gt(), + BinaryOperator::Lt => order.is_lt(), + BinaryOperator::GtEq => order.is_ge(), + BinaryOperator::LtEq => order.is_le(), + _ => unreachable!(), + }), + (_, _) => None, + }; + + DataValue::Boolean(value) + } _ => return Err(DatabaseError::UnsupportedBinaryOperator(unified_type, *op)), } } @@ -599,6 +617,22 @@ impl DataValue { Ok(value) } + + fn tuple_cmp(v1: Vec, v2: Vec) -> Option { + let mut order = Ordering::Equal; + let mut v1_iter = v1.iter(); + let mut v2_iter = v2.iter(); + + while order == Ordering::Equal { + order = match (v1_iter.next(), v2_iter.next()) { + (Some(v1), Some(v2)) => v1.partial_cmp(v2)?, + (Some(_), None) => Ordering::Greater, + (None, Some(_)) => Ordering::Less, + (None, None) => break, + } + } + Some(order) + } } #[cfg(test)] diff --git a/src/optimizer/core/histogram.rs b/src/optimizer/core/histogram.rs index dac70bda..ae9c5bca 100644 --- a/src/optimizer/core/histogram.rs +++ b/src/optimizer/core/histogram.rs @@ -1,20 +1,19 @@ -use crate::catalog::ColumnCatalog; use crate::errors::DatabaseError; use crate::execution::volcano::dql::sort::radix_sort; use crate::expression::range_detacher::Range; +use crate::expression::BinaryOperator; use crate::optimizer::core::cm_sketch::CountMinSketch; +use crate::types::index::{IndexId, IndexMeta}; use crate::types::value::{DataValue, ValueRef}; -use crate::types::{ColumnId, LogicalType}; +use crate::types::LogicalType; use ordered_float::OrderedFloat; use serde::{Deserialize, Serialize}; -use std::cmp::Ordering; use std::collections::Bound; use std::sync::Arc; use std::{cmp, mem}; pub struct HistogramBuilder { - column_id: ColumnId, - data_type: LogicalType, + index_id: IndexId, null_count: usize, values: Vec<((usize, ValueRef), Vec)>, @@ -25,8 +24,7 @@ pub struct HistogramBuilder { // Equal depth histogram #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Histogram { - column_id: ColumnId, - data_type: LogicalType, + index_id: IndexId, number_of_distinct_value: usize, null_count: usize, @@ -48,10 +46,9 @@ struct Bucket { } impl HistogramBuilder { - pub fn new(column: &ColumnCatalog, capacity: Option) -> Result { + pub fn new(index_meta: &IndexMeta, capacity: Option) -> Result { Ok(Self { - column_id: column.id().ok_or(DatabaseError::OwnerLessColumn)?, - data_type: *column.datatype(), + index_id: index_meta.id, null_count: 0, values: capacity.map(Vec::with_capacity).unwrap_or_default(), value_index: 0, @@ -86,8 +83,7 @@ impl HistogramBuilder { let mut sketch = CountMinSketch::new(self.values.len(), 0.95, 1.0); let HistogramBuilder { - column_id, - data_type, + index_id, null_count, values, .. @@ -102,7 +98,7 @@ impl HistogramBuilder { let sorted_values = radix_sort(values); for i in 0..number_of_buckets { - let mut bucket = Bucket::empty(&data_type); + let mut bucket = Bucket::empty(); let j = (i + 1) * bucket_len; bucket.upper = sorted_values[cmp::min(j, values_len) - 1].1.clone(); @@ -133,8 +129,7 @@ impl HistogramBuilder { Ok(( Histogram { - column_id, - data_type, + index_id, number_of_distinct_value, null_count, values_len, @@ -158,63 +153,69 @@ impl HistogramBuilder { } } -fn is_under(value: &ValueRef, target: &Bound, is_min: bool) -> bool { +fn is_under( + value: &ValueRef, + target: &Bound, + is_min: bool, +) -> Result { let _is_under = |value: &ValueRef, target: &ValueRef, is_min: bool| { - value - .partial_cmp(target) - .map(|order| { - if is_min { - Ordering::is_lt(order) - } else { - Ordering::is_le(order) - } - }) - .unwrap() + let res = value.binary_op( + target, + &if is_min { + BinaryOperator::Lt + } else { + BinaryOperator::LtEq + }, + )?; + Ok::(matches!(res, DataValue::Boolean(Some(true)))) }; - match target { - Bound::Included(target) => _is_under(value, target, is_min), - Bound::Excluded(target) => _is_under(value, target, !is_min), + Ok(match target { + Bound::Included(target) => _is_under(value, target, is_min)?, + Bound::Excluded(target) => _is_under(value, target, !is_min)?, Bound::Unbounded => !is_min, - } + }) } -fn is_above(value: &ValueRef, target: &Bound, is_min: bool) -> bool { +fn is_above( + value: &ValueRef, + target: &Bound, + is_min: bool, +) -> Result { let _is_above = |value: &ValueRef, target: &ValueRef, is_min: bool| { - value - .partial_cmp(target) - .map(|order| { - if is_min { - Ordering::is_ge(order) - } else { - Ordering::is_gt(order) - } - }) - .unwrap() + let res = value.binary_op( + target, + &if is_min { + BinaryOperator::GtEq + } else { + BinaryOperator::Gt + }, + )?; + Ok::(matches!(res, DataValue::Boolean(Some(true)))) }; - - match target { - Bound::Included(target) => _is_above(value, target, is_min), - Bound::Excluded(target) => _is_above(value, target, !is_min), + Ok(match target { + Bound::Included(target) => _is_above(value, target, is_min)?, + Bound::Excluded(target) => _is_above(value, target, !is_min)?, Bound::Unbounded => is_min, - } + }) } impl Histogram { - pub fn column_id(&self) -> ColumnId { - self.column_id - } - pub fn data_type(&self) -> LogicalType { - self.data_type + pub fn index_id(&self) -> IndexId { + self.index_id } pub fn values_len(&self) -> usize { self.values_len } - pub fn collect_count(&self, ranges: &[Range], sketch: &CountMinSketch) -> usize { + pub fn collect_count( + &self, + ranges: &[Range], + sketch: &CountMinSketch, + ) -> Result { if self.buckets.is_empty() || ranges.is_empty() { - return 0; + return Ok(0); } let mut count = 0; @@ -230,17 +231,17 @@ impl Histogram { &mut bucket_idxs, &mut count, sketch, - ); + )?; if is_dummy { - return 0; + return Ok(0); } } - bucket_idxs + Ok(bucket_idxs .iter() .map(|idx| self.buckets[*idx].count as usize) .sum::() - + count + + count) } fn _collect_count( @@ -251,9 +252,9 @@ impl Histogram { bucket_idxs: &mut Vec, count: &mut usize, sketch: &CountMinSketch, - ) -> bool { + ) -> Result { let float_value = |value: &DataValue, prefix_len: usize| { - match value.logical_type() { + let value = match value.logical_type() { LogicalType::Varchar(_) => match value { DataValue::Utf8(value) => value.as_ref().map(|string| { if prefix_len > string.len() { @@ -280,12 +281,10 @@ impl Histogram { }, LogicalType::Date | LogicalType::DateTime => match value { DataValue::Date32(value) => DataValue::Int32(*value) - .cast(&LogicalType::Double) - .unwrap() + .cast(&LogicalType::Double)? .double(), DataValue::Date64(value) => DataValue::Int64(*value) - .cast(&LogicalType::Double) - .unwrap() + .cast(&LogicalType::Double)? .double(), _ => unreachable!(), }, @@ -303,17 +302,33 @@ impl Histogram { | LogicalType::UBigint | LogicalType::Float | LogicalType::Double - | LogicalType::Decimal(_, _) => { - value.clone().cast(&LogicalType::Double).unwrap().double() - } - LogicalType::Tuple => unreachable!(), + | LogicalType::Decimal(_, _) => value.clone().cast(&LogicalType::Double)?.double(), + LogicalType::Tuple => match value { + DataValue::Tuple(Some(values)) => { + let mut float = 0.0; + + for (i, value) in values.iter().enumerate() { + if let Some(f) = + DataValue::clone(value).cast(&LogicalType::Double)?.double() + { + float += f / (10_i32.pow(i as u32) as f64); + } + } + Some(float) + } + DataValue::Tuple(None) => None, + _ => unreachable!(), + }, } - .unwrap_or(0.0) + .unwrap_or(0.0); + Ok::(value) }; let calc_fraction = |start: &DataValue, end: &DataValue, value: &DataValue| { let prefix_len = start.common_prefix_length(end).unwrap_or(0); - (float_value(value, prefix_len) - float_value(start, prefix_len)) - / (float_value(end, prefix_len) - float_value(start, prefix_len)) + Ok::( + (float_value(value, prefix_len)? - float_value(start, prefix_len)?) + / (float_value(end, prefix_len)? - float_value(start, prefix_len)?), + ) }; let distinct_1 = OrderedFloat(1.0 / self.number_of_distinct_value as f64); @@ -328,21 +343,21 @@ impl Histogram { _ => false, }; - if (is_above(&bucket.lower, min, true) || is_eq(&bucket.lower, min)) - && (is_under(&bucket.upper, max, false) || is_eq(&bucket.upper, max)) + if (is_above(&bucket.lower, min, true)? || is_eq(&bucket.lower, min)) + && (is_under(&bucket.upper, max, false)? || is_eq(&bucket.upper, max)) { bucket_idxs.push(mem::replace(bucket_i, *bucket_i + 1)); - } else if is_above(&bucket.lower, max, false) { + } else if is_above(&bucket.lower, max, false)? { *binary_i += 1; - } else if is_under(&bucket.upper, min, true) { + } else if is_under(&bucket.upper, min, true)? { *bucket_i += 1; - } else if is_above(&bucket.lower, min, true) { + } else if is_above(&bucket.lower, min, true)? { let (temp_ratio, option) = match max { Bound::Included(val) => { - (calc_fraction(&bucket.lower, &bucket.upper, val), None) + (calc_fraction(&bucket.lower, &bucket.upper, val)?, None) } Bound::Excluded(val) => ( - calc_fraction(&bucket.lower, &bucket.upper, val), + calc_fraction(&bucket.lower, &bucket.upper, val)?, Some(sketch.estimate(val)), ), Bound::Unbounded => unreachable!(), @@ -353,13 +368,13 @@ impl Histogram { temp_count = temp_count.saturating_sub(count); } *bucket_i += 1; - } else if is_under(&bucket.upper, max, false) { + } else if is_under(&bucket.upper, max, false)? { let (temp_ratio, option) = match min { Bound::Included(val) => { - (calc_fraction(&bucket.lower, &bucket.upper, val), None) + (calc_fraction(&bucket.lower, &bucket.upper, val)?, None) } Bound::Excluded(val) => ( - calc_fraction(&bucket.lower, &bucket.upper, val), + calc_fraction(&bucket.lower, &bucket.upper, val)?, Some(sketch.estimate(val)), ), Bound::Unbounded => unreachable!(), @@ -373,20 +388,20 @@ impl Histogram { } else { let (temp_ratio_max, option_max) = match max { Bound::Included(val) => { - (calc_fraction(&bucket.lower, &bucket.upper, val), None) + (calc_fraction(&bucket.lower, &bucket.upper, val)?, None) } Bound::Excluded(val) => ( - calc_fraction(&bucket.lower, &bucket.upper, val), + calc_fraction(&bucket.lower, &bucket.upper, val)?, Some(sketch.estimate(val)), ), Bound::Unbounded => unreachable!(), }; let (temp_ratio_min, option_min) = match min { Bound::Included(val) => { - (calc_fraction(&bucket.lower, &bucket.upper, val), None) + (calc_fraction(&bucket.lower, &bucket.upper, val)?, None) } Bound::Excluded(val) => ( - calc_fraction(&bucket.lower, &bucket.upper, val), + calc_fraction(&bucket.lower, &bucket.upper, val)?, Some(sketch.estimate(val)), ), Bound::Unbounded => unreachable!(), @@ -408,17 +423,17 @@ impl Histogram { *count += sketch.estimate(value); *binary_i += 1 } - Range::Dummy => return true, + Range::Dummy => return Ok(true), Range::SortedRanges(_) => unreachable!(), } - false + Ok(false) } } impl Bucket { - fn empty(data_type: &LogicalType) -> Self { - let empty_value = Arc::new(DataValue::none(data_type)); + fn empty() -> Self { + let empty_value = Arc::new(DataValue::Null); Bucket { lower: empty_value.clone(), @@ -430,37 +445,29 @@ impl Bucket { #[cfg(test)] mod tests { - use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnSummary}; use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; use crate::optimizer::core::histogram::{Bucket, HistogramBuilder}; + use crate::types::index::{IndexMeta, IndexType}; use crate::types::value::DataValue; use crate::types::LogicalType; use std::ops::Bound; use std::sync::Arc; - fn int32_column() -> ColumnCatalog { - ColumnCatalog { - summary: ColumnSummary { - id: Some(1), - name: "c1".to_string(), - table_name: None, - }, - nullable: false, - desc: ColumnDesc { - column_datatype: LogicalType::UInteger, - is_primary: false, - is_unique: false, - default: None, - }, + fn index_meta() -> IndexMeta { + IndexMeta { + id: 0, + column_ids: vec![0], + table_name: Arc::new("t1".to_string()), + pk_ty: LogicalType::Integer, + name: "pk_c1".to_string(), + ty: IndexType::PrimaryKey, } } #[test] fn test_sort_tuples_on_histogram() -> Result<(), DatabaseError> { - let column = int32_column(); - - let mut builder = HistogramBuilder::new(&column, Some(15))?; + let mut builder = HistogramBuilder::new(&index_meta(), Some(15))?; builder.append(&Arc::new(DataValue::Int32(Some(0))))?; builder.append(&Arc::new(DataValue::Int32(Some(1))))?; @@ -526,9 +533,7 @@ mod tests { #[test] fn test_rev_sort_tuples_on_histogram() -> Result<(), DatabaseError> { - let column = int32_column(); - - let mut builder = HistogramBuilder::new(&column, Some(15))?; + let mut builder = HistogramBuilder::new(&index_meta(), Some(15))?; builder.append(&Arc::new(DataValue::Int32(Some(14))))?; builder.append(&Arc::new(DataValue::Int32(Some(13))))?; @@ -592,9 +597,7 @@ mod tests { #[test] fn test_non_average_on_histogram() -> Result<(), DatabaseError> { - let column = int32_column(); - - let mut builder = HistogramBuilder::new(&column, Some(15))?; + let mut builder = HistogramBuilder::new(&index_meta(), Some(15))?; builder.append(&Arc::new(DataValue::Int32(Some(14))))?; builder.append(&Arc::new(DataValue::Int32(Some(13))))?; @@ -653,9 +656,7 @@ mod tests { #[test] fn test_collect_count() -> Result<(), DatabaseError> { - let column = int32_column(); - - let mut builder = HistogramBuilder::new(&column, Some(15))?; + let mut builder = HistogramBuilder::new(&index_meta(), Some(15))?; builder.append(&Arc::new(DataValue::Int32(Some(14))))?; builder.append(&Arc::new(DataValue::Int32(Some(13))))?; @@ -689,7 +690,7 @@ mod tests { }, ], &sketch, - ); + )?; assert_eq!(count_1, 9); @@ -699,7 +700,7 @@ mod tests { max: Bound::Unbounded, }], &sketch, - ); + )?; assert_eq!(count_2, 11); @@ -709,7 +710,7 @@ mod tests { max: Bound::Unbounded, }], &sketch, - ); + )?; assert_eq!(count_3, 7); @@ -719,7 +720,7 @@ mod tests { max: Bound::Included(Arc::new(DataValue::Int32(Some(11)))), }], &sketch, - ); + )?; assert_eq!(count_4, 12); @@ -729,7 +730,7 @@ mod tests { max: Bound::Excluded(Arc::new(DataValue::Int32(Some(8)))), }], &sketch, - ); + )?; assert_eq!(count_5, 8); @@ -739,7 +740,7 @@ mod tests { max: Bound::Unbounded, }], &sketch, - ); + )?; assert_eq!(count_6, 13); @@ -749,7 +750,7 @@ mod tests { max: Bound::Unbounded, }], &sketch, - ); + )?; assert_eq!(count_7, 13); @@ -759,7 +760,7 @@ mod tests { max: Bound::Included(Arc::new(DataValue::Int32(Some(12)))), }], &sketch, - ); + )?; assert_eq!(count_8, 13); @@ -769,7 +770,7 @@ mod tests { max: Bound::Excluded(Arc::new(DataValue::Int32(Some(13)))), }], &sketch, - ); + )?; assert_eq!(count_9, 13); @@ -779,7 +780,7 @@ mod tests { max: Bound::Excluded(Arc::new(DataValue::Int32(Some(3)))), }], &sketch, - ); + )?; assert_eq!(count_10, 2); @@ -789,7 +790,7 @@ mod tests { max: Bound::Included(Arc::new(DataValue::Int32(Some(2)))), }], &sketch, - ); + )?; assert_eq!(count_11, 2); diff --git a/src/optimizer/core/memo.rs b/src/optimizer/core/memo.rs index 059bbff0..951e918c 100644 --- a/src/optimizer/core/memo.rs +++ b/src/optimizer/core/memo.rs @@ -1,7 +1,7 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::pattern::PatternMatcher; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::optimizer::heuristic::batch::HepMatchOrder; use crate::optimizer::heuristic::graph::{HepGraph, HepNodeId}; use crate::optimizer::heuristic::matcher::HepMatcher; @@ -36,7 +36,7 @@ pub struct Memo { impl Memo { pub(crate) fn new( graph: &HepGraph, - loader: &ColumnMetaLoader<'_, T>, + loader: &StatisticMetaLoader<'_, T>, implementations: &[ImplementationRuleImpl], ) -> Result { let node_count = graph.node_count(); @@ -93,7 +93,7 @@ mod tests { use crate::planner::operator::PhysicalOption; use crate::storage::kip::KipTransaction; use crate::storage::{Storage, Transaction}; - use crate::types::index::{IndexInfo, IndexMeta}; + use crate::types::index::{IndexInfo, IndexMeta, IndexType}; use crate::types::value::DataValue; use crate::types::LogicalType; use petgraph::stable_graph::NodeIndex; @@ -169,8 +169,7 @@ mod tests { table_name: Arc::new("t1".to_string()), pk_ty: LogicalType::Integer, name: "pk_c1".to_string(), - is_unique: false, - is_primary: true, + ty: IndexType::PrimaryKey, }), range: Some(Range::SortedRanges(vec![ Range::Eq(Arc::new(DataValue::Int32(Some(2)))), diff --git a/src/optimizer/core/mod.rs b/src/optimizer/core/mod.rs index 7b734378..8865a8ed 100644 --- a/src/optimizer/core/mod.rs +++ b/src/optimizer/core/mod.rs @@ -1,7 +1,7 @@ pub(crate) mod cm_sketch; -pub(crate) mod column_meta; pub(crate) mod histogram; pub(crate) mod memo; pub(crate) mod opt_expr; pub(crate) mod pattern; pub(crate) mod rule; +pub(crate) mod statistics_meta; diff --git a/src/optimizer/core/rule.rs b/src/optimizer/core/rule.rs index 106a8923..69883b64 100644 --- a/src/optimizer/core/rule.rs +++ b/src/optimizer/core/rule.rs @@ -1,7 +1,7 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::GroupExpression; use crate::optimizer::core::pattern::Pattern; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::optimizer::heuristic::graph::{HepGraph, HepNodeId}; use crate::planner::operator::Operator; use crate::storage::Transaction; @@ -19,7 +19,7 @@ pub trait ImplementationRule: MatchPattern { fn to_expression( &self, op: &Operator, - loader: &ColumnMetaLoader, + loader: &StatisticMetaLoader, group_expr: &mut GroupExpression, ) -> Result<(), DatabaseError>; } diff --git a/src/optimizer/core/column_meta.rs b/src/optimizer/core/statistics_meta.rs similarity index 62% rename from src/optimizer/core/column_meta.rs rename to src/optimizer/core/statistics_meta.rs index 2039f6dd..e50ae8e9 100644 --- a/src/optimizer/core/column_meta.rs +++ b/src/optimizer/core/statistics_meta.rs @@ -4,8 +4,8 @@ use crate::expression::range_detacher::Range; use crate::optimizer::core::cm_sketch::CountMinSketch; use crate::optimizer::core::histogram::Histogram; use crate::storage::Transaction; +use crate::types::index::IndexId; use crate::types::value::DataValue; -use crate::types::{ColumnId, LogicalType}; use kip_db::kernel::utils::lru_cache::ShardingLruCache; use serde::{Deserialize, Serialize}; use std::fs::OpenOptions; @@ -13,67 +13,62 @@ use std::io::{Read, Write}; use std::path::Path; use std::slice; -pub struct ColumnMetaLoader<'a, T: Transaction> { - cache: &'a ShardingLruCache>, +pub struct StatisticMetaLoader<'a, T: Transaction> { + cache: &'a ShardingLruCache>, tx: &'a T, } -impl<'a, T: Transaction> ColumnMetaLoader<'a, T> { +impl<'a, T: Transaction> StatisticMetaLoader<'a, T> { pub fn new( tx: &'a T, - cache: &'a ShardingLruCache>, - ) -> ColumnMetaLoader<'a, T> { - ColumnMetaLoader { cache, tx } + cache: &'a ShardingLruCache>, + ) -> StatisticMetaLoader<'a, T> { + StatisticMetaLoader { cache, tx } } - pub fn load(&self, table_name: TableName) -> Result<&Vec, DatabaseError> { + pub fn load(&self, table_name: TableName) -> Result<&Vec, DatabaseError> { let option = self.cache.get(&table_name); - if let Some(column_metas) = option { - Ok(column_metas) + if let Some(statistics_metas) = option { + Ok(statistics_metas) } else { - let paths = self.tx.column_meta_paths(&table_name)?; - let mut column_metas = Vec::with_capacity(paths.len()); + let paths = self.tx.statistics_meta_paths(&table_name)?; + let mut statistics_metas = Vec::with_capacity(paths.len()); for path in paths { - column_metas.push(ColumnMeta::from_file(path)?); + statistics_metas.push(StatisticsMeta::from_file(path)?); } - Ok(self.cache.get_or_insert(table_name, |_| Ok(column_metas))?) + Ok(self + .cache + .get_or_insert(table_name, |_| Ok(statistics_metas))?) } } } #[derive(Debug, Serialize, Deserialize)] -pub struct ColumnMeta { - column_id: ColumnId, - data_type: LogicalType, +pub struct StatisticsMeta { + index_id: IndexId, histogram: Histogram, cm_sketch: CountMinSketch, } -impl ColumnMeta { +impl StatisticsMeta { pub fn new(histogram: Histogram, cm_sketch: CountMinSketch) -> Self { - ColumnMeta { - column_id: histogram.column_id(), - data_type: histogram.data_type(), + StatisticsMeta { + index_id: histogram.index_id(), histogram, cm_sketch, } } - - pub fn column_id(&self) -> ColumnId { - self.column_id - } - pub fn data_type(&self) -> LogicalType { - self.data_type + pub fn index_id(&self) -> IndexId { + self.index_id } - pub fn histogram(&self) -> &Histogram { &self.histogram } - pub fn collect_count(&self, range: &Range) -> usize { + pub fn collect_count(&self, range: &Range) -> Result { let mut count = 0; let ranges = if let Range::SortedRanges(ranges) = range { @@ -81,8 +76,8 @@ impl ColumnMeta { } else { slice::from_ref(range) }; - count += self.histogram.collect_count(ranges, &self.cm_sketch); - count + count += self.histogram.collect_count(ranges, &self.cm_sketch)?; + Ok(count) } pub fn to_file(&self, path: impl AsRef) -> Result<(), DatabaseError> { @@ -113,38 +108,28 @@ impl ColumnMeta { #[cfg(test)] mod tests { - use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnSummary}; use crate::errors::DatabaseError; - use crate::optimizer::core::column_meta::ColumnMeta; use crate::optimizer::core::histogram::HistogramBuilder; + use crate::optimizer::core::statistics_meta::StatisticsMeta; + use crate::types::index::{IndexMeta, IndexType}; use crate::types::value::DataValue; use crate::types::LogicalType; use std::sync::Arc; use tempfile::TempDir; - fn int32_column() -> ColumnCatalog { - ColumnCatalog { - summary: ColumnSummary { - id: Some(1), - name: "c1".to_string(), - table_name: None, - }, - nullable: false, - desc: ColumnDesc { - column_datatype: LogicalType::UInteger, - is_primary: false, - is_unique: false, - default: None, - }, - } - } - #[test] fn test_to_file_and_from_file() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let column = int32_column(); + let index = IndexMeta { + id: 0, + column_ids: vec![0], + table_name: Arc::new("t1".to_string()), + pk_ty: LogicalType::Integer, + name: "pk_c1".to_string(), + ty: IndexType::PrimaryKey, + }; - let mut builder = HistogramBuilder::new(&column, Some(15))?; + let mut builder = HistogramBuilder::new(&index, Some(15))?; builder.append(&Arc::new(DataValue::Int32(Some(14))))?; builder.append(&Arc::new(DataValue::Int32(Some(13))))?; @@ -170,13 +155,13 @@ mod tests { let (histogram, sketch) = builder.build(4)?; let path = temp_dir.path().join("meta"); - ColumnMeta::new(histogram.clone(), sketch.clone()).to_file(path.clone())?; - let column_meta = ColumnMeta::from_file(path)?; + StatisticsMeta::new(histogram.clone(), sketch.clone()).to_file(path.clone())?; + let statistics_meta = StatisticsMeta::from_file(path)?; - assert_eq!(histogram, column_meta.histogram); + assert_eq!(histogram, statistics_meta.histogram); assert_eq!( sketch.estimate(&DataValue::Null), - column_meta.cm_sketch.estimate(&DataValue::Null) + statistics_meta.cm_sketch.estimate(&DataValue::Null) ); Ok(()) diff --git a/src/optimizer/heuristic/optimizer.rs b/src/optimizer/heuristic/optimizer.rs index d1a0e514..16d5f4c5 100644 --- a/src/optimizer/heuristic/optimizer.rs +++ b/src/optimizer/heuristic/optimizer.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::Memo; use crate::optimizer::core::pattern::PatternMatcher; use crate::optimizer::core::rule::{MatchPattern, NormalizationRule}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::optimizer::heuristic::batch::{HepBatch, HepBatchStrategy}; use crate::optimizer::heuristic::graph::{HepGraph, HepNodeId}; use crate::optimizer::heuristic::matcher::HepMatcher; @@ -44,7 +44,7 @@ impl HepOptimizer { pub fn find_best( mut self, - loader: Option<&ColumnMetaLoader<'_, T>>, + loader: Option<&StatisticMetaLoader<'_, T>>, ) -> Result { for ref batch in self.batches { let mut batch_over = false; diff --git a/src/optimizer/rule/implementation/ddl/add_column.rs b/src/optimizer/rule/implementation/ddl/add_column.rs index 3b672735..120df156 100644 --- a/src/optimizer/rule/implementation/ddl/add_column.rs +++ b/src/optimizer/rule/implementation/ddl/add_column.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/ddl/create_table.rs b/src/optimizer/rule/implementation/ddl/create_table.rs index 5d6a6ec6..78d52255 100644 --- a/src/optimizer/rule/implementation/ddl/create_table.rs +++ b/src/optimizer/rule/implementation/ddl/create_table.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/ddl/drop_column.rs b/src/optimizer/rule/implementation/ddl/drop_column.rs index 41641345..15ec7398 100644 --- a/src/optimizer/rule/implementation/ddl/drop_column.rs +++ b/src/optimizer/rule/implementation/ddl/drop_column.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/ddl/drop_table.rs b/src/optimizer/rule/implementation/ddl/drop_table.rs index 2f7b0a87..1e388293 100644 --- a/src/optimizer/rule/implementation/ddl/drop_table.rs +++ b/src/optimizer/rule/implementation/ddl/drop_table.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/ddl/truncate.rs b/src/optimizer/rule/implementation/ddl/truncate.rs index 2088fcc9..017b708a 100644 --- a/src/optimizer/rule/implementation/ddl/truncate.rs +++ b/src/optimizer/rule/implementation/ddl/truncate.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/dml/analyze.rs b/src/optimizer/rule/implementation/dml/analyze.rs index 18524212..4eb03bb0 100644 --- a/src/optimizer/rule/implementation/dml/analyze.rs +++ b/src/optimizer/rule/implementation/dml/analyze.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/dml/copy_from_file.rs b/src/optimizer/rule/implementation/dml/copy_from_file.rs index 9365df5a..f56c983b 100644 --- a/src/optimizer/rule/implementation/dml/copy_from_file.rs +++ b/src/optimizer/rule/implementation/dml/copy_from_file.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/dml/copy_to_file.rs b/src/optimizer/rule/implementation/dml/copy_to_file.rs index 051d6360..ef979de7 100644 --- a/src/optimizer/rule/implementation/dml/copy_to_file.rs +++ b/src/optimizer/rule/implementation/dml/copy_to_file.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/dml/delete.rs b/src/optimizer/rule/implementation/dml/delete.rs index 2191a4e4..a917ccb1 100644 --- a/src/optimizer/rule/implementation/dml/delete.rs +++ b/src/optimizer/rule/implementation/dml/delete.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/dml/insert.rs b/src/optimizer/rule/implementation/dml/insert.rs index 274b2a14..62aa4811 100644 --- a/src/optimizer/rule/implementation/dml/insert.rs +++ b/src/optimizer/rule/implementation/dml/insert.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/dml/update.rs b/src/optimizer/rule/implementation/dml/update.rs index 7e2488f2..83efc09e 100644 --- a/src/optimizer/rule/implementation/dml/update.rs +++ b/src/optimizer/rule/implementation/dml/update.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/dql/aggregate.rs b/src/optimizer/rule/implementation/dql/aggregate.rs index aece1faf..036f9b34 100644 --- a/src/optimizer/rule/implementation/dql/aggregate.rs +++ b/src/optimizer/rule/implementation/dql/aggregate.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/dql/dummy.rs b/src/optimizer/rule/implementation/dql/dummy.rs index cc211919..49e0e8d3 100644 --- a/src/optimizer/rule/implementation/dql/dummy.rs +++ b/src/optimizer/rule/implementation/dql/dummy.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/dql/filter.rs b/src/optimizer/rule/implementation/dql/filter.rs index 2946d4fb..cba4d391 100644 --- a/src/optimizer/rule/implementation/dql/filter.rs +++ b/src/optimizer/rule/implementation/dql/filter.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/dql/join.rs b/src/optimizer/rule/implementation/dql/join.rs index 59d62450..a85d92c0 100644 --- a/src/optimizer/rule/implementation/dql/join.rs +++ b/src/optimizer/rule/implementation/dql/join.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/dql/limit.rs b/src/optimizer/rule/implementation/dql/limit.rs index 05c785f3..586b1bb0 100644 --- a/src/optimizer/rule/implementation/dql/limit.rs +++ b/src/optimizer/rule/implementation/dql/limit.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/dql/projection.rs b/src/optimizer/rule/implementation/dql/projection.rs index ca4bc6a8..5c2639fe 100644 --- a/src/optimizer/rule/implementation/dql/projection.rs +++ b/src/optimizer/rule/implementation/dql/projection.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/dql/scan.rs b/src/optimizer/rule/implementation/dql/scan.rs index ddc034dc..d4b088d4 100644 --- a/src/optimizer/rule/implementation/dql/scan.rs +++ b/src/optimizer/rule/implementation/dql/scan.rs @@ -1,11 +1,11 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::{ColumnMeta, ColumnMetaLoader}; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::{StatisticMetaLoader, StatisticsMeta}; use crate::planner::operator::{Operator, PhysicalOption}; use crate::storage::Transaction; -use crate::types::ColumnId; +use crate::types::index::{IndexId, IndexType}; use lazy_static::lazy_static; lazy_static! { @@ -30,15 +30,17 @@ impl ImplementationRule for SeqScanImplementation { fn to_expression( &self, op: &Operator, - loader: &ColumnMetaLoader, + loader: &StatisticMetaLoader, group_expr: &mut GroupExpression, ) -> Result<(), DatabaseError> { if let Operator::Scan(scan_op) = op { - let column_metas = loader.load(scan_op.table_name.clone())?; + let statistics_metas = loader.load(scan_op.table_name.clone())?; let mut cost = None; - if let Some(column_meta) = find_column_meta(column_metas, &scan_op.primary_key) { - cost = Some(column_meta.histogram().values_len()); + if let Some(statistics_meta) = + find_statistics_meta(statistics_metas, &scan_op.primary_key) + { + cost = Some(statistics_meta.histogram().values_len()); } group_expr.append_expr(Expression { @@ -65,11 +67,11 @@ impl ImplementationRule for IndexScanImplementation { fn to_expression( &self, op: &Operator, - loader: &ColumnMetaLoader<'_, T>, + loader: &StatisticMetaLoader<'_, T>, group_expr: &mut GroupExpression, ) -> Result<(), DatabaseError> { if let Operator::Scan(scan_op) = op { - let column_metas = loader.load(scan_op.table_name.clone())?; + let statistics_metas = loader.load(scan_op.table_name.clone())?; for index_info in scan_op.index_infos.iter() { if index_info.range.is_none() { continue; @@ -78,12 +80,12 @@ impl ImplementationRule for IndexScanImplementation { if let Some(range) = &index_info.range { // FIXME: Only UniqueIndex - if let Some(column_meta) = - find_column_meta(column_metas, &index_info.meta.column_ids[0]) + if let Some(statistics_meta) = + find_statistics_meta(statistics_metas, &index_info.meta.id) { - let mut row_count = column_meta.collect_count(range); + let mut row_count = statistics_meta.collect_count(range)?; - if !index_info.meta.is_primary { + if !matches!(index_info.meta.ty, IndexType::PrimaryKey) { // need to return table query(non-covering index) row_count *= 2; } @@ -104,13 +106,13 @@ impl ImplementationRule for IndexScanImplementation { } } -fn find_column_meta<'a>( - column_metas: &'a [ColumnMeta], - column_id: &ColumnId, -) -> Option<&'a ColumnMeta> { - assert!(column_metas.is_sorted_by_key(ColumnMeta::column_id)); - column_metas - .binary_search_by(|column_meta| column_meta.column_id().cmp(column_id)) +fn find_statistics_meta<'a>( + statistics_metas: &'a [StatisticsMeta], + index_id: &IndexId, +) -> Option<&'a StatisticsMeta> { + assert!(statistics_metas.is_sorted_by_key(StatisticsMeta::index_id)); + statistics_metas + .binary_search_by(|statistics_meta| statistics_meta.index_id().cmp(index_id)) .ok() - .map(|i| &column_metas[i]) + .map(|i| &statistics_metas[i]) } diff --git a/src/optimizer/rule/implementation/dql/sort.rs b/src/optimizer/rule/implementation/dql/sort.rs index bd0a2ae2..31cf912f 100644 --- a/src/optimizer/rule/implementation/dql/sort.rs +++ b/src/optimizer/rule/implementation/dql/sort.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/dql/values.rs b/src/optimizer/rule/implementation/dql/values.rs index 32b81874..770eccff 100644 --- a/src/optimizer/rule/implementation/dql/values.rs +++ b/src/optimizer/rule/implementation/dql/values.rs @@ -1,8 +1,8 @@ use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::{Expression, GroupExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::planner::operator::{Operator, PhysicalOption}; use crate::single_mapping; use crate::storage::Transaction; diff --git a/src/optimizer/rule/implementation/marcos.rs b/src/optimizer/rule/implementation/marcos.rs index 5e5faa4d..d2803967 100644 --- a/src/optimizer/rule/implementation/marcos.rs +++ b/src/optimizer/rule/implementation/marcos.rs @@ -11,7 +11,7 @@ macro_rules! single_mapping { fn to_expression( &self, _: &Operator, - _: &ColumnMetaLoader<'_, T>, + _: &StatisticMetaLoader<'_, T>, group_expr: &mut GroupExpression, ) -> Result<(), DatabaseError> { //TODO: CostModel diff --git a/src/optimizer/rule/implementation/mod.rs b/src/optimizer/rule/implementation/mod.rs index 9fc422e7..fd0efbf3 100644 --- a/src/optimizer/rule/implementation/mod.rs +++ b/src/optimizer/rule/implementation/mod.rs @@ -4,10 +4,10 @@ pub(crate) mod dql; pub(crate) mod marcos; use crate::errors::DatabaseError; -use crate::optimizer::core::column_meta::ColumnMetaLoader; use crate::optimizer::core::memo::GroupExpression; use crate::optimizer::core::pattern::Pattern; use crate::optimizer::core::rule::{ImplementationRule, MatchPattern}; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::optimizer::rule::implementation::ddl::add_column::AddColumnImplementation; use crate::optimizer::rule::implementation::ddl::create_table::CreateTableImplementation; use crate::optimizer::rule::implementation::ddl::drop_column::DropColumnImplementation; @@ -97,7 +97,7 @@ impl ImplementationRule for ImplementationRuleImpl { fn to_expression( &self, operator: &Operator, - loader: &ColumnMetaLoader<'_, T>, + loader: &StatisticMetaLoader<'_, T>, group_expr: &mut GroupExpression, ) -> Result<(), DatabaseError> { match self { diff --git a/src/optimizer/rule/normalization/column_pruning.rs b/src/optimizer/rule/normalization/column_pruning.rs index d3503775..f6dcc040 100644 --- a/src/optimizer/rule/normalization/column_pruning.rs +++ b/src/optimizer/rule/normalization/column_pruning.rs @@ -144,6 +144,7 @@ impl ColumnPruning { } // DDL Single Plan Operator::CreateTable(_) + | Operator::CreateIndex(_) | Operator::DropTable(_) | Operator::Truncate(_) | Operator::Show diff --git a/src/optimizer/rule/normalization/expression_remapper.rs b/src/optimizer/rule/normalization/expression_remapper.rs index a9f759fa..7817030b 100644 --- a/src/optimizer/rule/normalization/expression_remapper.rs +++ b/src/optimizer/rule/normalization/expression_remapper.rs @@ -89,6 +89,7 @@ impl ExpressionRemapper { | Operator::AddColumn(_) | Operator::DropColumn(_) | Operator::CreateTable(_) + | Operator::CreateIndex(_) | Operator::DropTable(_) | Operator::Truncate(_) | Operator::CopyFromFile(_) diff --git a/src/optimizer/rule/normalization/pushdown_predicates.rs b/src/optimizer/rule/normalization/pushdown_predicates.rs index 0e680c6c..dce4b24f 100644 --- a/src/optimizer/rule/normalization/pushdown_predicates.rs +++ b/src/optimizer/rule/normalization/pushdown_predicates.rs @@ -9,7 +9,7 @@ use crate::optimizer::heuristic::graph::{HepGraph, HepNodeId}; use crate::planner::operator::filter::FilterOperator; use crate::planner::operator::join::JoinType; use crate::planner::operator::Operator; -use crate::types::index::IndexInfo; +use crate::types::index::{IndexInfo, IndexType}; use crate::types::LogicalType; use itertools::Itertools; use lazy_static::lazy_static; @@ -220,8 +220,39 @@ impl NormalizationRule for PushPredicateIntoScan { if let Operator::Scan(child_op) = graph.operator_mut(child_id) { //FIXME: now only support `unique` and `primary key` for IndexInfo { meta, range } in &mut child_op.index_infos { - *range = RangeDetacher::new(meta.table_name.as_str(), &meta.column_ids[0]) - .detach(&op.predicate)?; + if range.is_some() { + continue; + } + *range = match meta.ty { + IndexType::PrimaryKey | IndexType::Unique | IndexType::Normal => { + RangeDetacher::new(meta.table_name.as_str(), &meta.column_ids[0]) + .detach(&op.predicate)? + } + IndexType::Composite => { + let mut res = None; + let mut eq_ranges = Vec::with_capacity(meta.column_ids.len()); + + for column_id in meta.column_ids.iter() { + if let Some(range) = + RangeDetacher::new(meta.table_name.as_str(), column_id) + .detach(&op.predicate)? + { + if range.only_eq() { + eq_ranges.push(range); + continue; + } + res = range.combining_eqs(&eq_ranges); + } + break; + } + if res.is_none() { + if let Some(range) = eq_ranges.pop() { + res = range.combining_eqs(&eq_ranges); + } + } + res + } + } } } } diff --git a/src/planner/mod.rs b/src/planner/mod.rs index 6499a5a3..d1287596 100644 --- a/src/planner/mod.rs +++ b/src/planner/mod.rs @@ -94,7 +94,7 @@ impl LogicalPlan { Operator::Dummy => Arc::new(vec![]), Operator::Show => Arc::new(vec![ Arc::new(ColumnCatalog::new_dummy("TABLE".to_string())), - Arc::new(ColumnCatalog::new_dummy("COLUMN_METAS_LEN".to_string())), + Arc::new(ColumnCatalog::new_dummy("STATISTICS_METAS_LEN".to_string())), ]), Operator::Explain => { Arc::new(vec![Arc::new(ColumnCatalog::new_dummy("PLAN".to_string()))]) @@ -116,7 +116,7 @@ impl LogicalPlan { "DELETED".to_string(), ))]), Operator::Analyze(_) => Arc::new(vec![Arc::new(ColumnCatalog::new_dummy( - "COLUMN_META_PATH".to_string(), + "STATISTICS_META_PATH".to_string(), ))]), Operator::AddColumn(_) => Arc::new(vec![Arc::new(ColumnCatalog::new_dummy( "ADD COLUMN SUCCESS".to_string(), @@ -127,6 +127,9 @@ impl LogicalPlan { Operator::CreateTable(_) => Arc::new(vec![Arc::new(ColumnCatalog::new_dummy( "CREATE TABLE SUCCESS".to_string(), ))]), + Operator::CreateIndex(_) => Arc::new(vec![Arc::new(ColumnCatalog::new_dummy( + "CREATE INDEX SUCCESS".to_string(), + ))]), Operator::DropTable(_) => Arc::new(vec![Arc::new(ColumnCatalog::new_dummy( "DROP TABLE SUCCESS".to_string(), ))]), diff --git a/src/planner/operator/analyze.rs b/src/planner/operator/analyze.rs index d6ee5c31..1796d9c7 100644 --- a/src/planner/operator/analyze.rs +++ b/src/planner/operator/analyze.rs @@ -1,7 +1,8 @@ -use crate::catalog::{ColumnRef, TableName}; +use crate::catalog::TableName; +use crate::types::index::IndexMetaRef; #[derive(Debug, PartialEq, Eq, Clone, Hash)] pub struct AnalyzeOperator { pub table_name: TableName, - pub columns: Vec, + pub index_metas: Vec, } diff --git a/src/planner/operator/create_index.rs b/src/planner/operator/create_index.rs new file mode 100644 index 00000000..8aa8bdc5 --- /dev/null +++ b/src/planner/operator/create_index.rs @@ -0,0 +1,32 @@ +use crate::catalog::{ColumnRef, TableName}; +use crate::types::index::IndexType; +use itertools::Itertools; +use std::fmt; +use std::fmt::Formatter; + +#[derive(Debug, PartialEq, Eq, Clone, Hash)] +pub struct CreateIndexOperator { + pub table_name: TableName, + /// List of columns of the index + pub columns: Vec, + pub index_name: String, + pub if_not_exists: bool, + pub ty: IndexType, +} + +impl fmt::Display for CreateIndexOperator { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let columns = self + .columns + .iter() + .map(|column| column.name().to_string()) + .join(", "); + write!( + f, + "Create Index On {} -> [{}], If Not Exists: {}", + self.table_name, columns, self.if_not_exists + )?; + + Ok(()) + } +} diff --git a/src/planner/operator/mod.rs b/src/planner/operator/mod.rs index f58fd0df..5119560c 100644 --- a/src/planner/operator/mod.rs +++ b/src/planner/operator/mod.rs @@ -3,6 +3,7 @@ pub mod alter_table; pub mod analyze; pub mod copy_from_file; pub mod copy_to_file; +pub mod create_index; pub mod create_table; pub mod delete; pub mod describe; @@ -25,6 +26,7 @@ use crate::planner::operator::alter_table::drop_column::DropColumnOperator; use crate::planner::operator::analyze::AnalyzeOperator; use crate::planner::operator::copy_from_file::CopyFromFileOperator; use crate::planner::operator::copy_to_file::CopyToFileOperator; +use crate::planner::operator::create_index::CreateIndexOperator; use crate::planner::operator::create_table::CreateTableOperator; use crate::planner::operator::delete::DeleteOperator; use crate::planner::operator::describe::DescribeOperator; @@ -71,6 +73,7 @@ pub enum Operator { AddColumn(AddColumnOperator), DropColumn(DropColumnOperator), CreateTable(CreateTableOperator), + CreateIndex(CreateIndexOperator), DropTable(DropTableOperator), Truncate(TruncateOperator), // Copy @@ -145,6 +148,7 @@ impl Operator { | Operator::AddColumn(_) | Operator::DropColumn(_) | Operator::CreateTable(_) + | Operator::CreateIndex(_) | Operator::DropTable(_) | Operator::Truncate(_) | Operator::CopyFromFile(_) @@ -195,7 +199,7 @@ impl Operator { .collect_vec(), Operator::Values(ValuesOperator { schema_ref, .. }) | Operator::Union(UnionOperator { schema_ref }) => Vec::clone(schema_ref), - Operator::Analyze(op) => op.columns.clone(), + Operator::Analyze(_) => vec![], Operator::Delete(op) => vec![op.primary_key_column.clone()], Operator::Dummy | Operator::Limit(_) @@ -207,6 +211,7 @@ impl Operator { | Operator::AddColumn(_) | Operator::DropColumn(_) | Operator::CreateTable(_) + | Operator::CreateIndex(_) | Operator::DropTable(_) | Operator::Truncate(_) | Operator::CopyFromFile(_) @@ -237,6 +242,7 @@ impl fmt::Display for Operator { Operator::AddColumn(op) => write!(f, "{}", op), Operator::DropColumn(op) => write!(f, "{}", op), Operator::CreateTable(op) => write!(f, "{}", op), + Operator::CreateIndex(op) => write!(f, "{}", op), Operator::DropTable(op) => write!(f, "{}", op), Operator::Truncate(op) => write!(f, "{}", op), Operator::CopyFromFile(op) => write!(f, "{}", op), diff --git a/src/storage/kip.rs b/src/storage/kip.rs index 88060874..24a9065f 100644 --- a/src/storage/kip.rs +++ b/src/storage/kip.rs @@ -1,10 +1,10 @@ use crate::catalog::{ColumnCatalog, ColumnRef, TableCatalog, TableMeta, TableName}; use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; -use crate::optimizer::core::column_meta::{ColumnMeta, ColumnMetaLoader}; +use crate::optimizer::core::statistics_meta::{StatisticMetaLoader, StatisticsMeta}; use crate::storage::table_codec::TableCodec; use crate::storage::{Bounds, IndexIter, Iter, Storage, Transaction}; -use crate::types::index::{Index, IndexMetaRef}; +use crate::types::index::{Index, IndexId, IndexMetaRef, IndexType}; use crate::types::tuple::{Tuple, TupleId}; use crate::types::{ColumnId, LogicalType}; use itertools::Itertools; @@ -22,7 +22,7 @@ use std::sync::Arc; #[derive(Clone)] pub struct KipStorage { pub inner: Arc, - pub(crate) meta_cache: Arc>>, + pub(crate) meta_cache: Arc>>, } impl KipStorage { @@ -56,7 +56,7 @@ impl Storage for KipStorage { pub struct KipTransaction { tx: mvcc::Transaction, table_cache: ShardingLruCache, - meta_cache: Arc>>, + meta_cache: Arc>>, } impl Transaction for KipTransaction { @@ -136,24 +136,43 @@ impl Transaction for KipTransaction { }) } + fn add_index_meta( + &mut self, + table_name: &TableName, + index_name: String, + column_ids: Vec, + ty: IndexType, + ) -> Result { + if let Some(mut table) = self.table(table_name.clone()).cloned() { + let index_meta = table.add_index_meta(index_name, column_ids, ty)?; + let (key, value) = TableCodec::encode_index_meta(table_name, index_meta)?; + self.tx.set(key, value); + self.table_cache.remove(table_name); + + Ok(index_meta.id) + } else { + Err(DatabaseError::TableNotFound) + } + } + fn add_index( &mut self, table_name: &str, index: Index, tuple_id: &TupleId, - is_unique: bool, ) -> Result<(), DatabaseError> { + if matches!(index.ty, IndexType::PrimaryKey) { + return Ok(()); + } let (key, value) = TableCodec::encode_index(table_name, &index, tuple_id)?; - if let Some(bytes) = self.tx.get(&key)? { - if is_unique { + if matches!(index.ty, IndexType::Unique) { + if let Some(bytes) = self.tx.get(&key)? { return if bytes != value { Err(DatabaseError::DuplicateUniqueValue) } else { Ok(()) }; - } else { - todo!("联合索引") } } @@ -162,10 +181,17 @@ impl Transaction for KipTransaction { Ok(()) } - fn del_index(&mut self, table_name: &str, index: &Index) -> Result<(), DatabaseError> { - let key = TableCodec::encode_index_key(table_name, index)?; - - self.tx.remove(&key)?; + fn del_index( + &mut self, + table_name: &str, + index: &Index, + tuple_id: Option<&TupleId>, + ) -> Result<(), DatabaseError> { + if matches!(index.ty, IndexType::PrimaryKey) { + return Ok(()); + } + self.tx + .remove(&TableCodec::encode_index_key(table_name, index, tuple_id)?)?; Ok(()) } @@ -219,8 +245,7 @@ impl Transaction for KipTransaction { let meta_ref = table.add_index_meta( format!("uk_{}", column.name()), vec![col_id], - true, - false, + IndexType::Unique, )?; let (key, value) = TableCodec::encode_index_meta(table_name, meta_ref)?; self.tx.set(key, value); @@ -242,20 +267,22 @@ impl Transaction for KipTransaction { table_name: &TableName, column_name: &str, ) -> Result<(), DatabaseError> { - if let Some(catalog) = self.table(table_name.clone()).cloned() { - let column = catalog.get_column_by_name(column_name).unwrap(); + if let Some(table_catalog) = self.table(table_name.clone()).cloned() { + let column = table_catalog.get_column_by_name(column_name).unwrap(); let (key, _) = TableCodec::encode_column(table_name, column)?; self.tx.remove(&key)?; - if let Some(index_meta) = catalog.get_unique_index(&column.id().unwrap()) { + for index_meta in table_catalog.indexes.iter() { + if !index_meta.column_ids.contains(&column.id().unwrap()) { + continue; + } let (index_meta_key, _) = TableCodec::encode_index_meta(table_name, index_meta)?; self.tx.remove(&index_meta_key)?; let (index_min, index_max) = TableCodec::index_bound(table_name, &index_meta.id); Self::_drop_data(&mut self.tx, &index_min, &index_max)?; } - self.table_cache.remove(table_name); Ok(()) @@ -362,6 +389,7 @@ impl Transaction for KipTransaction { } fn save_table_meta(&mut self, table_meta: &TableMeta) -> Result<(), DatabaseError> { + // TODO: clean old meta file let _ = self.meta_cache.remove(&table_meta.table_name); let (key, value) = TableCodec::encode_root_table(table_meta)?; self.tx.set(key, value); @@ -369,7 +397,7 @@ impl Transaction for KipTransaction { Ok(()) } - fn column_meta_paths(&self, table_name: &str) -> Result, DatabaseError> { + fn statistics_meta_paths(&self, table_name: &str) -> Result, DatabaseError> { if let Some(bytes) = self .tx .get(&TableCodec::encode_root_table_key(table_name))? @@ -382,11 +410,11 @@ impl Transaction for KipTransaction { Ok(vec![]) } - fn meta_loader(&self) -> ColumnMetaLoader + fn meta_loader(&self) -> StatisticMetaLoader where Self: Sized, { - ColumnMetaLoader::new(self, &self.meta_cache) + StatisticMetaLoader::new(self, &self.meta_cache) } async fn commit(self) -> Result<(), DatabaseError> { @@ -446,20 +474,26 @@ impl KipTransaction { let table_name = table.name.clone(); let index_column = table .columns() - .filter(|column| column.desc.is_index()) + .filter(|column| column.desc.is_primary || column.desc.is_unique) .map(|column| (column.id().unwrap(), column.clone())) .collect_vec(); for (col_id, col) in index_column { let is_primary = col.desc.is_primary; + let index_ty = if is_primary { + IndexType::PrimaryKey + } else if col.desc.is_unique { + IndexType::Unique + } else { + continue; + }; // FIXME: composite indexes may exist on future let prefix = if is_primary { "pk" } else { "uk" }; let meta_ref = table.add_index_meta( format!("{}_{}", prefix, col.name()), vec![col_id], - col.desc.is_unique, - is_primary, + index_ty, )?; let (key, value) = TableCodec::encode_index_meta(&table_name, meta_ref)?; tx.set(key, value); @@ -519,7 +553,7 @@ mod test { use crate::expression::range_detacher::Range; use crate::storage::kip::KipStorage; use crate::storage::{IndexIter, Iter, Storage, Transaction}; - use crate::types::index::IndexMeta; + use crate::types::index::{IndexMeta, IndexType}; use crate::types::tuple::Tuple; use crate::types::value::DataValue; use crate::types::LogicalType; @@ -629,8 +663,7 @@ mod test { table_name, pk_ty: LogicalType::Integer, name: "pk_a".to_string(), - is_unique: false, - is_primary: true, + ty: IndexType::PrimaryKey, }), table: &table, ranges: VecDeque::from(vec![ diff --git a/src/storage/mod.rs b/src/storage/mod.rs index bd34bf60..bbe01ade 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -4,18 +4,18 @@ mod table_codec; use crate::catalog::{ColumnCatalog, ColumnRef, TableCatalog, TableMeta, TableName}; use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; -use crate::optimizer::core::column_meta::ColumnMetaLoader; +use crate::optimizer::core::statistics_meta::StatisticMetaLoader; use crate::storage::table_codec::TableCodec; -use crate::types::index::{Index, IndexMetaRef}; +use crate::types::index::{Index, IndexId, IndexMetaRef, IndexType}; use crate::types::tuple::{Tuple, TupleId}; -use crate::types::value::ValueRef; +use crate::types::value::{DataValue, ValueRef}; use crate::types::ColumnId; use kip_db::kernel::lsm::iterator::Iter as DBIter; use kip_db::kernel::lsm::mvcc; use std::collections::{Bound, VecDeque}; -use std::mem; use std::ops::SubAssign; use std::sync::Arc; +use std::{mem, slice}; pub trait Storage: Sync + Send + Clone + 'static { type TransactionType: Transaction; @@ -49,15 +49,27 @@ pub trait Transaction: Sync + Send + 'static { ranges: Vec, ) -> Result, DatabaseError>; + fn add_index_meta( + &mut self, + table_name: &TableName, + index_name: String, + column_ids: Vec, + ty: IndexType, + ) -> Result; + fn add_index( &mut self, table_name: &str, index: Index, tuple_id: &TupleId, - is_unique: bool, ) -> Result<(), DatabaseError>; - fn del_index(&mut self, table_name: &str, index: &Index) -> Result<(), DatabaseError>; + fn del_index( + &mut self, + table_name: &str, + index: &Index, + tuple_id: Option<&TupleId>, + ) -> Result<(), DatabaseError>; fn append( &mut self, @@ -89,8 +101,8 @@ pub trait Transaction: Sync + Send + 'static { fn table(&self, table_name: TableName) -> Option<&TableCatalog>; fn table_metas(&self) -> Result, DatabaseError>; fn save_table_meta(&mut self, table_meta: &TableMeta) -> Result<(), DatabaseError>; - fn column_meta_paths(&self, table_name: &str) -> Result, DatabaseError>; - fn meta_loader(&self) -> ColumnMetaLoader + fn statistics_meta_paths(&self, table_name: &str) -> Result, DatabaseError>; + fn meta_loader(&self) -> StatisticMetaLoader where Self: Sized; @@ -132,13 +144,31 @@ impl IndexIter<'_> { } } - fn val_to_key(&self, val: ValueRef) -> Result, DatabaseError> { - if self.index_meta.is_unique { - let index = Index::new(self.index_meta.id, vec![val]); + fn bound_key(&self, val: &ValueRef, is_upper: bool) -> Result, DatabaseError> { + match self.index_meta.ty { + IndexType::PrimaryKey => TableCodec::encode_tuple_key(&self.table.name, val), + IndexType::Unique => { + let index = + Index::new(self.index_meta.id, slice::from_ref(val), self.index_meta.ty); - TableCodec::encode_index_key(&self.table.name, &index) - } else { - TableCodec::encode_tuple_key(&self.table.name, &val) + TableCodec::encode_index_key(&self.table.name, &index, None) + } + IndexType::Normal => { + let index = + Index::new(self.index_meta.id, slice::from_ref(val), self.index_meta.ty); + + TableCodec::encode_index_bound_key(&self.table.name, &index, is_upper) + } + IndexType::Composite => { + let values = if let DataValue::Tuple(Some(values)) = val.as_ref() { + values.as_slice() + } else { + slice::from_ref(val) + }; + let index = Index::new(self.index_meta.id, values, self.index_meta.ty); + + TableCodec::encode_index_bound_key(&self.table.name, &index, is_upper) + } } } @@ -160,6 +190,7 @@ impl IndexIter<'_> { } } +/// expression -> index value -> tuple impl Iter for IndexIter<'_> { fn next_tuple(&mut self) -> Result, DatabaseError> { // 1. check limit @@ -198,7 +229,7 @@ impl Iter for IndexIter<'_> { let mut has_next = false; while let Some((_, value_option)) = iter.try_next()? { if let Some(value) = value_option { - let index = if self.index_meta.is_primary { + let index = if matches!(self.index_meta.ty, IndexType::PrimaryKey) { let tuple = TableCodec::decode_tuple( &self.table.types(), &self.projections, @@ -228,28 +259,33 @@ impl Iter for IndexIter<'_> { let table_name = &self.table.name; let index_meta = &self.index_meta; - let bound_encode = |bound: Bound| -> Result<_, DatabaseError> { - match bound { - Bound::Included(val) => Ok(Bound::Included(self.val_to_key(val)?)), - Bound::Excluded(val) => Ok(Bound::Excluded(self.val_to_key(val)?)), - Bound::Unbounded => Ok(Bound::Unbounded), - } - }; + let bound_encode = + |bound: Bound, is_upper: bool| -> Result<_, DatabaseError> { + match bound { + Bound::Included(val) => { + Ok(Bound::Included(self.bound_key(&val, is_upper)?)) + } + Bound::Excluded(val) => { + Ok(Bound::Excluded(self.bound_key(&val, is_upper)?)) + } + Bound::Unbounded => Ok(Bound::Unbounded), + } + }; let check_bound = |value: &mut Bound>, bound: Vec| { if matches!(value, Bound::Unbounded) { let _ = mem::replace(value, Bound::Included(bound)); } }; - let (bound_min, bound_max) = if index_meta.is_unique { - TableCodec::index_bound(table_name, &index_meta.id) - } else { + let (bound_min, bound_max) = if matches!(index_meta.ty, IndexType::PrimaryKey) { TableCodec::tuple_bound(table_name) + } else { + TableCodec::index_bound(table_name, &index_meta.id) }; - let mut encode_min = bound_encode(min)?; + let mut encode_min = bound_encode(min, false)?; check_bound(&mut encode_min, bound_min); - let mut encode_max = bound_encode(max)?; + let mut encode_max = bound_encode(max, true)?; check_bound(&mut encode_max, bound_max); let iter = self.tx.iter( @@ -259,28 +295,50 @@ impl Iter for IndexIter<'_> { self.scope_iter = Some(iter); } Range::Eq(val) => { - let key = self.val_to_key(val)?; - if let Some(bytes) = self.tx.get(&key)? { - let index = if self.index_meta.is_unique { - IndexValue::Normal(TableCodec::decode_index( - &bytes, - &self.index_meta.pk_ty, - )) - } else if self.index_meta.is_primary { + match self.index_meta.ty { + IndexType::PrimaryKey => { + let bytes = + self.tx.get(&self.bound_key(&val, false)?)?.ok_or_else(|| { + DatabaseError::NotFound( + "secondary index", + format!("value -> {}", val), + ) + })?; + let tuple = TableCodec::decode_tuple( &self.table.types(), &self.projections, &self.tuple_schema_ref, &bytes, ); - - IndexValue::PrimaryKey(tuple) - } else { - todo!() - }; - self.index_values.push_back(index); + self.index_values.push_back(IndexValue::PrimaryKey(tuple)); + self.scope_iter = None; + } + IndexType::Unique => { + let bytes = + self.tx.get(&self.bound_key(&val, false)?)?.ok_or_else(|| { + DatabaseError::NotFound( + "secondary index", + format!("value -> {}", val), + ) + })?; + + self.index_values.push_back(IndexValue::Normal( + TableCodec::decode_index(&bytes, &self.index_meta.pk_ty), + )); + self.scope_iter = None; + } + IndexType::Normal | IndexType::Composite => { + let min = self.bound_key(&val, false)?; + let max = self.bound_key(&val, true)?; + + let iter = self.tx.iter( + Bound::Included(min.as_slice()), + Bound::Included(max.as_slice()), + )?; + self.scope_iter = Some(iter); + } } - self.scope_iter = None; } _ => (), } diff --git a/src/storage/table_codec.rs b/src/storage/table_codec.rs index e9f01959..05d90612 100644 --- a/src/storage/table_codec.rs +++ b/src/storage/table_codec.rs @@ -1,6 +1,6 @@ use crate::catalog::{ColumnCatalog, TableMeta}; use crate::errors::DatabaseError; -use crate::types::index::{Index, IndexId, IndexMeta}; +use crate::types::index::{Index, IndexId, IndexMeta, IndexType}; use crate::types::tuple::{Schema, Tuple, TupleId}; use crate::types::value::DataValue; use crate::types::LogicalType; @@ -202,12 +202,12 @@ impl TableCodec { } /// NonUnique Index: - /// Key: {TableName}{INDEX_TAG}{BOUND_MIN_TAG}{IndexID}{BOUND_MIN_TAG}{DataValue1}{DataValue2} .. - /// Value: TupleIDs + /// Key: {TableName}{INDEX_TAG}{BOUND_MIN_TAG}{IndexID}{BOUND_MIN_TAG}{DataValue1}{DataValue2} .. {TupleId} + /// Value: TupleID /// /// Unique Index: /// Key: {TableName}{INDEX_TAG}{BOUND_MIN_TAG}{IndexID}{BOUND_MIN_TAG}{DataValue} - /// Value: TupleIDs + /// Value: TupleID /// /// Tips: The unique index has only one ColumnID and one corresponding DataValue, /// so it can be positioned directly. @@ -216,22 +216,51 @@ impl TableCodec { index: &Index, tuple_id: &TupleId, ) -> Result<(Bytes, Bytes), DatabaseError> { - let key = TableCodec::encode_index_key(name, index)?; + let key = TableCodec::encode_index_key(name, index, Some(tuple_id))?; Ok((Bytes::from(key), Bytes::from(tuple_id.to_raw()))) } - pub fn encode_index_key(name: &str, index: &Index) -> Result, DatabaseError> { + fn _encode_index_key(name: &str, index: &Index) -> Result, DatabaseError> { let mut key_prefix = Self::key_prefix(CodecType::Index, name); key_prefix.push(BOUND_MIN_TAG); - key_prefix.append(&mut index.id.to_be_bytes().to_vec()); + key_prefix.extend_from_slice(&index.id.to_be_bytes()); key_prefix.push(BOUND_MIN_TAG); - for col_v in &index.column_values { + for col_v in index.column_values { col_v.memcomparable_encode(&mut key_prefix)?; key_prefix.push(BOUND_MIN_TAG); } + Ok(key_prefix) + } + + pub fn encode_index_bound_key( + name: &str, + index: &Index, + is_upper: bool, + ) -> Result, DatabaseError> { + let mut key_prefix = Self::_encode_index_key(name, index)?; + + if is_upper { + if let Some(last) = key_prefix.last_mut() { + *last = BOUND_MAX_TAG + } + } + Ok(key_prefix) + } + + pub fn encode_index_key( + name: &str, + index: &Index, + tuple_id: Option<&TupleId>, + ) -> Result, DatabaseError> { + let mut key_prefix = Self::_encode_index_key(name, index)?; + if let Some(tuple_id) = tuple_id { + if matches!(index.ty, IndexType::Normal | IndexType::Composite) { + key_prefix.append(&mut tuple_id.to_raw()); + } + } Ok(key_prefix) } @@ -282,7 +311,7 @@ mod tests { use crate::catalog::{ColumnCatalog, ColumnDesc, TableCatalog, TableMeta}; use crate::errors::DatabaseError; use crate::storage::table_codec::TableCodec; - use crate::types::index::{Index, IndexMeta}; + use crate::types::index::{Index, IndexMeta, IndexType}; use crate::types::tuple::Tuple; use crate::types::value::DataValue; use crate::types::LogicalType; @@ -291,6 +320,7 @@ mod tests { use rust_decimal::Decimal; use std::collections::BTreeSet; use std::ops::Bound; + use std::slice; use std::sync::Arc; fn build_table_codec() -> TableCatalog { @@ -354,8 +384,7 @@ mod tests { table_name: Arc::new("T1".to_string()), pk_ty: LogicalType::Integer, name: "index_1".to_string(), - is_unique: false, - is_primary: false, + ty: IndexType::PrimaryKey, }; let (_, bytes) = TableCodec::encode_index_meta(&"T1".to_string(), &index_meta)?; @@ -367,11 +396,8 @@ mod tests { #[test] fn test_table_codec_index() -> Result<(), DatabaseError> { let table_catalog = build_table_codec(); - - let index = Index { - id: 0, - column_values: vec![Arc::new(DataValue::Int32(Some(0)))], - }; + let value = Arc::new(DataValue::Int32(Some(0))); + let index = Index::new(0, slice::from_ref(&value), IndexType::PrimaryKey); let tuple_id = Arc::new(DataValue::Int32(Some(0))); let (_, bytes) = TableCodec::encode_index(&table_catalog.name, &index, &tuple_id)?; @@ -453,8 +479,7 @@ mod tests { table_name: Arc::new(table_name.to_string()), pk_ty: LogicalType::Integer, name: "".to_string(), - is_unique: false, - is_primary: false, + ty: IndexType::PrimaryKey, }; let (key, _) = @@ -501,12 +526,14 @@ mod tests { let table_catalog = TableCatalog::new(Arc::new("T0".to_string()), vec![column]).unwrap(); let op = |value: DataValue, index_id: usize, table_name: &String| { - let index = Index { - id: index_id as u32, - column_values: vec![Arc::new(value)], - }; + let value = Arc::new(value); + let index = Index::new( + index_id as u32, + slice::from_ref(&value), + IndexType::PrimaryKey, + ); - TableCodec::encode_index_key(table_name, &index).unwrap() + TableCodec::encode_index_key(table_name, &index, None).unwrap() }; set.insert(op(DataValue::Int32(Some(0)), 0, &table_catalog.name)); @@ -555,12 +582,14 @@ mod tests { fn test_table_codec_index_all_bound() { let mut set = BTreeSet::new(); let op = |value: DataValue, index_id: usize, table_name: &str| { - let index = Index { - id: index_id as u32, - column_values: vec![Arc::new(value)], - }; + let value = Arc::new(value); + let index = Index::new( + index_id as u32, + slice::from_ref(&value), + IndexType::PrimaryKey, + ); - TableCodec::encode_index_key(&table_name.to_string(), &index).unwrap() + TableCodec::encode_index_key(&table_name.to_string(), &index, None).unwrap() }; set.insert(op(DataValue::Int32(Some(0)), 0, "T0")); diff --git a/src/types/index.rs b/src/types/index.rs index 5adc85ee..72e44bb7 100644 --- a/src/types/index.rs +++ b/src/types/index.rs @@ -1,5 +1,7 @@ -use crate::catalog::TableName; +use crate::catalog::{TableCatalog, TableName}; +use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; +use crate::expression::ScalarExpression; use crate::types::value::ValueRef; use crate::types::{ColumnId, LogicalType}; use serde::{Deserialize, Serialize}; @@ -10,6 +12,14 @@ use std::sync::Arc; pub type IndexId = u32; pub type IndexMetaRef = Arc; +#[derive(Debug, Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Hash)] +pub enum IndexType { + PrimaryKey, + Unique, + Normal, + Composite, +} + #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct IndexInfo { pub(crate) meta: IndexMetaRef, @@ -23,18 +33,44 @@ pub struct IndexMeta { pub table_name: TableName, pub pk_ty: LogicalType, pub name: String, - pub is_unique: bool, - pub is_primary: bool, + pub ty: IndexType, +} + +impl IndexMeta { + pub(crate) fn column_exprs( + &self, + table: &TableCatalog, + ) -> Result, DatabaseError> { + let mut exprs = Vec::with_capacity(self.column_ids.len()); + + for column_id in self.column_ids.iter() { + if let Some(column) = table.get_column_by_id(column_id) { + exprs.push(ScalarExpression::ColumnRef(column.clone())); + } else { + return Err(DatabaseError::NotFound( + "Column by id", + column_id.to_string(), + )); + } + } + Ok(exprs) + } } -pub struct Index { +#[derive(Debug)] +pub struct Index<'a> { pub id: IndexId, - pub column_values: Vec, + pub column_values: &'a [ValueRef], + pub ty: IndexType, } -impl Index { - pub fn new(id: IndexId, column_values: Vec) -> Self { - Index { id, column_values } +impl<'a> Index<'a> { + pub fn new(id: IndexId, column_values: &'a [ValueRef], ty: IndexType) -> Self { + Index { + id, + column_values, + ty, + } } } diff --git a/src/types/value.rs b/src/types/value.rs index 8c654c19..fb09aea5 100644 --- a/src/types/value.rs +++ b/src/types/value.rs @@ -571,6 +571,11 @@ impl DataValue { } DataValue::Null => (), DataValue::Decimal(Some(_v)) => todo!(), + DataValue::Tuple(Some(values)) => { + for v in values.iter() { + v.memcomparable_encode(b)?; + } + } value => { if !value.is_null() { return Err(DatabaseError::InvalidType); diff --git a/tests/slt/create_index.slt b/tests/slt/create_index.slt new file mode 100644 index 00000000..546fe9a2 --- /dev/null +++ b/tests/slt/create_index.slt @@ -0,0 +1,28 @@ +statement ok +create table t(id int primary key, v1 int, v2 int, v3 int); + +statement ok +create index index_1 on t (v1); + +statement error +create index index_1 on t (v1); + +statement ok +create index index_2 on t (v1, v2); + +statement ok +create unique index index_3 on t (v2, v3); + +statement ok +insert into t values (0, 0, 0, 0); + +statement error +insert into t values (1, 1, 0, 0); + +query IIII +select * from t; +---- +0 0 0 0 + +statement ok +drop table t \ No newline at end of file diff --git a/tests/slt/dummy.slt b/tests/slt/dummy.slt index 1ff5e0c0..febf308d 100644 --- a/tests/slt/dummy.slt +++ b/tests/slt/dummy.slt @@ -16,6 +16,26 @@ SELECT 1.01=1.01 ---- true +query B +SELECT (0, 1) = (0, 1) +---- +true + +query B +SELECT (0, 1) != (0, 1) +---- +false + +query B +SELECT (0, 1, 2) = (0, 1) +---- +false + +query B +SELECT (0, 1, 2) >= (0, 1) +---- +true + query B SELECT NULL=NULL ---- diff --git a/tests/slt/where_by_index.slt b/tests/slt/where_by_index.slt index d144532f..7f05ad48 100644 --- a/tests/slt/where_by_index.slt +++ b/tests/slt/where_by_index.slt @@ -4,6 +4,18 @@ create table t1(id int primary key, c1 int, c2 int); statement ok copy t1 from 'tests/data/row_20000.csv' ( DELIMITER '|' ); +statement ok +insert into t1 values(100000000, null, null); + +statement ok +create unique index u_c1_index on t1 (c1); + +statement ok +create index c2_index on t1 (c2); + +statement ok +create index p_index on t1 (c1, c2); + statement ok analyze table t1; @@ -128,5 +140,58 @@ select * from t1 where id = 5 or (id > 5 and (id > 6 or id < 8) and id < 12); 6 7 8 9 10 11 +query IIT +select * from t1 where c1 = 7 and c2 = 8; +---- +6 7 8 + +query IIT +select * from t1 where c1 = 7 and c2 < 9; +---- +6 7 8 + +query IIT +select * from t1 where (c1 = 7 or c1 = 10) and c2 < 9; +---- +6 7 8 + +query IIT +select * from t1 where c1 is null and c2 is null; +---- +100000000 null null + +query IIT +select * from t1 where c1 > 0 and c1 < 8; +---- +0 1 2 +3 4 5 +6 7 8 + +query IIT +select * from t1 where c2 > 0 and c2 < 9; +---- +0 1 2 +3 4 5 +6 7 8 + +statement ok +update t1 set c2 = 9 where c1 = 1 + +query IIT rowsort +select * from t1 where c2 > 0 and c2 < 10; +---- +0 1 9 +3 4 5 +6 7 8 + +statement ok +delete from t1 where c1 = 4 + +query IIT rowsort +select * from t1 where c2 > 0 and c2 < 10; +---- +0 1 9 +6 7 8 + statement ok drop table t1; \ No newline at end of file