From 81268ec219b0b48e58801d5944bc30c077a44b29 Mon Sep 17 00:00:00 2001
From: Kould
Date: Wed, 7 Aug 2024 20:00:10 +0800
Subject: [PATCH 1/4] refactor: remove async & kipdb

---
 Cargo.toml | 78 ++--
 README.md | 32 +-
 examples/hello_world.rs | 17 +-
 examples/transaction.rs | 21 +-
 src/bin/server.rs | 331 --------
 src/binder/create_table.rs | 10 +-
 src/binder/mod.rs | 18 +-
 src/binder/select.rs | 26 +-
 src/db.rs | 143 +++--
 src/errors.rs | 23 +-
 src/execution/ddl/add_column.rs | 89 ++++
 src/execution/ddl/create_index.rs | 87 ++++
 src/execution/ddl/create_table.rs | 35 ++
 src/execution/ddl/drop_column.rs | 79 ++++
 src/execution/ddl/drop_table.rs | 33 ++
 src/execution/{volcano => }/ddl/mod.rs | 0
 src/execution/ddl/truncate.rs | 30 ++
 src/execution/dml/analyze.rs | 132 ++++++
 .../{volcano => }/dml/copy_from_file.rs | 92 ++--
 .../{volcano => }/dml/copy_to_file.rs | 0
 src/execution/dml/delete.rs | 106 +++++
 src/execution/dml/insert.rs | 123 +++++
 src/execution/{volcano => }/dml/mod.rs | 0
 src/execution/dml/update.rs | 120 +++++
 .../{volcano => }/dql/aggregate/avg.rs | 4 +-
 .../{volcano => }/dql/aggregate/count.rs | 2 +-
 .../{volcano => }/dql/aggregate/hash_agg.rs | 79 ++--
 .../{volcano => }/dql/aggregate/min_max.rs | 2 +-
 .../{volcano => }/dql/aggregate/mod.rs | 8 +-
 src/execution/dql/aggregate/simple_agg.rs | 65 +++
 .../{volcano => }/dql/aggregate/sum.rs | 2 +-
 src/execution/dql/describe.rs | 102 +++++
 src/execution/dql/dummy.rs | 19 +
 src/execution/dql/explain.rs | 34 ++
 src/execution/dql/filter.rs | 46 ++
 src/execution/dql/index_scan.rs | 51 +++
 .../{volcano => }/dql/join/hash_join.rs | 426 +++++++++--------
 src/execution/{volcano => }/dql/join/mod.rs | 0
 .../dql/join/nested_loop_join.rs | 307 +++++++------
 src/execution/dql/limit.rs | 61 +++
 src/execution/{volcano => }/dql/mod.rs | 0
 src/execution/{volcano => }/dql/projection.rs | 41 +-
 src/execution/dql/seq_scan.rs | 36 ++
 src/execution/dql/show_table.rs | 31 ++
 src/execution/{volcano => }/dql/sort.rs | 57 +--
 src/execution/dql/union.rs | 44 ++
 src/execution/dql/values.rs | 29 ++
 src/execution/marco.rs | 12 +
 src/execution/mod.rs | 208 ++++++++-
 src/execution/volcano/ddl/add_column.rs | 84 ----
 src/execution/volcano/ddl/create_index.rs | 81 ----
 src/execution/volcano/ddl/create_table.rs | 37 --
 src/execution/volcano/ddl/drop_column.rs | 76 ----
 src/execution/volcano/ddl/drop_table.rs | 36 --
 src/execution/volcano/ddl/truncate.rs | 34 --
 src/execution/volcano/dml/analyze.rs | 129 ------
 src/execution/volcano/dml/delete.rs | 100 ----
 src/execution/volcano/dml/insert.rs | 111 -----
 src/execution/volcano/dml/update.rs | 103 -----
 .../volcano/dql/aggregate/simple_agg.rs | 62 ---
 src/execution/volcano/dql/describe.rs | 104 -----
 src/execution/volcano/dql/dummy.rs | 23 -
 src/execution/volcano/dql/explain.rs | 38 --
 src/execution/volcano/dql/filter.rs | 45 --
 src/execution/volcano/dql/index_scan.rs | 53 ---
 src/execution/volcano/dql/limit.rs | 59 ---
 src/execution/volcano/dql/seq_scan.rs | 39 --
 src/execution/volcano/dql/show_table.rs | 34 --
 src/execution/volcano/dql/union.rs | 45 --
 src/execution/volcano/dql/values.rs | 33 --
 src/execution/volcano/mod.rs | 205 ---------
 src/expression/range_detacher.rs | 123 +++--
 src/lib.rs | 2 +
 src/main.rs | 2 +
 src/marcos/mod.rs | 2 +-
 src/optimizer/core/histogram.rs | 2 +-
 src/optimizer/core/memo.rs | 26 +-
 src/optimizer/core/statistics_meta.rs | 7 +-
 src/optimizer/heuristic/graph.rs | 48 +-
 src/optimizer/heuristic/matcher.rs | 6 +-
 .../rule/normalization/column_pruning.rs | 10 +-
.../rule/normalization/combine_operators.rs | 26 +- .../rule/normalization/pushdown_limit.rs | 34 +- .../rule/normalization/pushdown_predicates.rs | 37 +- .../rule/normalization/simplification.rs | 98 ++-- src/storage/mod.rs | 55 +-- src/storage/{kipdb.rs => rocksdb.rs} | 172 ++++--- src/utils/bit_vector.rs | 93 ++++ src/utils/lru.rs | 430 ++++++++++++++++++ src/utils/mod.rs | 2 + tests/sqllogictest/Cargo.toml | 11 +- tests/sqllogictest/src/lib.rs | 13 +- tests/sqllogictest/src/main.rs | 6 +- 93 files changed, 3130 insertions(+), 2897 deletions(-) delete mode 100644 src/bin/server.rs create mode 100644 src/execution/ddl/add_column.rs create mode 100644 src/execution/ddl/create_index.rs create mode 100644 src/execution/ddl/create_table.rs create mode 100644 src/execution/ddl/drop_column.rs create mode 100644 src/execution/ddl/drop_table.rs rename src/execution/{volcano => }/ddl/mod.rs (100%) create mode 100644 src/execution/ddl/truncate.rs create mode 100644 src/execution/dml/analyze.rs rename src/execution/{volcano => }/dml/copy_from_file.rs (69%) rename src/execution/{volcano => }/dml/copy_to_file.rs (100%) create mode 100644 src/execution/dml/delete.rs create mode 100644 src/execution/dml/insert.rs rename src/execution/{volcano => }/dml/mod.rs (100%) create mode 100644 src/execution/dml/update.rs rename src/execution/{volcano => }/dql/aggregate/avg.rs (92%) rename src/execution/{volcano => }/dql/aggregate/count.rs (95%) rename src/execution/{volcano => }/dql/aggregate/hash_agg.rs (80%) rename src/execution/{volcano => }/dql/aggregate/min_max.rs (96%) rename src/execution/{volcano => }/dql/aggregate/mod.rs (86%) create mode 100644 src/execution/dql/aggregate/simple_agg.rs rename src/execution/{volcano => }/dql/aggregate/sum.rs (96%) create mode 100644 src/execution/dql/describe.rs create mode 100644 src/execution/dql/dummy.rs create mode 100644 src/execution/dql/explain.rs create mode 100644 src/execution/dql/filter.rs create mode 100644 src/execution/dql/index_scan.rs rename src/execution/{volcano => }/dql/join/hash_join.rs (62%) rename src/execution/{volcano => }/dql/join/mod.rs (100%) rename src/execution/{volcano => }/dql/join/nested_loop_join.rs (70%) create mode 100644 src/execution/dql/limit.rs rename src/execution/{volcano => }/dql/mod.rs (100%) rename src/execution/{volcano => }/dql/projection.rs (53%) create mode 100644 src/execution/dql/seq_scan.rs create mode 100644 src/execution/dql/show_table.rs rename src/execution/{volcano => }/dql/sort.rs (70%) create mode 100644 src/execution/dql/union.rs create mode 100644 src/execution/dql/values.rs create mode 100644 src/execution/marco.rs delete mode 100644 src/execution/volcano/ddl/add_column.rs delete mode 100644 src/execution/volcano/ddl/create_index.rs delete mode 100644 src/execution/volcano/ddl/create_table.rs delete mode 100644 src/execution/volcano/ddl/drop_column.rs delete mode 100644 src/execution/volcano/ddl/drop_table.rs delete mode 100644 src/execution/volcano/ddl/truncate.rs delete mode 100644 src/execution/volcano/dml/analyze.rs delete mode 100644 src/execution/volcano/dml/delete.rs delete mode 100644 src/execution/volcano/dml/insert.rs delete mode 100644 src/execution/volcano/dml/update.rs delete mode 100644 src/execution/volcano/dql/aggregate/simple_agg.rs delete mode 100644 src/execution/volcano/dql/describe.rs delete mode 100644 src/execution/volcano/dql/dummy.rs delete mode 100644 src/execution/volcano/dql/explain.rs delete mode 100644 src/execution/volcano/dql/filter.rs delete mode 100644 
src/execution/volcano/dql/index_scan.rs delete mode 100644 src/execution/volcano/dql/limit.rs delete mode 100644 src/execution/volcano/dql/seq_scan.rs delete mode 100644 src/execution/volcano/dql/show_table.rs delete mode 100644 src/execution/volcano/dql/union.rs delete mode 100644 src/execution/volcano/dql/values.rs delete mode 100644 src/execution/volcano/mod.rs create mode 100644 src/main.rs rename src/storage/{kipdb.rs => rocksdb.rs} (65%) create mode 100644 src/utils/bit_vector.rs create mode 100644 src/utils/lru.rs create mode 100644 src/utils/mod.rs diff --git a/Cargo.toml b/Cargo.toml index 6b148e0d..c5dcf981 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,22 +9,16 @@ description = "SQL as a Function for Rust" license = "Apache-2.0" repository = "https://github.com/KipData/KipSQL" readme = "README.md" -keywords = ["async", "sql", "sqlite", "database", "mysql"] +keywords = ["sql", "sqlite", "database", "mysql"] categories = ["development-tools", "database"] default-run = "fnck_sql" -[[bin]] -name = "fnck_sql" -path = "src/bin/server.rs" -required-features = ["net"] - [lib] doctest = false [features] -default = ["marcos", "net"] +default = ["marcos"] marcos = [] -net = ["dep:pgwire", "dep:async-trait", "dep:env_logger", "dep:log"] [[bench]] name = "query_bench" @@ -32,49 +26,41 @@ path = "benchmarks/query_benchmark.rs" harness = false [dependencies] -ahash = { version = "0.8.11" } -async-lock = { version = "3.3.0" } -async-trait = { version = "0.1.77", optional = true } -bincode = { version = "1.3.3" } -bytes = { version = "1.5.0" } -chrono = { version = "0.4.26" } -clap = { version = "4.5.2" } -comfy-table = { version = "7.1.0" } -csv = { version = "1.3.0" } -dirs = { version = "5.0.1" } -env_logger = { version = "0.11.3", optional = true } -futures = { version = "0.3.30" } -futures-async-stream = { version = "0.2.11" } -integer-encoding = { version = "3.0.4" } -itertools = { version = "0.12.1" } -kip_db = { version = "0.1.2-alpha.26.fix1" } -lazy_static = { version = "1.4.0" } -log = { version = "0.4.21", optional = true } -ordered-float = { version = "4.2.0" } -paste = { version = "1.0.14" } -petgraph = { version = "0.6.4" } -pgwire = { version = "0.19.2", optional = true } -rand = { version = "0.9.0-alpha.0" } -regex = { version = "1.10.3" } -rust_decimal = { version = "1.34.3" } -serde = { version = "1.0.197", features = ["derive", "rc"] } -siphasher = { version = "1.0.0", features = ["serde"] } -sqlparser = { version = "0.34.0", features = ["serde"] } +ahash = { version = "0.8" } +bincode = { version = "1" } +bytes = { version = "1" } +chrono = { version = "0.4" } +clap = { version = "4" } +comfy-table = { version = "7" } +csv = { version = "1" } +dirs = { version = "5" } +integer-encoding = { version = "3" } +itertools = { version = "0.12" } +lazy_static = { version = "1" } +ordered-float = { version = "4" } +paste = { version = "1" } +parking_lot = { version = "0.12", features = ["arc_lock"] } +petgraph = { version = "0.6" } +rand = { version = "0.9.0-alpha" } +regex = { version = "1" } +rocksdb = { git = "https://github.com/rust-rocksdb/rust-rocksdb", rev = "1cf906dc4087f06631820f13855e6b27bd21b972" } +rust_decimal = { version = "1" } +serde = { version = "1", features = ["derive", "rc"] } +siphasher = { version = "1", features = ["serde"] } +sqlparser = { version = "0.34", features = ["serde"] } strum_macros = { version = "0.26.2" } -thiserror = { version = "1.0.58" } -tokio = { version = "1.36.0", features = ["full"] } -tracing = { version = "0.1.40" } +thiserror = { 
version = "1" } +tracing = { version = "0.1" } typetag = { version = "0.2" } [dev-dependencies] -cargo-tarpaulin = { version = "0.27.1" } -criterion = { version = "0.5.1", features = ["async_tokio", "html_reports"] } -indicatif = { version = "0.17.8" } -rand_distr = { version = "0.4.3" } -tempfile = { version = "3.10.1" } -tokio-test = { version = "0.4.3" } +cargo-tarpaulin = { version = "0.27" } +criterion = { version = "0.5", features = ["async_tokio", "html_reports"] } +indicatif = { version = "0.17" } +rand_distr = { version = "0.4" } +tempfile = { version = "3.10" } # Benchmark -sqlite = { version = "0.34.0" } +sqlite = { version = "0.34" } [target.'cfg(unix)'.dev-dependencies] pprof = { version = "0.13", features = ["flamegraph", "criterion"] } diff --git a/README.md b/README.md index a3e7f510..e9301704 100755 --- a/README.md +++ b/README.md @@ -25,7 +25,6 @@ Built by @KipData

CI -

@@ -53,18 +52,12 @@ then use `psql` to enter sql Using FnckSQL in code ```rust let fnck_sql = DataBaseBuilder::path("./data") - .build() - .await?; -let tuples = fnck_sql.run("select * from t1").await?; + .build()?; +let tuples = fnck_sql.run("select * from t1")?; ``` Storage Support: -- KipDB +- RocksDB -### Docker -#### Pull Image -```shell -docker pull kould23333/fncksql:latest -``` #### Build From Source ~~~shell git clone https://github.com/KipData/FnckSQL.git @@ -72,21 +65,6 @@ cd FnckSQL docker build -t kould23333/fncksql:latest . ~~~ -#### Run -We installed the `psql` tool in the image for easy debug. - -You can use `psql -h 127.0.0.1 -p 5432` to do this. - -~~~shell -docker run -d \ ---name=fncksql \ --p 5432:5432 \ ---restart=always \ --v fncksql-data:/fnck_sql/fncksql_data \ --v /etc/localtime:/etc/localtime:ro \ -kould23333/fncksql:latest -~~~ - ### Features - ORM Mapping: `features = ["marcos"]` ```rust @@ -123,15 +101,13 @@ function!(TestFunction::test(LogicalType::Integer, LogicalType::Integer) -> Logi let fnck_sql = DataBaseBuilder::path("./data") .register_function(TestFunction::new()) - .build() - .await?; + .build()?; ``` - Optimizer - RBO - CBO based on RBO(Physical Selection) - Execute - Volcano - - Codegen on LuaJIT: `features = ["codegen_execute"]` - MVCC Transaction - Optimistic - Field options diff --git a/examples/hello_world.rs b/examples/hello_world.rs index 15cf1018..ea72f426 100644 --- a/examples/hello_world.rs +++ b/examples/hello_world.rs @@ -28,17 +28,12 @@ implement_from_tuple!( ); #[cfg(feature = "marcos")] -#[tokio::main] -async fn main() -> Result<(), DatabaseError> { - let database = DataBaseBuilder::path("./hello_world").build().await?; +fn main() -> Result<(), DatabaseError> { + let database = DataBaseBuilder::path("./hello_world").build()?; - let _ = database - .run("create table if not exists my_struct (c1 int primary key, c2 int)") - .await?; - let _ = database - .run("insert into my_struct values(0, 0), (1, 1)") - .await?; - let (schema, tuples) = database.run("select * from my_struct").await?; + let _ = database.run("create table if not exists my_struct (c1 int primary key, c2 int)")?; + let _ = database.run("insert into my_struct values(0, 0), (1, 1)")?; + let (schema, tuples) = database.run("select * from my_struct")?; let tuples = tuples .into_iter() .map(|tuple| MyStruct::from((&schema, tuple))) @@ -46,7 +41,7 @@ async fn main() -> Result<(), DatabaseError> { println!("{:#?}", tuples); - let _ = database.run("drop table my_struct").await?; + let _ = database.run("drop table my_struct")?; Ok(()) } diff --git a/examples/transaction.rs b/examples/transaction.rs index d1cf38ea..97d6df4a 100644 --- a/examples/transaction.rs +++ b/examples/transaction.rs @@ -1,23 +1,20 @@ use fnck_sql::db::DataBaseBuilder; use fnck_sql::errors::DatabaseError; -#[tokio::main] -async fn main() -> Result<(), DatabaseError> { - let database = DataBaseBuilder::path("./transaction").build().await?; - let mut tx_1 = database.new_transaction().await?; +fn main() -> Result<(), DatabaseError> { + let database = DataBaseBuilder::path("./transaction").build()?; + let mut tx_1 = database.new_transaction()?; - let _ = tx_1 - .run("create table if not exists t1 (c1 int primary key, c2 int)") - .await?; - let _ = tx_1.run("insert into t1 values(0, 0), (1, 1)").await?; + let _ = tx_1.run("create table if not exists t1 (c1 int primary key, c2 int)")?; + let _ = tx_1.run("insert into t1 values(0, 0), (1, 1)")?; - assert!(database.run("select * from t1").await.is_err()); + 
assert!(database.run("select * from t1").is_err()); - tx_1.commit().await?; + tx_1.commit()?; - assert!(database.run("select * from t1").await.is_ok()); + assert!(database.run("select * from t1").is_ok()); - let _ = database.run("drop table t1").await?; + let _ = database.run("drop table t1")?; Ok(()) } diff --git a/src/bin/server.rs b/src/bin/server.rs deleted file mode 100644 index 1e23dc89..00000000 --- a/src/bin/server.rs +++ /dev/null @@ -1,331 +0,0 @@ -use async_trait::async_trait; -use clap::Parser; -use fnck_sql::db::{DBTransaction, DataBaseBuilder, Database}; -use fnck_sql::errors::DatabaseError; -use fnck_sql::storage::kipdb::KipStorage; -use fnck_sql::types::tuple::{Schema, Tuple}; -use fnck_sql::types::LogicalType; -use futures::stream; -use log::{error, info, LevelFilter}; -use pgwire::api::auth::noop::NoopStartupHandler; -use pgwire::api::auth::StartupHandler; -use pgwire::api::query::{ - ExtendedQueryHandler, PlaceholderExtendedQueryHandler, SimpleQueryHandler, -}; -use pgwire::api::results::{DataRowEncoder, FieldFormat, FieldInfo, QueryResponse, Response, Tag}; -use pgwire::api::MakeHandler; -use pgwire::api::{ClientInfo, StatelessMakeHandler, Type}; -use pgwire::error::{ErrorInfo, PgWireError, PgWireResult}; -use pgwire::tokio::process_socket; -use std::fmt::Debug; -use std::io; -use std::path::PathBuf; -use std::sync::Arc; -use tokio::net::TcpListener; -use tokio::sync::Mutex; - -pub(crate) const BANNER: &str = " -███████╗███╗ ██╗ ██████╗██╗ ██╗ ███████╗ ██████╗ ██╗ -██╔════╝████╗ ██║██╔════╝██║ ██╔╝ ██╔════╝██╔═══██╗██║ -█████╗ ██╔██╗ ██║██║ █████╔╝ ███████╗██║ ██║██║ -██╔══╝ ██║╚██╗██║██║ ██╔═██╗ ╚════██║██║▄▄ ██║██║ -██║ ██║ ╚████║╚██████╗██║ ██╗ ███████║╚██████╔╝███████╗ -╚═╝ ╚═╝ ╚═══╝ ╚═════╝╚═╝ ╚═╝ ╚══════╝ ╚══▀▀═╝ ╚══════╝ - -"; - -pub const BLOOM: &str = " - _ ._ _ , _ ._ - (_ ' ( ` )_ .__) - ( ( ( ) `) ) _) -- --=(;;(----(-----)-----);;)==-- - - (__ (_ (_ . _) _) ,__) - `~~`\\ ' . 
/`~~` - ; ; - / \\ -_____________/_ __ \\_____________ -"; - -#[derive(Parser, Debug)] -#[command(author, version, about, long_about = None)] -struct Args { - #[clap(long, default_value = "127.0.0.1")] - ip: String, - #[clap(long, default_value = "5432")] - port: u16, - #[clap(long, default_value = "./fncksql_data")] - path: String, -} - -pub struct FnckSQLBackend { - inner: Arc>, -} - -pub struct SessionBackend { - inner: Arc>, - tx: Mutex>>, -} - -impl MakeHandler for FnckSQLBackend { - type Handler = Arc; - - fn make(&self) -> Self::Handler { - Arc::new(SessionBackend { - inner: Arc::clone(&self.inner), - tx: Mutex::new(None), - }) - } -} - -impl FnckSQLBackend { - pub async fn new(path: impl Into + Send) -> Result { - let database = DataBaseBuilder::path(path).build().await?; - - Ok(FnckSQLBackend { - inner: Arc::new(database), - }) - } -} - -#[async_trait] -impl SimpleQueryHandler for SessionBackend { - async fn do_query<'a, 'b: 'a, C>( - &'b self, - _client: &mut C, - query: &'a str, - ) -> PgWireResult>> - where - C: ClientInfo + Unpin + Send + Sync, - { - match query.to_uppercase().as_str() { - "BEGIN;" | "BEGIN" | "START TRANSACTION;" | "START TRANSACTION" => { - let mut guard = self.tx.lock().await; - - if guard.is_some() { - return Err(PgWireError::ApiError(Box::new( - DatabaseError::TransactionAlreadyExists, - ))); - } - let transaction = self - .inner - .new_transaction() - .await - .map_err(|e| PgWireError::ApiError(Box::new(e)))?; - guard.replace(transaction); - - Ok(vec![Response::Execution(Tag::new("OK"))]) - } - "COMMIT;" | "COMMIT" | "COMMIT WORK;" | "COMMIT WORK" => { - let mut guard = self.tx.lock().await; - - if let Some(transaction) = guard.take() { - transaction - .commit() - .await - .map_err(|e| PgWireError::ApiError(Box::new(e)))?; - - Ok(vec![Response::Execution(Tag::new("OK"))]) - } else { - Err(PgWireError::ApiError(Box::new( - DatabaseError::NoTransactionBegin, - ))) - } - } - "ROLLBACK;" | "ROLLBACK" => { - let mut guard = self.tx.lock().await; - - if guard.is_none() { - return Err(PgWireError::ApiError(Box::new( - DatabaseError::NoTransactionBegin, - ))); - } - drop(guard.take()); - - Ok(vec![Response::Execution(Tag::new("OK"))]) - } - _ => { - let mut guard = self.tx.lock().await; - - let (schema, tuples) = if let Some(transaction) = guard.as_mut() { - transaction.run(query).await - } else { - self.inner.run(query).await - } - .map_err(|e| PgWireError::ApiError(Box::new(e)))?; - - Ok(vec![Response::Query(encode_tuples(&schema, tuples)?)]) - } - } - } -} - -fn encode_tuples<'a>(schema: &Schema, tuples: Vec) -> PgWireResult> { - if tuples.is_empty() { - return Ok(QueryResponse::new(Arc::new(vec![]), stream::empty())); - } - - let mut results = Vec::with_capacity(tuples.len()); - let schema = Arc::new( - schema - .iter() - .map(|column| { - let pg_type = into_pg_type(column.datatype())?; - - Ok(FieldInfo::new( - column.name().into(), - None, - None, - pg_type, - FieldFormat::Text, - )) - }) - .collect::>>()?, - ); - - for tuple in tuples { - let mut encoder = DataRowEncoder::new(schema.clone()); - for value in tuple.values { - match value.logical_type() { - LogicalType::SqlNull => encoder.encode_field(&None::), - LogicalType::Boolean => encoder.encode_field(&value.bool()), - LogicalType::Tinyint => encoder.encode_field(&value.i8()), - LogicalType::UTinyint => encoder.encode_field(&value.u8().map(|v| v as i8)), - LogicalType::Smallint => encoder.encode_field(&value.i16()), - LogicalType::USmallint => encoder.encode_field(&value.u16().map(|v| v as i16)), - 
LogicalType::Integer => encoder.encode_field(&value.i32()), - LogicalType::UInteger => encoder.encode_field(&value.u32()), - LogicalType::Bigint => encoder.encode_field(&value.i64()), - LogicalType::UBigint => encoder.encode_field(&value.u64().map(|v| v as i64)), - LogicalType::Float => encoder.encode_field(&value.float()), - LogicalType::Double => encoder.encode_field(&value.double()), - LogicalType::Char(..) | LogicalType::Varchar(..) => { - encoder.encode_field(&value.utf8()) - } - LogicalType::Date => encoder.encode_field(&value.date()), - LogicalType::DateTime => encoder.encode_field(&value.datetime()), - LogicalType::Time => encoder.encode_field(&value.time()), - LogicalType::Decimal(_, _) => todo!(), - _ => unreachable!(), - }?; - } - - results.push(encoder.finish()); - } - - Ok(QueryResponse::new(schema, stream::iter(results))) -} - -fn into_pg_type(data_type: &LogicalType) -> PgWireResult { - Ok(match data_type { - LogicalType::SqlNull => Type::UNKNOWN, - LogicalType::Boolean => Type::BOOL, - LogicalType::Tinyint | LogicalType::UTinyint => Type::CHAR, - LogicalType::Smallint | LogicalType::USmallint => Type::INT2, - LogicalType::Integer | LogicalType::UInteger => Type::INT4, - LogicalType::Bigint | LogicalType::UBigint => Type::INT8, - LogicalType::Float => Type::FLOAT4, - LogicalType::Double => Type::FLOAT8, - LogicalType::Varchar(..) => Type::VARCHAR, - LogicalType::Date | LogicalType::DateTime => Type::DATE, - LogicalType::Char(..) => Type::CHAR, - LogicalType::Time => Type::TIME, - LogicalType::Decimal(_, _) => todo!(), - _ => { - return Err(PgWireError::UserError(Box::new(ErrorInfo::new( - "ERROR".to_owned(), - "XX000".to_owned(), - format!("Unsupported Datatype {data_type}"), - )))); - } - }) -} - -async fn quit() -> io::Result<()> { - #[cfg(unix)] - { - let mut interrupt = - tokio::signal::unix::signal(tokio::signal::unix::SignalKind::interrupt())?; - let mut terminate = - tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())?; - tokio::select! { - _ = interrupt.recv() => (), - _ = terminate.recv() => (), - } - Ok(()) - } - #[cfg(windows)] - { - let mut signal = tokio::signal::windows::ctrl_c()?; - let _ = signal.recv().await; - - Ok(()) - } -} - -#[tokio::main(worker_threads = 8)] -async fn main() { - env_logger::Builder::new() - .filter_level(LevelFilter::Info) - .init(); - - let args = Args::parse(); - info!("{} \nVersion: {}\n", BANNER, env!("CARGO_PKG_VERSION")); - info!(":) Welcome to the FnckSQL🖕"); - info!("Listen on port {}", args.port); - info!("Tips🔞: "); - info!( - "1. all data is in the \'{}\' folder in the directory where the application is run", - args.path - ); - - let backend = FnckSQLBackend::new(args.path).await.unwrap(); - let processor = Arc::new(backend); - // We have not implemented extended query in this server, use placeholder instead - let placeholder = Arc::new(StatelessMakeHandler::new(Arc::new( - PlaceholderExtendedQueryHandler, - ))); - let authenticator = Arc::new(StatelessMakeHandler::new(Arc::new(NoopStartupHandler))); - let server_addr = format!("{}:{}", args.ip, args.port); - let listener = TcpListener::bind(server_addr).await.unwrap(); - - tokio::select! 
{ - res = server_run(processor, placeholder, authenticator, listener) => { - if let Err(err) = res { - error!("[Listener][Failed To Accept]: {}", err); - } - } - _ = quit() => info!("{BLOOM}") - } -} - -async fn server_run< - A: MakeHandler>, - Q: MakeHandler>, - EQ: MakeHandler>, ->( - processor: Arc, - placeholder: Arc, - authenticator: Arc, - listener: TcpListener, -) -> io::Result<()> { - loop { - let incoming_socket = listener.accept().await?; - let authenticator_ref = authenticator.make(); - let processor_ref = processor.make(); - let placeholder_ref = placeholder.make(); - - tokio::spawn(async move { - if let Err(err) = process_socket( - incoming_socket.0, - None, - authenticator_ref, - processor_ref, - placeholder_ref, - ) - .await - { - error!("Failed To Process: {}", err); - } - }); - } -} diff --git a/src/binder/create_table.rs b/src/binder/create_table.rs index 2a2fba8e..f7060304 100644 --- a/src/binder/create_table.rs +++ b/src/binder/create_table.rs @@ -143,18 +143,18 @@ mod tests { use super::*; use crate::binder::BinderContext; use crate::catalog::ColumnDesc; - use crate::storage::kipdb::KipStorage; + use crate::storage::rocksdb::RocksStorage; use crate::storage::Storage; use crate::types::LogicalType; use sqlparser::ast::CharLengthUnits; use std::sync::atomic::AtomicUsize; use tempfile::TempDir; - #[tokio::test] - async fn test_create_bind() -> Result<(), DatabaseError> { + #[test] + fn test_create_bind() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await?; - let transaction = storage.transaction().await?; + let storage = RocksStorage::new(temp_dir.path())?; + let transaction = storage.transaction()?; let functions = Default::default(); let sql = "create table t1 (id int primary key, name varchar(10) null)"; diff --git a/src/binder/mod.rs b/src/binder/mod.rs index db1cea2b..205acc3c 100644 --- a/src/binder/mod.rs +++ b/src/binder/mod.rs @@ -379,7 +379,7 @@ pub mod test { use crate::catalog::{ColumnCatalog, ColumnDesc}; use crate::errors::DatabaseError; use crate::planner::LogicalPlan; - use crate::storage::kipdb::KipStorage; + use crate::storage::rocksdb::RocksStorage; use crate::storage::{Storage, Transaction}; use crate::types::LogicalType::Integer; use std::path::PathBuf; @@ -387,11 +387,11 @@ pub mod test { use std::sync::Arc; use tempfile::TempDir; - pub(crate) async fn build_test_catalog( + pub(crate) fn build_test_catalog( path: impl Into + Send, - ) -> Result { - let storage = KipStorage::new(path).await?; - let mut transaction = storage.transaction().await?; + ) -> Result { + let storage = RocksStorage::new(path)?; + let mut transaction = storage.transaction()?; let _ = transaction.create_table( Arc::new("t1".to_string()), @@ -427,15 +427,15 @@ pub mod test { false, )?; - transaction.commit().await?; + transaction.commit()?; Ok(storage) } - pub async fn select_sql_run>(sql: S) -> Result { + pub fn select_sql_run>(sql: S) -> Result { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = build_test_catalog(temp_dir.path()).await?; - let transaction = storage.transaction().await?; + let storage = build_test_catalog(temp_dir.path())?; + let transaction = storage.transaction()?; let functions = Default::default(); let mut binder = Binder::new( BinderContext::new(&transaction, &functions, Arc::new(AtomicUsize::new(0))), diff --git a/src/binder/select.rs b/src/binder/select.rs index 5aa69564..94ab2300 
100644 --- a/src/binder/select.rs +++ b/src/binder/select.rs @@ -18,7 +18,7 @@ use super::{lower_case_name, lower_ident, Binder, BinderContext, QueryBindStep, use crate::catalog::{ColumnCatalog, ColumnSummary, TableName}; use crate::errors::DatabaseError; -use crate::execution::volcano::dql::join::joins_nullable; +use crate::execution::dql::join::joins_nullable; use crate::expression::{AliasType, BinaryOperator}; use crate::planner::operator::insert::InsertOperator; use crate::planner::operator::join::JoinCondition; @@ -942,30 +942,28 @@ mod tests { use crate::binder::test::select_sql_run; use crate::errors::DatabaseError; - #[tokio::test] - async fn test_select_bind() -> Result<(), DatabaseError> { - let plan_1 = select_sql_run("select * from t1").await?; + #[test] + fn test_select_bind() -> Result<(), DatabaseError> { + let plan_1 = select_sql_run("select * from t1")?; println!("just_col:\n {:#?}", plan_1); - let plan_2 = select_sql_run("select t1.c1, t1.c2 from t1").await?; + let plan_2 = select_sql_run("select t1.c1, t1.c2 from t1")?; println!("table_with_col:\n {:#?}", plan_2); - let plan_3 = select_sql_run("select t1.c1, t1.c2 from t1 where c1 > 2").await?; + let plan_3 = select_sql_run("select t1.c1, t1.c2 from t1 where c1 > 2")?; println!("table_with_col_and_c1_compare_constant:\n {:#?}", plan_3); - let plan_4 = select_sql_run("select t1.c1, t1.c2 from t1 where c1 > c2").await?; + let plan_4 = select_sql_run("select t1.c1, t1.c2 from t1 where c1 > c2")?; println!("table_with_col_and_c1_compare_c2:\n {:#?}", plan_4); - let plan_5 = select_sql_run("select avg(t1.c1) from t1").await?; + let plan_5 = select_sql_run("select avg(t1.c1) from t1")?; println!("table_with_col_and_c1_avg:\n {:#?}", plan_5); - let plan_6 = - select_sql_run("select t1.c1, t1.c2 from t1 where (t1.c1 - t1.c2) > 1").await?; + let plan_6 = select_sql_run("select t1.c1, t1.c2 from t1 where (t1.c1 - t1.c2) > 1")?; println!("table_with_col_nested:\n {:#?}", plan_6); - let plan_7 = select_sql_run("select * from t1 limit 1").await?; + let plan_7 = select_sql_run("select * from t1 limit 1")?; println!("limit:\n {:#?}", plan_7); - let plan_8 = select_sql_run("select * from t1 offset 2").await?; + let plan_8 = select_sql_run("select * from t1 offset 2")?; println!("offset:\n {:#?}", plan_8); - let plan_9 = - select_sql_run("select c1, c3 from t1 inner join t2 on c1 = c3 and c1 > 1").await?; + let plan_9 = select_sql_run("select c1, c3 from t1 inner join t2 on c1 = c3 and c1 > 1")?; println!("join:\n {:#?}", plan_9); Ok(()) diff --git a/src/db.rs b/src/db.rs index b459a5ca..d6f8df8c 100644 --- a/src/db.rs +++ b/src/db.rs @@ -1,13 +1,6 @@ -use ahash::HashMap; -use async_lock::{RwLock, RwLockReadGuardArc, RwLockWriteGuardArc}; -use sqlparser::ast::Statement; -use std::path::PathBuf; -use std::sync::atomic::AtomicUsize; -use std::sync::Arc; - use crate::binder::{command_type, Binder, BinderContext, CommandType}; use crate::errors::DatabaseError; -use crate::execution::volcano::{build_write, try_collect}; +use crate::execution::{build_write, try_collect}; use crate::expression::function::{FunctionSummary, ScalarFunctionImpl}; use crate::optimizer::heuristic::batch::HepBatchStrategy; use crate::optimizer::heuristic::optimizer::HepOptimizer; @@ -15,17 +8,23 @@ use crate::optimizer::rule::implementation::ImplementationRuleImpl; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::parser::parse_sql; use crate::planner::LogicalPlan; -use crate::storage::kipdb::KipStorage; +use 
crate::storage::rocksdb::RocksStorage; use crate::storage::{Storage, Transaction}; use crate::types::tuple::{SchemaRef, Tuple}; use crate::udf::current_date::CurrentDate; +use ahash::HashMap; +use parking_lot::{ArcRwLockReadGuard, ArcRwLockWriteGuard, RawRwLock, RwLock}; +use sqlparser::ast::Statement; +use std::path::PathBuf; +use std::sync::atomic::AtomicUsize; +use std::sync::Arc; pub(crate) type Functions = HashMap>; #[allow(dead_code)] pub(crate) enum MetaDataLock { - Read(RwLockReadGuardArc<()>), - Write(RwLockWriteGuardArc<()>), + Read(ArcRwLockReadGuard), + Write(ArcRwLockWriteGuard), } pub struct DataBaseBuilder { @@ -48,10 +47,10 @@ impl DataBaseBuilder { self } - pub async fn build(mut self) -> Result, DatabaseError> { + pub fn build(mut self) -> Result, DatabaseError> { self = self.register_function(CurrentDate::new()); - let storage = KipStorage::new(self.path).await?; + let storage = RocksStorage::new(self.path)?; Ok(Database { storage, @@ -69,10 +68,7 @@ pub struct Database { impl Database { /// Run SQL queries. - pub async fn run>( - &self, - sql: T, - ) -> Result<(SchemaRef, Vec), DatabaseError> { + pub fn run>(&self, sql: T) -> Result<(SchemaRef, Vec), DatabaseError> { // parse let stmts = parse_sql(sql)?; if stmts.is_empty() { @@ -80,33 +76,32 @@ impl Database { } let stmt = &stmts[0]; let _guard = if matches!(command_type(stmt)?, CommandType::DDL) { - MetaDataLock::Write(self.mdl.write_arc().await) + MetaDataLock::Write(self.mdl.write_arc()) } else { - MetaDataLock::Read(self.mdl.read_arc().await) + MetaDataLock::Read(self.mdl.read_arc()) }; - let transaction = self.storage.transaction().await?; + let transaction = self.storage.transaction()?; let plan = Self::build_plan(stmt, &transaction, &self.functions)?; - Self::run_volcano(transaction, plan).await + Self::run_volcano(transaction, plan) } - pub(crate) async fn run_volcano( - mut transaction: ::TransactionType, + pub(crate) fn run_volcano( + mut transaction: ::TransactionType<'_>, mut plan: LogicalPlan, ) -> Result<(SchemaRef, Vec), DatabaseError> { let schema = plan.output_schema().clone(); - let mut stream = build_write(plan, &mut transaction); - let tuples = try_collect(&mut stream).await?; + let iterator = build_write(plan, &mut transaction); + let tuples = try_collect(iterator)?; - drop(stream); - transaction.commit().await?; + transaction.commit()?; Ok((schema, tuples)) } - pub async fn new_transaction(&self) -> Result, DatabaseError> { - let guard = self.mdl.read_arc().await; - let transaction = self.storage.transaction().await?; + pub fn new_transaction(&self) -> Result, DatabaseError> { + let guard = self.mdl.read_arc(); + let transaction = self.storage.transaction()?; Ok(DBTransaction { inner: transaction, @@ -117,7 +112,7 @@ impl Database { pub(crate) fn build_plan( stmt: &Statement, - transaction: &::TransactionType, + transaction: &::TransactionType<'_>, functions: &Functions, ) -> Result { let mut binder = Binder::new( @@ -222,17 +217,14 @@ impl Database { } } -pub struct DBTransaction { - inner: S::TransactionType, +pub struct DBTransaction<'a, S: Storage + 'a> { + inner: S::TransactionType<'a>, functions: Arc, - _guard: RwLockReadGuardArc<()>, + _guard: ArcRwLockReadGuard, } -impl DBTransaction { - pub async fn run>( - &mut self, - sql: T, - ) -> Result<(SchemaRef, Vec), DatabaseError> { +impl DBTransaction<'_, S> { + pub fn run>(&mut self, sql: T) -> Result<(SchemaRef, Vec), DatabaseError> { let stmts = parse_sql(sql)?; if stmts.is_empty() { return Err(DatabaseError::EmptyStatement); @@ -246,13 
+238,13 @@ impl DBTransaction { let mut plan = Database::::build_plan(stmt, &self.inner, &self.functions)?; let schema = plan.output_schema().clone(); - let mut stream = build_write(plan, &mut self.inner); + let executor = build_write(plan, &mut self.inner); - Ok((schema, try_collect(&mut stream).await?)) + Ok((schema, try_collect(executor)?)) } - pub async fn commit(self) -> Result<(), DatabaseError> { - self.inner.commit().await?; + pub fn commit(self) -> Result<(), DatabaseError> { + self.inner.commit()?; Ok(()) } @@ -276,7 +268,7 @@ mod test { use std::sync::Arc; use tempfile::TempDir; - async fn build_table(mut transaction: impl Transaction) -> Result<(), DatabaseError> { + fn build_table(mut transaction: impl Transaction) -> Result<(), DatabaseError> { let columns = vec![ ColumnCatalog::new( "c1".to_string(), @@ -290,19 +282,19 @@ mod test { ), ]; let _ = transaction.create_table(Arc::new("t1".to_string()), columns, false)?; - transaction.commit().await?; + transaction.commit()?; Ok(()) } - #[tokio::test] - async fn test_run_sql() -> Result<(), DatabaseError> { + #[test] + fn test_run_sql() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let database = DataBaseBuilder::path(temp_dir.path()).build().await?; - let transaction = database.storage.transaction().await?; - build_table(transaction).await?; + let database = DataBaseBuilder::path(temp_dir.path()).build()?; + let transaction = database.storage.transaction()?; + build_table(transaction)?; - let batch = database.run("select * from t1").await?; + let batch = database.run("select * from t1")?; println!("{:#?}", batch); Ok(()) @@ -316,45 +308,40 @@ mod test { Ok(plus_unary_evaluator.unary_eval(&value)) })); - #[tokio::test] - async fn test_udf() -> Result<(), DatabaseError> { + #[test] + fn test_udf() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); let fnck_sql = DataBaseBuilder::path(temp_dir.path()) .register_function(TestFunction::new()) - .build() - .await?; + .build()?; let _ = fnck_sql - .run("CREATE TABLE test (id int primary key, c1 int, c2 int default test(1, 2));") - .await?; + .run("CREATE TABLE test (id int primary key, c1 int, c2 int default test(1, 2));")?; let _ = fnck_sql - .run("INSERT INTO test VALUES (1, 2, 2), (0, 1, 1), (2, 1, 1), (3, 3, default);") - .await?; - let (schema, tuples) = fnck_sql.run("select test(c1, 1), c2 from test").await?; + .run("INSERT INTO test VALUES (1, 2, 2), (0, 1, 1), (2, 1, 1), (3, 3, default);")?; + let (schema, tuples) = fnck_sql.run("select test(c1, 1), c2 from test")?; println!("{}", create_table(&schema, &tuples)); Ok(()) } - #[tokio::test] - async fn test_transaction_sql() -> Result<(), DatabaseError> { + #[test] + fn test_transaction_sql() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let fnck_sql = DataBaseBuilder::path(temp_dir.path()).build().await?; + let fnck_sql = DataBaseBuilder::path(temp_dir.path()).build()?; - let _ = fnck_sql - .run("create table t1 (a int primary key, b int)") - .await?; + let _ = fnck_sql.run("create table t1 (a int primary key, b int)")?; - let mut tx_1 = fnck_sql.new_transaction().await?; - let mut tx_2 = fnck_sql.new_transaction().await?; + let mut tx_1 = fnck_sql.new_transaction()?; + let mut tx_2 = fnck_sql.new_transaction()?; - let _ = tx_1.run("insert into t1 values(0, 0)").await?; - let _ = tx_1.run("insert into t1 values(1, 
1)").await?; + let _ = tx_1.run("insert into t1 values(0, 0)")?; + let _ = tx_1.run("insert into t1 values(1, 1)")?; - let _ = tx_2.run("insert into t1 values(0, 0)").await?; - let _ = tx_2.run("insert into t1 values(3, 3)").await?; + let _ = tx_2.run("insert into t1 values(0, 0)")?; + let _ = tx_2.run("insert into t1 values(3, 3)")?; - let (_, tuples_1) = tx_1.run("select * from t1").await?; - let (_, tuples_2) = tx_2.run("select * from t1").await?; + let (_, tuples_1) = tx_1.run("select * from t1")?; + let (_, tuples_2) = tx_2.run("select * from t1")?; assert_eq!(tuples_1.len(), 2); assert_eq!(tuples_2.len(), 2); @@ -389,12 +376,12 @@ mod test { ] ); - tx_1.commit().await?; + tx_1.commit()?; - assert!(tx_2.commit().await.is_err()); + assert!(tx_2.commit().is_err()); - let mut tx_3 = fnck_sql.new_transaction().await?; - let res = tx_3.run("create table t2 (a int primary key, b int)").await; + let mut tx_3 = fnck_sql.new_transaction()?; + let res = tx_3.run("create table t2 (a int primary key, b int)"); assert!(res.is_err()); Ok(()) diff --git a/src/errors.rs b/src/errors.rs index 79acfcb0..b09346a4 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,7 +1,6 @@ use crate::expression::{BinaryOperator, UnaryOperator}; use crate::types::LogicalType; use chrono::ParseError; -use kip_db::KernelError; use sqlparser::parser::ParserError; use std::num::{ParseFloatError, ParseIntError, TryFromIntError}; use std::str::ParseBoolError; @@ -17,6 +16,8 @@ pub enum DatabaseError { #[from] Box, ), + #[error("cache size overflow")] + CacheSizeOverFlow, #[error("cast fail")] CastFail, #[error("channel close")] @@ -67,18 +68,6 @@ pub enum DatabaseError { #[from] std::io::Error, ), - #[error("task join error: {0}")] - TaskJoinError( - #[from] - #[source] - tokio::task::JoinError, - ), - #[error("kipdb error: {0}")] - KipDBError( - #[source] - #[from] - KernelError, - ), #[error("{0} and {1} do not match")] MisMatch(&'static str, &'static str), #[error("add column must be nullable or specify a default value")] @@ -121,6 +110,14 @@ pub enum DatabaseError { ), #[error("must contain primary key!")] PrimaryKeyNotFound, + #[error("rocksdb: {0}")] + RocksDB( + #[source] + #[from] + rocksdb::Error, + ), + #[error("the number of caches cannot be divisible by the number of shards")] + ShardingNotAlign, #[error("the table already exists")] TableExists, #[error("the table not found")] diff --git a/src/execution/ddl/add_column.rs b/src/execution/ddl/add_column.rs new file mode 100644 index 00000000..0fe6d6c2 --- /dev/null +++ b/src/execution/ddl/add_column.rs @@ -0,0 +1,89 @@ +use crate::execution::{build_read, Executor, WriteExecutor}; +use crate::planner::LogicalPlan; +use crate::types::index::{Index, IndexType}; +use crate::types::tuple::Tuple; +use crate::types::tuple_builder::TupleBuilder; +use crate::types::value::DataValue; +use crate::{ + planner::operator::alter_table::add_column::AddColumnOperator, storage::Transaction, throw, +}; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; +use std::slice; +use std::sync::Arc; + +pub struct AddColumn { + op: AddColumnOperator, + input: LogicalPlan, +} + +impl From<(AddColumnOperator, LogicalPlan)> for AddColumn { + fn from((op, input): (AddColumnOperator, LogicalPlan)) -> Self { + Self { op, input } + } +} + +impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for AddColumn { + fn execute_mut(mut self, transaction: &'a mut T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let AddColumnOperator { + table_name, + column, + 
if_not_exists, + } = &self.op; + + let mut unique_values = column.desc().is_unique.then(Vec::new); + let mut tuples = Vec::new(); + let schema = self.input.output_schema(); + let mut types = Vec::with_capacity(schema.len() + 1); + + for column_ref in schema.iter() { + types.push(*column_ref.datatype()); + } + types.push(*column.datatype()); + + let mut coroutine = build_read(self.input, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + let mut tuple: Tuple = throw!(tuple); + + if let Some(value) = throw!(column.default_value()) { + if let Some(unique_values) = &mut unique_values { + unique_values.push((tuple.id.clone().unwrap(), value.clone())); + } + tuple.values.push(value); + } else { + tuple.values.push(Arc::new(DataValue::Null)); + } + tuples.push(tuple); + } + drop(coroutine); + + for tuple in tuples { + throw!(transaction.append(table_name, tuple, &types, true)); + } + let col_id = throw!(transaction.add_column(table_name, column, *if_not_exists)); + + // Unique Index + if let (Some(unique_values), Some(unique_meta)) = ( + unique_values, + transaction + .table(table_name.clone()) + .and_then(|table| table.get_unique_index(&col_id)) + .cloned(), + ) { + for (tuple_id, value) in unique_values { + let index = + Index::new(unique_meta.id, slice::from_ref(&value), IndexType::Unique); + throw!(transaction.add_index(table_name, index, &tuple_id)); + } + } + + yield Ok(TupleBuilder::build_result("1".to_string())); + }, + ) + } +} diff --git a/src/execution/ddl/create_index.rs b/src/execution/ddl/create_index.rs new file mode 100644 index 00000000..b2128dc0 --- /dev/null +++ b/src/execution/ddl/create_index.rs @@ -0,0 +1,87 @@ +use crate::execution::dql::projection::Projection; +use crate::execution::DatabaseError; +use crate::execution::{build_read, Executor, WriteExecutor}; +use crate::expression::ScalarExpression; +use crate::planner::operator::create_index::CreateIndexOperator; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use crate::throw; +use crate::types::index::Index; +use crate::types::tuple::Tuple; +use crate::types::tuple_builder::TupleBuilder; +use crate::types::ColumnId; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; + +pub struct CreateIndex { + op: CreateIndexOperator, + input: LogicalPlan, +} + +impl From<(CreateIndexOperator, LogicalPlan)> for CreateIndex { + fn from((op, input): (CreateIndexOperator, LogicalPlan)) -> Self { + Self { op, input } + } +} + +impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for CreateIndex { + fn execute_mut(mut self, transaction: &'a mut T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let CreateIndexOperator { + table_name, + index_name, + columns, + if_not_exists, + ty, + } = self.op; + + let (column_ids, column_exprs): (Vec, Vec) = columns + .into_iter() + .filter_map(|column| { + column + .id() + .map(|id| (id, ScalarExpression::ColumnRef(column))) + }) + .unzip(); + let schema = self.input.output_schema().clone(); + let index_id = + match transaction.add_index_meta(&table_name, index_name, column_ids, ty) { + Ok(index_id) => index_id, + Err(DatabaseError::DuplicateIndex(index_name)) => { + if if_not_exists { + return; + } else { + throw!(Err(DatabaseError::DuplicateIndex(index_name))) + } + } + err => throw!(err), + }; + let mut index_values = Vec::new(); + let mut coroutine = build_read(self.input, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + let mut tuple: Tuple = 
throw!(tuple); + + let tuple_id = if let Some(tuple_id) = tuple.id.take() { + tuple_id + } else { + continue; + }; + index_values.push(( + tuple_id, + throw!(Projection::projection(&tuple, &column_exprs, &schema)), + )); + } + drop(coroutine); + for (tuple_id, values) in index_values { + let index = Index::new(index_id, &values, ty); + throw!(transaction.add_index(table_name.as_str(), index, &tuple_id)); + } + yield Ok(TupleBuilder::build_result("1".to_string())); + }, + ) + } +} diff --git a/src/execution/ddl/create_table.rs b/src/execution/ddl/create_table.rs new file mode 100644 index 00000000..c7e987a0 --- /dev/null +++ b/src/execution/ddl/create_table.rs @@ -0,0 +1,35 @@ +use crate::execution::{Executor, WriteExecutor}; +use crate::planner::operator::create_table::CreateTableOperator; +use crate::storage::Transaction; +use crate::throw; +use crate::types::tuple_builder::TupleBuilder; + +pub struct CreateTable { + op: CreateTableOperator, +} + +impl From for CreateTable { + fn from(op: CreateTableOperator) -> Self { + CreateTable { op } + } +} + +impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for CreateTable { + fn execute_mut(self, transaction: &'a mut T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let CreateTableOperator { + table_name, + columns, + if_not_exists, + } = self.op; + + let _ = + throw!(transaction.create_table(table_name.clone(), columns, if_not_exists)); + + yield Ok(TupleBuilder::build_result(format!("{}", table_name))); + }, + ) + } +} diff --git a/src/execution/ddl/drop_column.rs b/src/execution/ddl/drop_column.rs new file mode 100644 index 00000000..cb8a0e88 --- /dev/null +++ b/src/execution/ddl/drop_column.rs @@ -0,0 +1,79 @@ +use crate::errors::DatabaseError; +use crate::execution::{build_read, Executor, WriteExecutor}; +use crate::planner::operator::alter_table::drop_column::DropColumnOperator; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use crate::throw; +use crate::types::tuple::Tuple; +use crate::types::tuple_builder::TupleBuilder; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; + +pub struct DropColumn { + op: DropColumnOperator, + input: LogicalPlan, +} + +impl From<(DropColumnOperator, LogicalPlan)> for DropColumn { + fn from((op, input): (DropColumnOperator, LogicalPlan)) -> Self { + Self { op, input } + } +} + +impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for DropColumn { + fn execute_mut(mut self, transaction: &'a mut T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let DropColumnOperator { + table_name, + column_name, + if_exists, + } = self.op; + + let tuple_columns = self.input.output_schema(); + if let Some((column_index, is_primary)) = tuple_columns + .iter() + .enumerate() + .find(|(_, column)| column.name() == column_name) + .map(|(i, column)| (i, column.desc.is_primary)) + { + if is_primary { + throw!(Err(DatabaseError::InvalidColumn( + "drop of primary key column is not allowed.".to_owned(), + ))); + } + let mut tuples = Vec::new(); + let mut types = Vec::with_capacity(tuple_columns.len() - 1); + + for (i, column_ref) in tuple_columns.iter().enumerate() { + if i == column_index { + continue; + } + types.push(*column_ref.datatype()); + } + let mut coroutine = build_read(self.input, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + let mut tuple: Tuple = throw!(tuple); + let _ = tuple.values.remove(column_index); + + tuples.push(tuple); + } + drop(coroutine); + for tuple in tuples { + 
throw!(transaction.append(&table_name, tuple, &types, true)); + } + throw!(transaction.drop_column(&table_name, &column_name)); + + yield Ok(TupleBuilder::build_result("1".to_string())); + } else if if_exists { + return; + } else { + yield Err(DatabaseError::NotFound("drop column", column_name)); + } + }, + ) + } +} diff --git a/src/execution/ddl/drop_table.rs b/src/execution/ddl/drop_table.rs new file mode 100644 index 00000000..b38bb820 --- /dev/null +++ b/src/execution/ddl/drop_table.rs @@ -0,0 +1,33 @@ +use crate::execution::{Executor, WriteExecutor}; +use crate::planner::operator::drop_table::DropTableOperator; +use crate::storage::Transaction; +use crate::throw; +use crate::types::tuple_builder::TupleBuilder; + +pub struct DropTable { + op: DropTableOperator, +} + +impl From for DropTable { + fn from(op: DropTableOperator) -> Self { + DropTable { op } + } +} + +impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for DropTable { + fn execute_mut(self, transaction: &'a mut T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let DropTableOperator { + table_name, + if_exists, + } = self.op; + + throw!(transaction.drop_table(&table_name, if_exists)); + + yield Ok(TupleBuilder::build_result(format!("{}", table_name))); + }, + ) + } +} diff --git a/src/execution/volcano/ddl/mod.rs b/src/execution/ddl/mod.rs similarity index 100% rename from src/execution/volcano/ddl/mod.rs rename to src/execution/ddl/mod.rs diff --git a/src/execution/ddl/truncate.rs b/src/execution/ddl/truncate.rs new file mode 100644 index 00000000..c0160d7e --- /dev/null +++ b/src/execution/ddl/truncate.rs @@ -0,0 +1,30 @@ +use crate::execution::{Executor, WriteExecutor}; +use crate::planner::operator::truncate::TruncateOperator; +use crate::storage::Transaction; +use crate::throw; +use crate::types::tuple_builder::TupleBuilder; + +pub struct Truncate { + op: TruncateOperator, +} + +impl From for Truncate { + fn from(op: TruncateOperator) -> Self { + Truncate { op } + } +} + +impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Truncate { + fn execute_mut(self, transaction: &'a mut T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let TruncateOperator { table_name } = self.op; + + throw!(transaction.drop_data(&table_name)); + + yield Ok(TupleBuilder::build_result(format!("{}", table_name))); + }, + ) + } +} diff --git a/src/execution/dml/analyze.rs b/src/execution/dml/analyze.rs new file mode 100644 index 00000000..9d5d31ff --- /dev/null +++ b/src/execution/dml/analyze.rs @@ -0,0 +1,132 @@ +use crate::catalog::TableName; +use crate::errors::DatabaseError; +use crate::execution::dql::projection::Projection; +use crate::execution::{build_read, Executor, WriteExecutor}; +use crate::optimizer::core::histogram::HistogramBuilder; +use crate::optimizer::core::statistics_meta::StatisticsMeta; +use crate::planner::operator::analyze::AnalyzeOperator; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use crate::throw; +use crate::types::index::IndexMetaRef; +use crate::types::tuple::Tuple; +use crate::types::value::{DataValue, Utf8Type}; +use itertools::Itertools; +use sqlparser::ast::CharLengthUnits; +use std::fmt::Formatter; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; +use std::sync::Arc; +use std::time::{SystemTime, UNIX_EPOCH}; +use std::{fmt, fs}; + +const DEFAULT_NUM_OF_BUCKETS: usize = 100; +const DEFAULT_STATISTICS_META_PATH: &str = "fnck_sql_statistics_metas"; + +pub struct Analyze { + table_name: TableName, + input: LogicalPlan, + index_metas: Vec, 
+} + +impl From<(AnalyzeOperator, LogicalPlan)> for Analyze { + fn from( + ( + AnalyzeOperator { + table_name, + index_metas, + }, + input, + ): (AnalyzeOperator, LogicalPlan), + ) -> Self { + Analyze { + table_name, + input, + index_metas, + } + } +} + +impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Analyze { + fn execute_mut(self, transaction: &'a mut T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let Analyze { + table_name, + mut input, + index_metas, + } = self; + + let schema = input.output_schema().clone(); + let mut builders = Vec::with_capacity(index_metas.len()); + let table = throw!(transaction + .table(table_name.clone()) + .cloned() + .ok_or(DatabaseError::TableNotFound)); + + for index in table.indexes() { + builders.push(( + index.id, + throw!(index.column_exprs(&table)), + throw!(HistogramBuilder::new(index, None)), + )); + } + + let mut coroutine = build_read(input, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + let tuple = throw!(tuple); + + for (_, exprs, builder) in builders.iter_mut() { + let values = throw!(Projection::projection(&tuple, exprs, &schema)); + + if values.len() == 1 { + throw!(builder.append(&values[0])); + } else { + throw!(builder.append(&Arc::new(DataValue::Tuple(Some(values))))); + } + } + } + drop(coroutine); + let mut values = Vec::with_capacity(builders.len()); + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("It's the end of the world!") + .as_secs(); + let dir_path = dirs::config_dir() + .expect("Your system does not have a Config directory!") + .join(DEFAULT_STATISTICS_META_PATH) + .join(table_name.as_str()) + .join(ts.to_string()); + throw!(fs::create_dir_all(&dir_path).map_err(DatabaseError::IO)); + + for (index_id, _, builder) in builders { + let path: String = dir_path.join(index_id.to_string()).to_string_lossy().into(); + let (histogram, sketch) = throw!(builder.build(DEFAULT_NUM_OF_BUCKETS)); + let meta = StatisticsMeta::new(histogram, sketch); + + throw!(meta.to_file(&path)); + values.push(Arc::new(DataValue::Utf8 { + value: Some(path.clone()), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, + })); + throw!(transaction.save_table_meta(&table_name, path, meta)); + } + yield Ok(Tuple { id: None, values }); + }, + ) + } +} + +impl fmt::Display for AnalyzeOperator { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let indexes = self.index_metas.iter().map(|index| &index.name).join(", "); + + write!(f, "Analyze {} -> [{}]", self.table_name, indexes)?; + + Ok(()) + } +} diff --git a/src/execution/volcano/dml/copy_from_file.rs b/src/execution/dml/copy_from_file.rs similarity index 69% rename from src/execution/volcano/dml/copy_from_file.rs rename to src/execution/dml/copy_from_file.rs index 3a8e7d4e..ef3ffce6 100644 --- a/src/execution/volcano/dml/copy_from_file.rs +++ b/src/execution/dml/copy_from_file.rs @@ -1,14 +1,16 @@ use crate::binder::copy::FileFormat; use crate::errors::DatabaseError; -use crate::execution::volcano::{BoxedExecutor, WriteExecutor}; +use crate::execution::{Executor, WriteExecutor}; use crate::planner::operator::copy_from_file::CopyFromFileOperator; use crate::storage::Transaction; +use crate::throw; use crate::types::tuple::{types, Tuple}; use crate::types::tuple_builder::TupleBuilder; -use futures_async_stream::try_stream; use std::fs::File; use std::io::BufReader; -use tokio::sync::mpsc::Sender; +use std::sync::mpsc; +use std::sync::mpsc::Sender; +use std::thread; pub struct CopyFromFile { op: 
CopyFromFileOperator, @@ -21,36 +23,37 @@ impl From for CopyFromFile { } } -impl WriteExecutor for CopyFromFile { - fn execute_mut(self, transaction: &mut T) -> BoxedExecutor { - self._execute(transaction) +impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for CopyFromFile { + fn execute_mut(self, transaction: &'a mut T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let types = types(&self.op.schema_ref); + let (tx, rx) = mpsc::channel(); + let (tx1, rx1) = mpsc::channel(); + // # Cancellation + // When this stream is dropped, the `rx` is dropped, the spawned task will fail to send to + // `tx`, then the task will finish. + let table_name = self.op.table.clone(); + let handle = thread::spawn(|| self.read_file_blocking(tx)); + let mut size = 0_usize; + while let Ok(chunk) = rx.recv() { + throw!(transaction.append(&table_name, chunk, &types, false)); + size += 1; + } + throw!(handle.join().unwrap()); + + let handle = thread::spawn(move || return_result(size, tx1)); + while let Ok(chunk) = rx1.recv() { + yield Ok(chunk); + } + throw!(handle.join().unwrap()) + }, + ) } } impl CopyFromFile { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &mut T) { - let types = types(&self.op.schema_ref); - let (tx, mut rx) = tokio::sync::mpsc::channel(1); - let (tx1, mut rx1) = tokio::sync::mpsc::channel(1); - // # Cancellation - // When this stream is dropped, the `rx` is dropped, the spawned task will fail to send to - // `tx`, then the task will finish. - let table_name = self.op.table.clone(); - let handle = tokio::task::spawn_blocking(|| self.read_file_blocking(tx)); - let mut size = 0_usize; - while let Some(chunk) = rx.recv().await { - transaction.append(&table_name, chunk, &types, false)?; - size += 1; - } - handle.await??; - - let handle = tokio::task::spawn_blocking(move || return_result(size, tx1)); - while let Some(chunk) = rx1.recv().await { - yield chunk; - } - handle.await??; - } /// Read records from file using blocking IO. /// /// The read data chunks will be sent through `tx`. @@ -85,7 +88,7 @@ impl CopyFromFile { } self.size += 1; - tx.blocking_send(tuple_builder.build_with_row(record.iter())?) + tx.send(tuple_builder.build_with_row(record.iter())?) 
.map_err(|_| DatabaseError::ChannelClose)?; } Ok(()) @@ -95,8 +98,7 @@ impl CopyFromFile { fn return_result(size: usize, tx: Sender) -> Result<(), DatabaseError> { let tuple = TupleBuilder::build_result(format!("import {} rows", size)); - tx.blocking_send(tuple) - .map_err(|_| DatabaseError::ChannelClose)?; + tx.send(tuple).map_err(|_| DatabaseError::ChannelClose)?; Ok(()) } @@ -104,9 +106,10 @@ fn return_result(size: usize, tx: Sender) -> Result<(), DatabaseError> { mod tests { use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnSummary}; use crate::db::DataBaseBuilder; - use futures::StreamExt; use sqlparser::ast::CharLengthUnits; use std::io::Write; + use std::ops::{Coroutine, CoroutineState}; + use std::pin::Pin; use std::sync::Arc; use tempfile::TempDir; @@ -116,8 +119,8 @@ mod tests { use crate::storage::Storage; use crate::types::LogicalType; - #[tokio::test] - async fn read_csv() -> Result<(), DatabaseError> { + #[test] + fn read_csv() -> Result<(), DatabaseError> { let csv = "1,1.5,one\n2,2.5,two\n"; let mut file = tempfile::NamedTempFile::new().expect("failed to create temp file"); @@ -177,18 +180,17 @@ mod tests { }; let temp_dir = TempDir::new().unwrap(); - let db = DataBaseBuilder::path(temp_dir.path()).build().await?; - let _ = db - .run("create table test_copy (a int primary key, b float, c varchar(10))") - .await; + let db = DataBaseBuilder::path(temp_dir.path()).build()?; + let _ = db.run("create table test_copy (a int primary key, b float, c varchar(10))"); let storage = db.storage; - let mut transaction = storage.transaction().await?; + let mut transaction = storage.transaction()?; - let tuple = executor - .execute_mut(&mut transaction) - .next() - .await - .unwrap()?; + let mut coroutine = executor.execute_mut(&mut transaction); + let tuple = match Pin::new(&mut coroutine).resume(()) { + CoroutineState::Yielded(tuple) => tuple, + CoroutineState::Complete(()) => unreachable!(), + } + .unwrap(); assert_eq!( tuple, TupleBuilder::build_result(format!("import {} rows", 2)) diff --git a/src/execution/volcano/dml/copy_to_file.rs b/src/execution/dml/copy_to_file.rs similarity index 100% rename from src/execution/volcano/dml/copy_to_file.rs rename to src/execution/dml/copy_to_file.rs diff --git a/src/execution/dml/delete.rs b/src/execution/dml/delete.rs new file mode 100644 index 00000000..346e582f --- /dev/null +++ b/src/execution/dml/delete.rs @@ -0,0 +1,106 @@ +use crate::catalog::TableName; +use crate::errors::DatabaseError; +use crate::execution::dql::projection::Projection; +use crate::execution::{build_read, Executor, WriteExecutor}; +use crate::expression::ScalarExpression; +use crate::planner::operator::delete::DeleteOperator; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use crate::throw; +use crate::types::index::{Index, IndexId, IndexType}; +use crate::types::tuple::Tuple; +use crate::types::tuple_builder::TupleBuilder; +use crate::types::value::ValueRef; +use std::collections::HashMap; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; + +pub struct Delete { + table_name: TableName, + input: LogicalPlan, +} + +impl From<(DeleteOperator, LogicalPlan)> for Delete { + fn from((DeleteOperator { table_name, .. 
}, input): (DeleteOperator, LogicalPlan)) -> Self { + Delete { table_name, input } + } +} + +impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Delete { + fn execute_mut(self, transaction: &'a mut T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let Delete { + table_name, + mut input, + } = self; + + let schema = input.output_schema().clone(); + let table = throw!(transaction + .table(table_name.clone()) + .cloned() + .ok_or(DatabaseError::TableNotFound)); + let mut tuple_ids = Vec::new(); + let mut indexes: HashMap = HashMap::new(); + + let mut coroutine = build_read(input, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + let tuple: Tuple = throw!(tuple); + + for index_meta in table.indexes() { + if let Some(Value { + exprs, value_rows, .. + }) = indexes.get_mut(&index_meta.id) + { + value_rows.push(throw!(Projection::projection(&tuple, exprs, &schema))); + } else { + let exprs = throw!(index_meta.column_exprs(&table)); + let values = throw!(Projection::projection(&tuple, &exprs, &schema)); + + indexes.insert( + index_meta.id, + Value { + exprs, + value_rows: vec![values], + index_ty: index_meta.ty, + }, + ); + } + } + tuple_ids.push(tuple.id.unwrap()); + } + drop(coroutine); + for ( + index_id, + Value { + value_rows, + index_ty, + .. + }, + ) in indexes + { + for (i, values) in value_rows.into_iter().enumerate() { + throw!(transaction.del_index( + &table_name, + &Index::new(index_id, &values, index_ty), + Some(&tuple_ids[i]), + )); + } + } + for tuple_id in tuple_ids { + throw!(transaction.delete(&table_name, tuple_id)); + } + yield Ok(TupleBuilder::build_result("1".to_string())); + }, + ) + } +} + +struct Value { + exprs: Vec, + value_rows: Vec>, + index_ty: IndexType, +} diff --git a/src/execution/dml/insert.rs b/src/execution/dml/insert.rs new file mode 100644 index 00000000..5b279d37 --- /dev/null +++ b/src/execution/dml/insert.rs @@ -0,0 +1,123 @@ +use crate::catalog::TableName; +use crate::errors::DatabaseError; +use crate::execution::dql::projection::Projection; +use crate::execution::{build_read, Executor, WriteExecutor}; +use crate::planner::operator::insert::InsertOperator; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use crate::throw; +use crate::types::index::Index; +use crate::types::tuple::Tuple; +use crate::types::tuple_builder::TupleBuilder; +use crate::types::value::DataValue; +use std::collections::HashMap; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; +use std::sync::Arc; + +pub struct Insert { + table_name: TableName, + input: LogicalPlan, + is_overwrite: bool, +} + +impl From<(InsertOperator, LogicalPlan)> for Insert { + fn from( + ( + InsertOperator { + table_name, + is_overwrite, + }, + input, + ): (InsertOperator, LogicalPlan), + ) -> Self { + Insert { + table_name, + input, + is_overwrite, + } + } +} + +impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Insert { + fn execute_mut(self, transaction: &'a mut T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let Insert { + table_name, + mut input, + is_overwrite, + } = self; + + let mut tuples = Vec::new(); + let schema = input.output_schema().clone(); + + let pk_index = throw!(schema + .iter() + .find(|col| col.desc.is_primary) + .map(|col| col.id()) + .ok_or_else(|| DatabaseError::NotNull)); + + if let Some(table_catalog) = transaction.table(table_name.clone()).cloned() { + let types = table_catalog.types(); + let mut coroutine = build_read(input, transaction); + + while let 
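The `throw!` invocations in these coroutine bodies come from the new `src/execution/marco.rs` added by this patch but not shown in this hunk. A plausible minimal sketch, assuming it unwraps an `Ok` value and otherwise yields the error and ends the coroutine, mirroring the explicit `yield Err(DatabaseError::NotNull); return;` pattern in insert.rs below:

#[macro_export]
macro_rules! throw {
    ($result:expr) => {
        match $result {
            Ok(value) => value,
            // On error: surface it to the caller as a yielded `Err` and stop the coroutine.
            Err(err) => {
                yield Err(err);
                return;
            }
        }
    };
}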
CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + let Tuple { values, .. } = throw!(tuple); + + let mut tuple_map = HashMap::new(); + for (i, value) in values.into_iter().enumerate() { + tuple_map.insert(schema[i].id(), value); + } + let tuple_id = throw!(tuple_map + .get(&pk_index) + .cloned() + .ok_or(DatabaseError::NotNull)); + let mut values = Vec::with_capacity(table_catalog.columns_len()); + + for col in table_catalog.columns() { + let value = { + let mut value = tuple_map.remove(&col.id()); + + if value.is_none() { + value = throw!(col.default_value()); + } + value.unwrap_or_else(|| Arc::new(DataValue::none(col.datatype()))) + }; + if value.is_null() && !col.nullable { + yield Err(DatabaseError::NotNull); + return; + } + values.push(value) + } + tuples.push(Tuple { + id: Some(tuple_id), + values, + }); + } + drop(coroutine); + for index_meta in table_catalog.indexes() { + let exprs = throw!(index_meta.column_exprs(&table_catalog)); + + for tuple in tuples.iter() { + let values = throw!(Projection::projection(tuple, &exprs, &schema)); + let index = Index::new(index_meta.id, &values, index_meta.ty); + + throw!(transaction.add_index( + &table_name, + index, + tuple.id.as_ref().unwrap() + )); + } + } + for tuple in tuples { + throw!(transaction.append(&table_name, tuple, &types, is_overwrite)); + } + } + yield Ok(TupleBuilder::build_result("1".to_string())); + }, + ) + } +} diff --git a/src/execution/volcano/dml/mod.rs b/src/execution/dml/mod.rs similarity index 100% rename from src/execution/volcano/dml/mod.rs rename to src/execution/dml/mod.rs diff --git a/src/execution/dml/update.rs b/src/execution/dml/update.rs new file mode 100644 index 00000000..084983c4 --- /dev/null +++ b/src/execution/dml/update.rs @@ -0,0 +1,120 @@ +use crate::catalog::TableName; +use crate::execution::dql::projection::Projection; +use crate::execution::{build_read, Executor, WriteExecutor}; +use crate::planner::operator::update::UpdateOperator; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use crate::throw; +use crate::types::index::Index; +use crate::types::tuple::types; +use crate::types::tuple::Tuple; +use crate::types::tuple_builder::TupleBuilder; +use std::collections::HashMap; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; + +pub struct Update { + table_name: TableName, + input: LogicalPlan, + values: LogicalPlan, +} + +impl From<(UpdateOperator, LogicalPlan, LogicalPlan)> for Update { + fn from( + (UpdateOperator { table_name }, input, values): (UpdateOperator, LogicalPlan, LogicalPlan), + ) -> Self { + Update { + table_name, + input, + values, + } + } +} + +impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Update { + fn execute_mut(self, transaction: &'a mut T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let Update { + table_name, + mut input, + mut values, + } = self; + + let values_schema = values.output_schema().clone(); + let input_schema = input.output_schema().clone(); + let types = types(&input_schema); + + if let Some(table_catalog) = transaction.table(table_name.clone()).cloned() { + let mut value_map = HashMap::new(); + let mut tuples = Vec::new(); + + // only once + let mut coroutine = build_read(values, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + let Tuple { values, .. 
} = throw!(tuple); + for i in 0..values.len() { + value_map.insert(values_schema[i].id(), values[i].clone()); + } + } + drop(coroutine); + let mut coroutine = build_read(input, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + let tuple: Tuple = throw!(tuple); + + tuples.push(tuple); + } + drop(coroutine); + let mut index_metas = Vec::new(); + for index_meta in table_catalog.indexes() { + let exprs = throw!(index_meta.column_exprs(&table_catalog)); + + for tuple in tuples.iter() { + let values = + throw!(Projection::projection(tuple, &exprs, &input_schema)); + let index = Index::new(index_meta.id, &values, index_meta.ty); + throw!(transaction.del_index( + &table_name, + &index, + Some(tuple.id.as_ref().unwrap()) + )); + } + index_metas.push((index_meta, exprs)); + } + for mut tuple in tuples { + let mut is_overwrite = true; + + for (i, column) in input_schema.iter().enumerate() { + if let Some(value) = value_map.get(&column.id()) { + if column.desc.is_primary { + let old_key = tuple.id.replace(value.clone()).unwrap(); + + throw!(transaction.delete(&table_name, old_key)); + is_overwrite = false; + } + tuple.values[i] = value.clone(); + } + } + for (index_meta, exprs) in index_metas.iter() { + let values = + throw!(Projection::projection(&tuple, exprs, &input_schema)); + let index = Index::new(index_meta.id, &values, index_meta.ty); + throw!(transaction.add_index( + &table_name, + index, + tuple.id.as_ref().unwrap() + )); + } + + throw!(transaction.append(&table_name, tuple, &types, is_overwrite)); + } + } + yield Ok(TupleBuilder::build_result("1".to_string())); + }, + ) + } +} diff --git a/src/execution/volcano/dql/aggregate/avg.rs b/src/execution/dql/aggregate/avg.rs similarity index 92% rename from src/execution/volcano/dql/aggregate/avg.rs rename to src/execution/dql/aggregate/avg.rs index a33343f4..7db19d30 100644 --- a/src/execution/volcano/dql/aggregate/avg.rs +++ b/src/execution/dql/aggregate/avg.rs @@ -1,6 +1,6 @@ use crate::errors::DatabaseError; -use crate::execution::volcano::dql::aggregate::sum::SumAccumulator; -use crate::execution::volcano::dql::aggregate::Accumulator; +use crate::execution::dql::aggregate::sum::SumAccumulator; +use crate::execution::dql::aggregate::Accumulator; use crate::expression::BinaryOperator; use crate::types::evaluator::EvaluatorFactory; use crate::types::value::{DataValue, ValueRef}; diff --git a/src/execution/volcano/dql/aggregate/count.rs b/src/execution/dql/aggregate/count.rs similarity index 95% rename from src/execution/volcano/dql/aggregate/count.rs rename to src/execution/dql/aggregate/count.rs index 4eef2637..023e02e1 100644 --- a/src/execution/volcano/dql/aggregate/count.rs +++ b/src/execution/dql/aggregate/count.rs @@ -1,5 +1,5 @@ use crate::errors::DatabaseError; -use crate::execution::volcano::dql::aggregate::Accumulator; +use crate::execution::dql::aggregate::Accumulator; use crate::types::value::{DataValue, ValueRef}; use ahash::RandomState; use std::collections::HashSet; diff --git a/src/execution/volcano/dql/aggregate/hash_agg.rs b/src/execution/dql/aggregate/hash_agg.rs similarity index 80% rename from src/execution/volcano/dql/aggregate/hash_agg.rs rename to src/execution/dql/aggregate/hash_agg.rs index 40c84471..d191d979 100644 --- a/src/execution/volcano/dql/aggregate/hash_agg.rs +++ b/src/execution/dql/aggregate/hash_agg.rs @@ -1,16 +1,18 @@ use crate::catalog::ColumnRef; use crate::errors::DatabaseError; -use crate::execution::volcano::dql::aggregate::{create_accumulators, 
Accumulator}; -use crate::execution::volcano::{build_read, BoxedExecutor, ReadExecutor}; +use crate::execution::dql::aggregate::{create_accumulators, Accumulator}; +use crate::execution::{build_read, Executor, ReadExecutor}; use crate::expression::ScalarExpression; use crate::planner::operator::aggregate::AggregateOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; +use crate::throw; use crate::types::tuple::{SchemaRef, Tuple}; use crate::types::value::ValueRef; use ahash::HashMap; -use futures_async_stream::try_stream; use itertools::Itertools; +use std::ops::{Coroutine, CoroutineState}; +use std::pin::Pin; pub struct HashAggExecutor { agg_calls: Vec, @@ -37,9 +39,31 @@ impl From<(AggregateOperator, LogicalPlan)> for HashAggExecutor { } } -impl ReadExecutor for HashAggExecutor { - fn execute(self, transaction: &T) -> BoxedExecutor { - self._execute(transaction) +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for HashAggExecutor { + fn execute(self, transaction: &'a T) -> Executor<'a> { + Box::new( + #[coroutine] + || { + let HashAggExecutor { + agg_calls, + groupby_exprs, + mut input, + } = self; + + let mut agg_status = + HashAggStatus::new(input.output_schema().clone(), agg_calls, groupby_exprs); + + let mut coroutine = build_read(input, transaction); + + while let CoroutineState::Yielded(result) = Pin::new(&mut coroutine).resume(()) { + throw!(agg_status.update(throw!(result))); + } + + for tuple in throw!(agg_status.as_tuples()) { + yield Ok(tuple); + } + }, + ) } } @@ -129,43 +153,20 @@ impl HashAggStatus { } } -impl HashAggExecutor { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &T) { - let HashAggExecutor { - agg_calls, - groupby_exprs, - mut input, - } = self; - - let mut agg_status = - HashAggStatus::new(input.output_schema().clone(), agg_calls, groupby_exprs); - - #[for_await] - for tuple in build_read(input, transaction) { - agg_status.update(tuple?)?; - } - - for tuple in agg_status.as_tuples()? 
{ - yield tuple; - } - } -} - #[cfg(test)] mod test { use crate::catalog::{ColumnCatalog, ColumnDesc}; use crate::errors::DatabaseError; - use crate::execution::volcano::dql::aggregate::hash_agg::HashAggExecutor; - use crate::execution::volcano::dql::test::build_integers; - use crate::execution::volcano::{try_collect, ReadExecutor}; + use crate::execution::dql::aggregate::hash_agg::HashAggExecutor; + use crate::execution::dql::test::build_integers; + use crate::execution::{try_collect, ReadExecutor}; use crate::expression::agg::AggKind; use crate::expression::ScalarExpression; use crate::planner::operator::aggregate::AggregateOperator; use crate::planner::operator::values::ValuesOperator; use crate::planner::operator::Operator; use crate::planner::LogicalPlan; - use crate::storage::kipdb::KipStorage; + use crate::storage::rocksdb::RocksStorage; use crate::storage::Storage; use crate::types::tuple::create_table; use crate::types::value::DataValue; @@ -174,11 +175,11 @@ mod test { use std::sync::Arc; use tempfile::TempDir; - #[tokio::test] - async fn test_hash_agg() -> Result<(), DatabaseError> { + #[test] + fn test_hash_agg() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await.unwrap(); - let transaction = storage.transaction().await?; + let storage = RocksStorage::new(temp_dir.path()).unwrap(); + let transaction = storage.transaction()?; let desc = ColumnDesc::new(LogicalType::Integer, false, false, None); let t1_schema = Arc::new(vec![ @@ -229,9 +230,7 @@ mod test { _output_schema_ref: None, }; - let tuples = - try_collect(&mut HashAggExecutor::from((operator, input)).execute(&transaction)) - .await?; + let tuples = try_collect(HashAggExecutor::from((operator, input)).execute(&transaction))?; println!( "hash_agg_test: \n{}", diff --git a/src/execution/volcano/dql/aggregate/min_max.rs b/src/execution/dql/aggregate/min_max.rs similarity index 96% rename from src/execution/volcano/dql/aggregate/min_max.rs rename to src/execution/dql/aggregate/min_max.rs index 14307449..39f64278 100644 --- a/src/execution/volcano/dql/aggregate/min_max.rs +++ b/src/execution/dql/aggregate/min_max.rs @@ -1,5 +1,5 @@ use crate::errors::DatabaseError; -use crate::execution::volcano::dql::aggregate::Accumulator; +use crate::execution::dql::aggregate::Accumulator; use crate::expression::BinaryOperator; use crate::types::evaluator::EvaluatorFactory; use crate::types::value::{DataValue, ValueRef}; diff --git a/src/execution/volcano/dql/aggregate/mod.rs b/src/execution/dql/aggregate/mod.rs similarity index 86% rename from src/execution/volcano/dql/aggregate/mod.rs rename to src/execution/dql/aggregate/mod.rs index 46b27ce6..28af5aee 100644 --- a/src/execution/volcano/dql/aggregate/mod.rs +++ b/src/execution/dql/aggregate/mod.rs @@ -6,12 +6,12 @@ pub mod simple_agg; mod sum; use crate::errors::DatabaseError; -use crate::execution::volcano::dql::aggregate::avg::AvgAccumulator; -use crate::execution::volcano::dql::aggregate::count::{ +use crate::execution::dql::aggregate::avg::AvgAccumulator; +use crate::execution::dql::aggregate::count::{ CountAccumulator, DistinctCountAccumulator, }; -use crate::execution::volcano::dql::aggregate::min_max::MinMaxAccumulator; -use crate::execution::volcano::dql::aggregate::sum::{DistinctSumAccumulator, SumAccumulator}; +use crate::execution::dql::aggregate::min_max::MinMaxAccumulator; +use crate::execution::dql::aggregate::sum::{DistinctSumAccumulator, SumAccumulator}; 
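The reworked tests drain executors with `try_collect` instead of collecting an async stream, as in the hash_agg test above. Its assumed shape, based on how coroutines are resumed elsewhere in this patch (a sketch, not the exact body in `src/execution/mod.rs`):

use std::ops::{Coroutine, CoroutineState};
use std::pin::Pin;

pub fn try_collect(mut executor: Executor<'_>) -> Result<Vec<Tuple>, DatabaseError> {
    let mut tuples = Vec::new();
    // Resume until the coroutine completes, propagating the first error encountered.
    while let CoroutineState::Yielded(result) = Pin::new(&mut executor).resume(()) {
        tuples.push(result?);
    }
    Ok(tuples)
}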
use crate::expression::agg::AggKind; use crate::expression::ScalarExpression; use crate::types::value::ValueRef; diff --git a/src/execution/dql/aggregate/simple_agg.rs b/src/execution/dql/aggregate/simple_agg.rs new file mode 100644 index 00000000..ca7c4f9f --- /dev/null +++ b/src/execution/dql/aggregate/simple_agg.rs @@ -0,0 +1,65 @@ +use crate::execution::dql::aggregate::create_accumulators; +use crate::execution::{build_read, Executor, ReadExecutor}; +use crate::expression::ScalarExpression; +use crate::planner::operator::aggregate::AggregateOperator; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use crate::throw; +use crate::types::tuple::Tuple; +use crate::types::value::ValueRef; +use itertools::Itertools; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; + +pub struct SimpleAggExecutor { + agg_calls: Vec, + input: LogicalPlan, +} + +impl From<(AggregateOperator, LogicalPlan)> for SimpleAggExecutor { + fn from( + (AggregateOperator { agg_calls, .. }, input): (AggregateOperator, LogicalPlan), + ) -> Self { + SimpleAggExecutor { agg_calls, input } + } +} + +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for SimpleAggExecutor { + fn execute(self, transaction: &'a T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let SimpleAggExecutor { + agg_calls, + mut input, + } = self; + + let mut accs = throw!(create_accumulators(&agg_calls)); + let schema = input.output_schema().clone(); + + let mut coroutine = build_read(input, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + let tuple = throw!(tuple); + + let values: Vec = throw!(agg_calls + .iter() + .map(|expr| match expr { + ScalarExpression::AggCall { args, .. } => args[0].eval(&tuple, &schema), + _ => unreachable!(), + }) + .try_collect()); + + for (acc, value) in accs.iter_mut().zip_eq(values.iter()) { + throw!(acc.update_value(value)); + } + } + let values: Vec = + throw!(accs.into_iter().map(|acc| acc.evaluate()).try_collect()); + + yield Ok(Tuple { id: None, values }); + }, + ) + } +} diff --git a/src/execution/volcano/dql/aggregate/sum.rs b/src/execution/dql/aggregate/sum.rs similarity index 96% rename from src/execution/volcano/dql/aggregate/sum.rs rename to src/execution/dql/aggregate/sum.rs index 938facb1..ae67a556 100644 --- a/src/execution/volcano/dql/aggregate/sum.rs +++ b/src/execution/dql/aggregate/sum.rs @@ -1,5 +1,5 @@ use crate::errors::DatabaseError; -use crate::execution::volcano::dql::aggregate::Accumulator; +use crate::execution::dql::aggregate::Accumulator; use crate::expression::BinaryOperator; use crate::types::evaluator::{BinaryEvaluatorBox, EvaluatorFactory}; use crate::types::value::{DataValue, ValueRef}; diff --git a/src/execution/dql/describe.rs b/src/execution/dql/describe.rs new file mode 100644 index 00000000..e15fc690 --- /dev/null +++ b/src/execution/dql/describe.rs @@ -0,0 +1,102 @@ +use crate::catalog::{ColumnCatalog, TableName}; +use crate::execution::DatabaseError; +use crate::execution::{Executor, ReadExecutor}; +use crate::planner::operator::describe::DescribeOperator; +use crate::storage::Transaction; +use crate::throw; +use crate::types::tuple::Tuple; +use crate::types::value::{DataValue, Utf8Type, ValueRef}; +use lazy_static::lazy_static; +use sqlparser::ast::CharLengthUnits; +use std::sync::Arc; + +lazy_static! 
{ + static ref PRIMARY_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 { + value: Some(String::from("PRIMARY")), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters + }); + static ref UNIQUE_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 { + value: Some(String::from("UNIQUE")), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters + }); + static ref EMPTY_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 { + value: Some(String::from("EMPTY")), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters + }); +} + +pub struct Describe { + table_name: TableName, +} + +impl From for Describe { + fn from(op: DescribeOperator) -> Self { + Describe { + table_name: op.table_name, + } + } +} + +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Describe { + fn execute(self, transaction: &'a T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let table = throw!(transaction + .table(self.table_name.clone()) + .ok_or(DatabaseError::TableNotFound)); + let key_fn = |column: &ColumnCatalog| { + if column.desc.is_primary { + PRIMARY_KEY_TYPE.clone() + } else if column.desc.is_unique { + UNIQUE_KEY_TYPE.clone() + } else { + EMPTY_KEY_TYPE.clone() + } + }; + + for column in table.columns() { + let datatype = column.datatype(); + let default = column + .desc + .default + .as_ref() + .map(|expr| format!("{}", expr)) + .unwrap_or_else(|| "null".to_string()); + let values = vec![ + Arc::new(DataValue::Utf8 { + value: Some(column.name().to_string()), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, + }), + Arc::new(DataValue::Utf8 { + value: Some(datatype.to_string()), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, + }), + Arc::new(DataValue::Utf8 { + value: datatype.raw_len().map(|len| len.to_string()), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, + }), + Arc::new(DataValue::Utf8 { + value: Some(column.nullable.to_string()), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, + }), + key_fn(column), + Arc::new(DataValue::Utf8 { + value: Some(default), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, + }), + ]; + yield Ok(Tuple { id: None, values }); + } + }, + ) + } +} diff --git a/src/execution/dql/dummy.rs b/src/execution/dql/dummy.rs new file mode 100644 index 00000000..d3b67740 --- /dev/null +++ b/src/execution/dql/dummy.rs @@ -0,0 +1,19 @@ +use crate::execution::{Executor, ReadExecutor}; +use crate::storage::Transaction; +use crate::types::tuple::Tuple; + +pub struct Dummy {} + +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Dummy { + fn execute(self, _: &T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + yield Ok(Tuple { + id: None, + values: vec![], + }); + }, + ) + } +} diff --git a/src/execution/dql/explain.rs b/src/execution/dql/explain.rs new file mode 100644 index 00000000..b3a025c6 --- /dev/null +++ b/src/execution/dql/explain.rs @@ -0,0 +1,34 @@ +use crate::execution::{Executor, ReadExecutor}; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use crate::types::tuple::Tuple; +use crate::types::value::{DataValue, Utf8Type}; +use sqlparser::ast::CharLengthUnits; +use std::sync::Arc; + +pub struct Explain { + plan: LogicalPlan, +} + +impl From for Explain { + fn from(plan: LogicalPlan) -> Self { + Explain { plan } + } +} + +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Explain { + fn execute(self, _: &T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let values = vec![Arc::new(DataValue::Utf8 { + 
value: Some(self.plan.explain(0)), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, + })]; + + yield Ok(Tuple { id: None, values }); + }, + ) + } +} diff --git a/src/execution/dql/filter.rs b/src/execution/dql/filter.rs new file mode 100644 index 00000000..42cf349a --- /dev/null +++ b/src/execution/dql/filter.rs @@ -0,0 +1,46 @@ +use crate::execution::{build_read, Executor, ReadExecutor}; +use crate::expression::ScalarExpression; +use crate::planner::operator::filter::FilterOperator; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use crate::throw; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; + +pub struct Filter { + predicate: ScalarExpression, + input: LogicalPlan, +} + +impl From<(FilterOperator, LogicalPlan)> for Filter { + fn from((FilterOperator { predicate, .. }, input): (FilterOperator, LogicalPlan)) -> Self { + Filter { predicate, input } + } +} + +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Filter { + fn execute(self, transaction: &'a T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let Filter { + predicate, + mut input, + } = self; + + let schema = input.output_schema().clone(); + + let mut coroutine = build_read(input, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + let tuple = throw!(tuple); + + if throw!(throw!(predicate.eval(&tuple, &schema)).is_true()) { + yield Ok(tuple); + } + } + }, + ) + } +} diff --git a/src/execution/dql/index_scan.rs b/src/execution/dql/index_scan.rs new file mode 100644 index 00000000..1f50cf0e --- /dev/null +++ b/src/execution/dql/index_scan.rs @@ -0,0 +1,51 @@ +use crate::execution::{Executor, ReadExecutor}; +use crate::expression::range_detacher::Range; +use crate::planner::operator::scan::ScanOperator; +use crate::storage::{Iter, Transaction}; +use crate::throw; +use crate::types::index::IndexMetaRef; + +pub(crate) struct IndexScan { + op: ScanOperator, + index_by: IndexMetaRef, + ranges: Vec, +} + +impl From<(ScanOperator, IndexMetaRef, Range)> for IndexScan { + fn from((op, index_by, range): (ScanOperator, IndexMetaRef, Range)) -> Self { + let ranges = match range { + Range::SortedRanges(ranges) => ranges, + range => vec![range], + }; + + IndexScan { + op, + index_by, + ranges, + } + } +} + +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for IndexScan { + fn execute(self, transaction: &'a T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let ScanOperator { + table_name, + columns, + limit, + .. 
+ } = self.op; + + let mut iter = transaction + .read_by_index(table_name, limit, columns, self.index_by, self.ranges) + .unwrap(); + + while let Some(tuple) = throw!(iter.next_tuple()) { + yield Ok(tuple); + } + }, + ) + } +} diff --git a/src/execution/volcano/dql/join/hash_join.rs b/src/execution/dql/join/hash_join.rs similarity index 62% rename from src/execution/volcano/dql/join/hash_join.rs rename to src/execution/dql/join/hash_join.rs index 46d52808..b91602c7 100644 --- a/src/execution/volcano/dql/join/hash_join.rs +++ b/src/execution/dql/join/hash_join.rs @@ -1,17 +1,20 @@ use crate::catalog::{ColumnCatalog, ColumnRef}; use crate::errors::DatabaseError; -use crate::execution::volcano::dql::join::joins_nullable; -use crate::execution::volcano::{build_read, BoxedExecutor, ReadExecutor}; +use crate::execution::dql::join::joins_nullable; +use crate::execution::{build_read, Executor, ReadExecutor}; use crate::expression::ScalarExpression; use crate::planner::operator::join::{JoinCondition, JoinOperator, JoinType}; use crate::planner::LogicalPlan; use crate::storage::Transaction; +use crate::throw; use crate::types::tuple::{Schema, SchemaRef, Tuple}; use crate::types::value::{DataValue, ValueRef, NULL_VALUE}; +use crate::utils::bit_vector::BitVector; use ahash::HashMap; -use futures_async_stream::try_stream; use itertools::Itertools; -use kip_db::kernel::utils::bloom_filter::BitVector; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; use std::sync::Arc; pub struct HashJoin { @@ -38,9 +41,64 @@ impl From<(JoinOperator, LogicalPlan, LogicalPlan)> for HashJoin { } } -impl ReadExecutor for HashJoin { - fn execute(self, transaction: &T) -> BoxedExecutor { - self._execute(transaction) +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for HashJoin { + fn execute(self, transaction: &'a T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let HashJoin { + on, + ty, + mut left_input, + mut right_input, + } = self; + let mut join_status = HashJoinStatus::new( + on, + ty, + left_input.output_schema(), + right_input.output_schema(), + ); + let join_status_ptr: *mut HashJoinStatus = &mut join_status; + + // build phase: + // 1.construct hashtable, one hash key may contains multiple rows indices. + // 2.merged all left tuples. + let mut coroutine = build_read(left_input, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + let tuple: Tuple = throw!(tuple); + + throw!(unsafe { (*join_status_ptr).left_build(tuple) }); + } + + // probe phase + let mut coroutine = build_read(right_input, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + let tuple: Tuple = throw!(tuple); + + unsafe { + let mut coroutine = (*join_status_ptr).right_probe(tuple); + + while let CoroutineState::Yielded(tuple) = + Pin::new(&mut coroutine).resume(()) + { + yield tuple; + } + } + } + + unsafe { + if let Some(mut coroutine) = (*join_status_ptr).build_drop() { + while let CoroutineState::Yielded(tuple) = + Pin::new(&mut coroutine).resume(()) + { + yield tuple; + } + }; + } + }, + ) } } @@ -127,81 +185,97 @@ impl HashJoinStatus { Ok(()) } - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - #[allow(unused_assignments)] - pub(crate) async fn right_probe(&mut self, tuple: Tuple) { - let HashJoinStatus { - on_right_keys, - full_schema_ref, - build_map, - ty, - filter, - left_schema_len, - .. 
- } = self; - - let right_cols_len = tuple.values.len(); - let values = Self::eval_keys(on_right_keys, &tuple, &full_schema_ref[*left_schema_len..])?; - let has_null = values.iter().any(|value| value.is_null()); - - if let (false, Some((tuples, is_used, is_filtered))) = - (has_null, build_map.get_mut(&values)) - { - let mut bits_option = None; - *is_used = true; - - match ty { - JoinType::LeftSemi => { - if *is_filtered { - return Ok(()); - } else { - bits_option = Some(BitVector::new(tuples.len())); - } - } - JoinType::LeftAnti => return Ok(()), - _ => (), - } - for (i, Tuple { values, .. }) in tuples.iter().enumerate() { - let full_values = values - .iter() - .cloned() - .chain(tuple.values.clone()) - .collect_vec(); - let tuple = Tuple { - id: None, - values: full_values, - }; - if let Some(tuple) = - Self::filter(tuple, full_schema_ref, filter, ty, *left_schema_len)? + pub(crate) fn right_probe(&mut self, tuple: Tuple) -> Executor { + Box::new( + #[coroutine] + move || { + let HashJoinStatus { + on_right_keys, + full_schema_ref, + build_map, + ty, + filter, + left_schema_len, + .. + } = self; + + let right_cols_len = tuple.values.len(); + let values = throw!(Self::eval_keys( + on_right_keys, + &tuple, + &full_schema_ref[*left_schema_len..] + )); + let has_null = values.iter().any(|value| value.is_null()); + + if let (false, Some((tuples, is_used, is_filtered))) = + (has_null, build_map.get_mut(&values)) { - if let Some(bits) = bits_option.as_mut() { - bits.set_bit(i, true); - } else { - yield tuple; + let mut bits_option = None; + *is_used = true; + + match ty { + JoinType::LeftSemi => { + if *is_filtered { + return; + } else { + bits_option = Some(BitVector::new(tuples.len())); + } + } + JoinType::LeftAnti => return, + _ => (), + } + for (i, Tuple { values, .. }) in tuples.iter().enumerate() { + let full_values = values + .iter() + .cloned() + .chain(tuple.values.clone()) + .collect_vec(); + let tuple = Tuple { + id: None, + values: full_values, + }; + if let Some(tuple) = throw!(Self::filter( + tuple, + full_schema_ref, + filter, + ty, + *left_schema_len + )) { + if let Some(bits) = bits_option.as_mut() { + bits.set_bit(i, true); + } else { + yield Ok(tuple); + } + } + } + if let Some(bits) = bits_option { + let mut cnt = 0; + tuples.retain(|_| { + let res = bits.get_bit(cnt); + cnt += 1; + res + }); + *is_filtered = true + } + } else if matches!(ty, JoinType::RightOuter | JoinType::Full) { + let empty_len = full_schema_ref.len() - right_cols_len; + let values = (0..empty_len) + .map(|_| NULL_VALUE.clone()) + .chain(tuple.values) + .collect_vec(); + let tuple = Tuple { id: None, values }; + if let Some(tuple) = throw!(Self::filter( + tuple, + full_schema_ref, + filter, + ty, + *left_schema_len + )) { + yield Ok(tuple); } } - } - if let Some(bits) = bits_option { - let mut cnt = 0; - tuples.retain(|_| { - let res = bits.get_bit(cnt); - cnt += 1; - res - }); - *is_filtered = true - } - } else if matches!(ty, JoinType::RightOuter | JoinType::Full) { - let empty_len = full_schema_ref.len() - right_cols_len; - let values = (0..empty_len) - .map(|_| NULL_VALUE.clone()) - .chain(tuple.values) - .collect_vec(); - let tuple = Tuple { id: None, values }; - if let Some(tuple) = Self::filter(tuple, full_schema_ref, filter, ty, *left_schema_len)? 
- { - yield tuple; - } - } + }, + ) } pub(crate) fn filter( @@ -238,7 +312,7 @@ impl HashJoinStatus { Ok(Some(tuple)) } - pub(crate) fn build_drop(&mut self) -> Option { + pub(crate) fn build_drop(&mut self) -> Option { let HashJoinStatus { full_schema_ref, build_map, @@ -263,54 +337,67 @@ impl HashJoinStatus { } } - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - async fn right_null_tuple<'a>( + fn right_null_tuple<'a>( build_map: &'a mut HashMap, (Vec, bool, bool)>, schema: &'a Schema, - ) { - for (_, (left_tuples, is_used, _)) in build_map.drain() { - if is_used { - continue; - } - for mut tuple in left_tuples { - while tuple.values.len() != schema.len() { - tuple.values.push(NULL_VALUE.clone()); + ) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + for (_, (left_tuples, is_used, _)) in build_map.drain() { + if is_used { + continue; + } + for mut tuple in left_tuples { + while tuple.values.len() != schema.len() { + tuple.values.push(NULL_VALUE.clone()); + } + yield Ok(tuple); + } } - yield tuple; - } - } + }, + ) } - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - async fn one_side_tuple<'a>( + fn one_side_tuple<'a>( build_map: &'a mut HashMap, (Vec, bool, bool)>, schema: &'a Schema, filter: &'a Option, join_ty: &'a JoinType, left_schema_len: usize, - ) { - let is_left_semi = matches!(join_ty, JoinType::LeftSemi); - - for (_, (left_tuples, mut is_used, is_filtered)) in build_map.drain() { - if is_left_semi { - is_used = !is_used; - } - if is_used { - continue; - } - if is_filtered { - for tuple in left_tuples { - yield tuple; - } - continue; - } - for tuple in left_tuples { - if let Some(tuple) = Self::filter(tuple, schema, filter, join_ty, left_schema_len)? - { - yield tuple; + ) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let is_left_semi = matches!(join_ty, JoinType::LeftSemi); + + for (_, (left_tuples, mut is_used, is_filtered)) in build_map.drain() { + if is_left_semi { + is_used = !is_used; + } + if is_used { + continue; + } + if is_filtered { + for tuple in left_tuples { + yield Ok(tuple); + } + continue; + } + for tuple in left_tuples { + if let Some(tuple) = throw!(Self::filter( + tuple, + schema, + filter, + join_ty, + left_schema_len + )) { + yield Ok(tuple); + } + } } - } - } + }, + ) } fn eval_keys( @@ -327,66 +414,19 @@ impl HashJoinStatus { } } -impl HashJoin { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &T) { - let HashJoin { - on, - ty, - mut left_input, - mut right_input, - } = self; - - let mut join_status = HashJoinStatus::new( - on, - ty, - left_input.output_schema(), - right_input.output_schema(), - ); - - // build phase: - // 1.construct hashtable, one hash key may contains multiple rows indices. - // 2.merged all left tuples. 
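`BitVector` is now provided by the new `src/utils/bit_vector.rs` instead of kip_db's bloom_filter module. The hash join only relies on `new`, `set_bit` and `get_bit`; a minimal sketch of that surface (an illustration of the assumed API, not the file added by this patch):

pub struct BitVector {
    bits: Vec<u64>,
}

impl BitVector {
    pub fn new(len: usize) -> Self {
        // One u64 word per 64 bits, rounded up.
        BitVector {
            bits: vec![0; (len + 63) / 64],
        }
    }

    pub fn set_bit(&mut self, index: usize, value: bool) {
        let (word, bit) = (index / 64, index % 64);
        if value {
            self.bits[word] |= 1u64 << bit;
        } else {
            self.bits[word] &= !(1u64 << bit);
        }
    }

    pub fn get_bit(&self, index: usize) -> bool {
        (self.bits[index / 64] >> (index % 64)) & 1 == 1
    }
}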
- #[for_await] - for tuple in build_read(left_input, transaction) { - let tuple: Tuple = tuple?; - - join_status.left_build(tuple)?; - } - - // probe phase - #[for_await] - for tuple in build_read(right_input, transaction) { - let tuple: Tuple = tuple?; - - #[for_await] - for tuple in join_status.right_probe(tuple) { - yield tuple?; - } - } - - if let Some(stream) = join_status.build_drop() { - #[for_await] - for tuple in stream { - yield tuple?; - } - }; - } -} - #[cfg(test)] mod test { use crate::catalog::{ColumnCatalog, ColumnDesc}; use crate::errors::DatabaseError; - use crate::execution::volcano::dql::join::hash_join::HashJoin; - use crate::execution::volcano::dql::test::build_integers; - use crate::execution::volcano::{try_collect, ReadExecutor}; + use crate::execution::dql::join::hash_join::HashJoin; + use crate::execution::dql::test::build_integers; + use crate::execution::{try_collect, ReadExecutor}; use crate::expression::ScalarExpression; use crate::planner::operator::join::{JoinCondition, JoinOperator, JoinType}; use crate::planner::operator::values::ValuesOperator; use crate::planner::operator::Operator; use crate::planner::LogicalPlan; - use crate::storage::kipdb::KipStorage; + use crate::storage::rocksdb::RocksStorage; use crate::storage::Storage; use crate::types::value::DataValue; use crate::types::LogicalType; @@ -477,11 +517,11 @@ mod test { (on_keys, values_t1, values_t2) } - #[tokio::test] - async fn test_inner_join() -> Result<(), DatabaseError> { + #[test] + fn test_inner_join() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await?; - let transaction = storage.transaction().await?; + let storage = RocksStorage::new(temp_dir.path())?; + let transaction = storage.transaction()?; let (keys, left, right) = build_join_values(); let op = JoinOperator { @@ -491,8 +531,8 @@ mod test { }, join_type: JoinType::Inner, }; - let mut executor = HashJoin::from((op, left, right)).execute(&transaction); - let tuples = try_collect(&mut executor).await?; + let executor = HashJoin::from((op, left, right)).execute(&transaction); + let tuples = try_collect(executor)?; assert_eq!(tuples.len(), 3); @@ -512,11 +552,11 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_left_join() -> Result<(), DatabaseError> { + #[test] + fn test_left_join() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await?; - let transaction = storage.transaction().await?; + let storage = RocksStorage::new(temp_dir.path())?; + let transaction = storage.transaction()?; let (keys, left, right) = build_join_values(); let op = JoinOperator { @@ -529,7 +569,7 @@ mod test { //Outer { let executor = HashJoin::from((op.clone(), left.clone(), right.clone())); - let tuples = try_collect(&mut executor.execute(&transaction)).await?; + let tuples = try_collect(executor.execute(&transaction))?; assert_eq!(tuples.len(), 4); @@ -554,7 +594,7 @@ mod test { { let mut executor = HashJoin::from((op.clone(), left.clone(), right.clone())); executor.ty = JoinType::LeftSemi; - let mut tuples = try_collect(&mut executor.execute(&transaction)).await?; + let mut tuples = try_collect(executor.execute(&transaction))?; assert_eq!(tuples.len(), 2); tuples.sort_by_key(|tuple| { @@ -576,7 +616,7 @@ mod test { { let mut executor = HashJoin::from((op, left, right)); executor.ty = JoinType::LeftAnti; - let tuples = 
try_collect(&mut executor.execute(&transaction)).await?; + let tuples = try_collect(executor.execute(&transaction))?; assert_eq!(tuples.len(), 1); assert_eq!( @@ -588,11 +628,11 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_right_join() -> Result<(), DatabaseError> { + #[test] + fn test_right_join() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await?; - let transaction = storage.transaction().await?; + let storage = RocksStorage::new(temp_dir.path())?; + let transaction = storage.transaction()?; let (keys, left, right) = build_join_values(); let op = JoinOperator { @@ -602,8 +642,8 @@ mod test { }, join_type: JoinType::RightOuter, }; - let mut executor = HashJoin::from((op, left, right)).execute(&transaction); - let tuples = try_collect(&mut executor).await?; + let executor = HashJoin::from((op, left, right)).execute(&transaction); + let tuples = try_collect(executor)?; assert_eq!(tuples.len(), 4); @@ -627,11 +667,11 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_full_join() -> Result<(), DatabaseError> { + #[test] + fn test_full_join() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await?; - let transaction = storage.transaction().await?; + let storage = RocksStorage::new(temp_dir.path())?; + let transaction = storage.transaction()?; let (keys, left, right) = build_join_values(); let op = JoinOperator { @@ -641,8 +681,8 @@ mod test { }, join_type: JoinType::Full, }; - let mut executor = HashJoin::from((op, left, right)).execute(&transaction); - let tuples = try_collect(&mut executor).await?; + let executor = HashJoin::from((op, left, right)).execute(&transaction); + let tuples = try_collect(executor)?; assert_eq!(tuples.len(), 5); diff --git a/src/execution/volcano/dql/join/mod.rs b/src/execution/dql/join/mod.rs similarity index 100% rename from src/execution/volcano/dql/join/mod.rs rename to src/execution/dql/join/mod.rs diff --git a/src/execution/volcano/dql/join/nested_loop_join.rs b/src/execution/dql/join/nested_loop_join.rs similarity index 70% rename from src/execution/volcano/dql/join/nested_loop_join.rs rename to src/execution/dql/join/nested_loop_join.rs index fd8e7627..00da70c1 100644 --- a/src/execution/volcano/dql/join/nested_loop_join.rs +++ b/src/execution/dql/join/nested_loop_join.rs @@ -2,22 +2,23 @@ //! [`JoinType::LeftSemi`], [`JoinType::LeftAnti`], [`JoinType::RightOuter`], [`JoinType::Cross`]. //! But [`JoinType::Full`] is not supported. 
-use std::sync::Arc; - +use super::joins_nullable; use crate::catalog::{ColumnCatalog, ColumnRef}; use crate::errors::DatabaseError; -use crate::execution::volcano::dql::projection::Projection; -use crate::execution::volcano::{build_read, BoxedExecutor, ReadExecutor}; +use crate::execution::dql::projection::Projection; +use crate::execution::{build_read, Executor, ReadExecutor}; use crate::expression::ScalarExpression; use crate::planner::operator::join::{JoinCondition, JoinOperator, JoinType}; use crate::planner::LogicalPlan; use crate::storage::Transaction; +use crate::throw; use crate::types::tuple::{Schema, SchemaRef, Tuple}; use crate::types::value::{DataValue, NULL_VALUE}; -use futures_async_stream::try_stream; use itertools::Itertools; - -use super::joins_nullable; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; +use std::sync::Arc; /// Equivalent condition struct EqualCondition { @@ -124,101 +125,121 @@ impl From<(JoinOperator, LogicalPlan, LogicalPlan)> for NestedLoopJoin { } } -impl ReadExecutor for NestedLoopJoin { - fn execute(self, transaction: &T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl NestedLoopJoin { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &T) { - let NestedLoopJoin { - ty, - left_input, - right_input, - output_schema_ref, - filter, - eq_cond, - .. - } = self; - - if matches!(self.ty, JoinType::Full) { - unreachable!("{} cannot be handled in nested loop join", self.ty) - } - - let right_schema_len = eq_cond.right_schema.len(); - - #[for_await] - for tuple in build_read(left_input, transaction) { - let left_tuple: Tuple = tuple?; - let mut has_matched = false; - - #[for_await] - for right_tuple in build_read(right_input.clone(), transaction) { - let right_tuple: Tuple = right_tuple?; - - let tuple = match (filter.as_ref(), eq_cond.equals(&left_tuple, &right_tuple)?) { - (None, true) if matches!(ty, JoinType::RightOuter) => { - Self::emit_tuple(&right_tuple, &left_tuple, ty, true) - } - (None, true) => Self::emit_tuple(&left_tuple, &right_tuple, ty, true), - (Some(filter), true) => { - let new_tuple = Self::merge_tuple(&left_tuple, &right_tuple, &ty); - let value = filter.eval(&new_tuple, &output_schema_ref)?; - match value.as_ref() { - DataValue::Boolean(Some(true)) => { - let tuple = match ty { - JoinType::LeftAnti => None, - JoinType::LeftSemi if has_matched => None, - JoinType::RightOuter => { - Self::emit_tuple(&right_tuple, &left_tuple, ty, true) +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for NestedLoopJoin { + fn execute(self, transaction: &'a T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let NestedLoopJoin { + ty, + left_input, + right_input, + output_schema_ref, + filter, + eq_cond, + .. 
+ } = self; + + if matches!(self.ty, JoinType::Full) { + unreachable!("{} cannot be handled in nested loop join", self.ty) + } + let right_schema_len = eq_cond.right_schema.len(); + let mut left_coroutine = build_read(left_input, transaction); + + while let CoroutineState::Yielded(left_tuple) = + Pin::new(&mut left_coroutine).resume(()) + { + let left_tuple: Tuple = throw!(left_tuple); + let mut has_matched = false; + + let mut right_coroutine = build_read(right_input.clone(), transaction); + + while let CoroutineState::Yielded(right_tuple) = + Pin::new(&mut right_coroutine).resume(()) + { + let right_tuple: Tuple = throw!(right_tuple); + + let tuple = match ( + filter.as_ref(), + throw!(eq_cond.equals(&left_tuple, &right_tuple)), + ) { + (None, true) if matches!(ty, JoinType::RightOuter) => { + Self::emit_tuple(&right_tuple, &left_tuple, ty, true) + } + (None, true) => Self::emit_tuple(&left_tuple, &right_tuple, ty, true), + (Some(filter), true) => { + let new_tuple = Self::merge_tuple(&left_tuple, &right_tuple, &ty); + let value = throw!(filter.eval(&new_tuple, &output_schema_ref)); + match value.as_ref() { + DataValue::Boolean(Some(true)) => { + let tuple = match ty { + JoinType::LeftAnti => None, + JoinType::LeftSemi if has_matched => None, + JoinType::RightOuter => Self::emit_tuple( + &right_tuple, + &left_tuple, + ty, + true, + ), + _ => Self::emit_tuple( + &left_tuple, + &right_tuple, + ty, + true, + ), + }; + has_matched = true; + tuple } - _ => Self::emit_tuple(&left_tuple, &right_tuple, ty, true), - }; - has_matched = true; - tuple + DataValue::Boolean(Some(_) | None) => None, + _ => { + yield Err(DatabaseError::InvalidType); + return; + } + } } - DataValue::Boolean(Some(_) | None) => None, - _ => return Err(DatabaseError::InvalidType), - } - } - _ => None, - }; + _ => None, + }; - if let Some(tuple) = tuple { - yield tuple; - if matches!(ty, JoinType::LeftSemi) { - break; + if let Some(tuple) = tuple { + yield Ok(tuple); + if matches!(ty, JoinType::LeftSemi) { + break; + } + } + if matches!(ty, JoinType::LeftAnti) && has_matched { + break; + } } - } - if matches!(ty, JoinType::LeftAnti) && has_matched { - break; - } - } - // handle no matched tuple case - let tuple = match ty { - JoinType::LeftAnti if !has_matched => Some(left_tuple.clone()), - JoinType::LeftOuter | JoinType::LeftSemi | JoinType::RightOuter if !has_matched => { - let right_tuple = Tuple { - id: None, - values: vec![NULL_VALUE.clone(); right_schema_len], + // handle no matched tuple case + let tuple = match ty { + JoinType::LeftAnti if !has_matched => Some(left_tuple.clone()), + JoinType::LeftOuter | JoinType::LeftSemi | JoinType::RightOuter + if !has_matched => + { + let right_tuple = Tuple { + id: None, + values: vec![NULL_VALUE.clone(); right_schema_len], + }; + if matches!(ty, JoinType::RightOuter) { + Self::emit_tuple(&right_tuple, &left_tuple, ty, false) + } else { + Self::emit_tuple(&left_tuple, &right_tuple, ty, false) + } + } + _ => None, }; - if matches!(ty, JoinType::RightOuter) { - Self::emit_tuple(&right_tuple, &left_tuple, ty, false) - } else { - Self::emit_tuple(&left_tuple, &right_tuple, ty, false) + if let Some(tuple) = tuple { + yield Ok(tuple) } } - _ => None, - }; - if let Some(tuple) = tuple { - yield tuple - } - } + }, + ) } +} +impl NestedLoopJoin { /// Emit a tuple according to the join type. /// /// `left_tuple`: left tuple to be included. 
@@ -323,12 +344,12 @@ mod test { use super::*; use crate::catalog::{ColumnCatalog, ColumnDesc}; - use crate::execution::volcano::dql::test::build_integers; - use crate::execution::volcano::{try_collect, ReadExecutor}; + use crate::execution::dql::test::build_integers; + use crate::execution::{try_collect, ReadExecutor}; use crate::expression::ScalarExpression; use crate::planner::operator::values::ValuesOperator; use crate::planner::operator::Operator; - use crate::storage::kipdb::KipStorage; + use crate::storage::rocksdb::RocksStorage; use crate::storage::Storage; use crate::types::evaluator::int32::Int32GtBinaryEvaluator; use crate::types::evaluator::BinaryEvaluatorBox; @@ -471,11 +492,11 @@ mod test { assert!(expected.is_empty()); } - #[tokio::test] - async fn test_nested_inner_join() -> Result<(), DatabaseError> { + #[test] + fn test_nested_inner_join() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await?; - let transaction = storage.transaction().await?; + let storage = RocksStorage::new(temp_dir.path())?; + let transaction = storage.transaction()?; let (keys, left, right, filter) = build_join_values(true); let op = JoinOperator { on: JoinCondition::On { @@ -484,8 +505,8 @@ mod test { }, join_type: JoinType::Inner, }; - let mut executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); - let tuples = try_collect(&mut executor).await?; + let executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); + let tuples = try_collect(executor)?; let mut expected_set = HashSet::with_capacity(1); let tuple = build_integers(vec![Some(1), Some(2), Some(5), Some(0), Some(2), Some(4)]); @@ -496,11 +517,11 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_nested_left_out_join() -> Result<(), DatabaseError> { + #[test] + fn test_nested_left_out_join() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await?; - let transaction = storage.transaction().await?; + let storage = RocksStorage::new(temp_dir.path())?; + let transaction = storage.transaction()?; let (keys, left, right, filter) = build_join_values(true); let op = JoinOperator { on: JoinCondition::On { @@ -509,8 +530,8 @@ mod test { }, join_type: JoinType::LeftOuter, }; - let mut executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); - let tuples = try_collect(&mut executor).await?; + let executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); + let tuples = try_collect(executor)?; assert_eq!( tuples[0].values, @@ -533,11 +554,11 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_nested_cross_join_with_on() -> Result<(), DatabaseError> { + #[test] + fn test_nested_cross_join_with_on() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await?; - let transaction = storage.transaction().await?; + let storage = RocksStorage::new(temp_dir.path())?; + let transaction = storage.transaction()?; let (keys, left, right, filter) = build_join_values(true); let op = JoinOperator { on: JoinCondition::On { @@ -546,8 +567,8 @@ mod test { }, join_type: JoinType::Cross, }; - let mut executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); - let tuples = try_collect(&mut executor).await?; + let executor = 
NestedLoopJoin::from((op, left, right)).execute(&transaction); + let tuples = try_collect(executor)?; let mut expected_set = HashSet::with_capacity(1); @@ -559,11 +580,11 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_nested_cross_join_without_filter() -> Result<(), DatabaseError> { + #[test] + fn test_nested_cross_join_without_filter() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await?; - let transaction = storage.transaction().await?; + let storage = RocksStorage::new(temp_dir.path())?; + let transaction = storage.transaction()?; let (keys, left, right, _) = build_join_values(true); let op = JoinOperator { on: JoinCondition::On { @@ -572,8 +593,8 @@ mod test { }, join_type: JoinType::Cross, }; - let mut executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); - let tuples = try_collect(&mut executor).await?; + let executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); + let tuples = try_collect(executor)?; let mut expected_set = HashSet::with_capacity(3); @@ -588,11 +609,11 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_nested_cross_join_without_on() -> Result<(), DatabaseError> { + #[test] + fn test_nested_cross_join_without_on() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await?; - let transaction = storage.transaction().await?; + let storage = RocksStorage::new(temp_dir.path())?; + let transaction = storage.transaction()?; let (keys, left, right, _) = build_join_values(false); let op = JoinOperator { on: JoinCondition::On { @@ -601,19 +622,19 @@ mod test { }, join_type: JoinType::Cross, }; - let mut executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); - let tuples = try_collect(&mut executor).await?; + let executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); + let tuples = try_collect(executor)?; assert_eq!(tuples.len(), 16); Ok(()) } - #[tokio::test] - async fn test_nested_left_semi_join() -> Result<(), DatabaseError> { + #[test] + fn test_nested_left_semi_join() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await?; - let transaction = storage.transaction().await?; + let storage = RocksStorage::new(temp_dir.path())?; + let transaction = storage.transaction()?; let (keys, left, right, filter) = build_join_values(true); let op = JoinOperator { on: JoinCondition::On { @@ -622,8 +643,8 @@ mod test { }, join_type: JoinType::LeftSemi, }; - let mut executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); - let tuples = try_collect(&mut executor).await?; + let executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); + let tuples = try_collect(executor)?; let mut expected_set = HashSet::with_capacity(1); expected_set.insert(build_integers(vec![Some(1), Some(2), Some(5)])); @@ -633,11 +654,11 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_nested_left_anti_join() -> Result<(), DatabaseError> { + #[test] + fn test_nested_left_anti_join() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await?; - let transaction = storage.transaction().await?; + let storage = 
RocksStorage::new(temp_dir.path())?; + let transaction = storage.transaction()?; let (keys, left, right, filter) = build_join_values(true); let op = JoinOperator { on: JoinCondition::On { @@ -646,8 +667,8 @@ mod test { }, join_type: JoinType::LeftAnti, }; - let mut executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); - let tuples = try_collect(&mut executor).await?; + let executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); + let tuples = try_collect(executor)?; let mut expected_set = HashSet::with_capacity(3); expected_set.insert(build_integers(vec![Some(0), Some(2), Some(4)])); @@ -659,11 +680,11 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_nested_right_out_join() -> Result<(), DatabaseError> { + #[test] + fn test_nested_right_out_join() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await?; - let transaction = storage.transaction().await?; + let storage = RocksStorage::new(temp_dir.path())?; + let transaction = storage.transaction()?; let (keys, left, right, filter) = build_join_values(true); let op = JoinOperator { on: JoinCondition::On { @@ -672,8 +693,8 @@ mod test { }, join_type: JoinType::RightOuter, }; - let mut executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); - let tuples = try_collect(&mut executor).await?; + let executor = NestedLoopJoin::from((op, left, right)).execute(&transaction); + let tuples = try_collect(executor)?; let mut expected_set = HashSet::with_capacity(4); let tuple = build_integers(vec![Some(1), Some(2), Some(5), Some(0), Some(2), Some(4)]); diff --git a/src/execution/dql/limit.rs b/src/execution/dql/limit.rs new file mode 100644 index 00000000..d50ba1ff --- /dev/null +++ b/src/execution/dql/limit.rs @@ -0,0 +1,61 @@ +use crate::execution::{build_read, Executor, ReadExecutor}; +use crate::planner::operator::limit::LimitOperator; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; + +pub struct Limit { + offset: Option, + limit: Option, + input: LogicalPlan, +} + +impl From<(LimitOperator, LogicalPlan)> for Limit { + fn from((LimitOperator { offset, limit }, input): (LimitOperator, LogicalPlan)) -> Self { + Limit { + offset, + limit, + input, + } + } +} + +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Limit { + fn execute(self, transaction: &'a T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let Limit { + offset, + limit, + input, + } = self; + + if limit.is_some() && limit.unwrap() == 0 { + return; + } + + let offset_val = offset.unwrap_or(0); + let offset_limit = offset_val + limit.unwrap_or(1) - 1; + + let mut i = 0; + let mut coroutine = build_read(input, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + i += 1; + if i - 1 < offset_val { + continue; + } else if i - 1 > offset_limit { + break; + } + + yield tuple; + } + }, + ) + } +} + +impl Limit {} diff --git a/src/execution/volcano/dql/mod.rs b/src/execution/dql/mod.rs similarity index 100% rename from src/execution/volcano/dql/mod.rs rename to src/execution/dql/mod.rs diff --git a/src/execution/volcano/dql/projection.rs b/src/execution/dql/projection.rs similarity index 53% rename from src/execution/volcano/dql/projection.rs rename to src/execution/dql/projection.rs index f0b84320..da87f0bc 100644 --- a/src/execution/volcano/dql/projection.rs +++ 
b/src/execution/dql/projection.rs @@ -1,13 +1,16 @@ use crate::catalog::ColumnRef; use crate::errors::DatabaseError; -use crate::execution::volcano::{build_read, BoxedExecutor, ReadExecutor}; +use crate::execution::{build_read, Executor, ReadExecutor}; use crate::expression::ScalarExpression; use crate::planner::operator::project::ProjectOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; +use crate::throw; use crate::types::tuple::Tuple; use crate::types::value::ValueRef; -use futures_async_stream::try_stream; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; pub struct Projection { exprs: Vec, @@ -20,9 +23,23 @@ impl From<(ProjectOperator, LogicalPlan)> for Projection { } } -impl ReadExecutor for Projection { - fn execute(self, transaction: &T) -> BoxedExecutor { - self._execute(transaction) +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Projection { + fn execute(self, transaction: &'a T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let Projection { exprs, mut input } = self; + let schema = input.output_schema().clone(); + let mut coroutine = build_read(input, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + let mut tuple = throw!(tuple); + + tuple.values = throw!(Self::projection(&tuple, &exprs, &schema)); + yield Ok(tuple); + } + }, + ) } } @@ -39,18 +56,4 @@ impl Projection { } Ok(values) } - - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &T) { - let Projection { exprs, mut input } = self; - let schema = input.output_schema().clone(); - - #[for_await] - for tuple in build_read(input, transaction) { - let mut tuple = tuple?; - - tuple.values = Self::projection(&tuple, &exprs, &schema)?; - yield tuple; - } - } } diff --git a/src/execution/dql/seq_scan.rs b/src/execution/dql/seq_scan.rs new file mode 100644 index 00000000..348387c8 --- /dev/null +++ b/src/execution/dql/seq_scan.rs @@ -0,0 +1,36 @@ +use crate::execution::{Executor, ReadExecutor}; +use crate::planner::operator::scan::ScanOperator; +use crate::storage::{Iter, Transaction}; +use crate::throw; + +pub(crate) struct SeqScan { + op: ScanOperator, +} + +impl From for SeqScan { + fn from(op: ScanOperator) -> Self { + SeqScan { op } + } +} + +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for SeqScan { + fn execute(self, transaction: &'a T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let ScanOperator { + table_name, + columns, + limit, + .. 
+ } = self.op; + + let mut iter = transaction.read(table_name, limit, columns).unwrap(); + + while let Some(tuple) = throw!(iter.next_tuple()) { + yield Ok(tuple); + } + }, + ) + } +} diff --git a/src/execution/dql/show_table.rs b/src/execution/dql/show_table.rs new file mode 100644 index 00000000..5f654426 --- /dev/null +++ b/src/execution/dql/show_table.rs @@ -0,0 +1,31 @@ +use crate::catalog::TableMeta; +use crate::execution::{Executor, ReadExecutor}; +use crate::storage::Transaction; +use crate::throw; +use crate::types::tuple::Tuple; +use crate::types::value::{DataValue, Utf8Type}; +use sqlparser::ast::CharLengthUnits; +use std::sync::Arc; + +pub struct ShowTables; + +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for ShowTables { + fn execute(self, transaction: &'a T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let metas = throw!(transaction.table_metas()); + + for TableMeta { table_name } in metas { + let values = vec![Arc::new(DataValue::Utf8 { + value: Some(table_name.to_string()), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, + })]; + + yield Ok(Tuple { id: None, values }); + } + }, + ) + } +} diff --git a/src/execution/volcano/dql/sort.rs b/src/execution/dql/sort.rs similarity index 70% rename from src/execution/volcano/dql/sort.rs rename to src/execution/dql/sort.rs index a8e6764c..0e12749b 100644 --- a/src/execution/volcano/dql/sort.rs +++ b/src/execution/dql/sort.rs @@ -1,12 +1,15 @@ use crate::errors::DatabaseError; -use crate::execution::volcano::{build_read, BoxedExecutor, ReadExecutor}; +use crate::execution::{build_read, Executor, ReadExecutor}; use crate::planner::operator::sort::{SortField, SortOperator}; use crate::planner::LogicalPlan; use crate::storage::Transaction; +use crate::throw; use crate::types::tuple::{Schema, Tuple}; -use futures_async_stream::try_stream; use itertools::Itertools; use std::mem; +use std::ops::Coroutine; +use std::ops::CoroutineState; +use std::pin::Pin; const BUCKET_SIZE: usize = u8::MAX as usize + 1; @@ -83,33 +86,33 @@ impl From<(SortOperator, LogicalPlan)> for Sort { } } -impl ReadExecutor for Sort { - fn execute(self, transaction: &T) -> BoxedExecutor { - self._execute(transaction) - } -} +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Sort { + fn execute(self, transaction: &'a T) -> Executor<'a> { + Box::new( + #[coroutine] + move || { + let Sort { + sort_fields, + limit, + mut input, + } = self; -impl Sort { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &T) { - let Sort { - sort_fields, - limit, - mut input, - } = self; - let schema = input.output_schema().clone(); - let mut tuples: Vec = vec![]; - - #[for_await] - for tuple in build_read(input, transaction) { - tuples.push(tuple?); - } - let mut tuples = sort(&schema, &sort_fields, tuples)?; - let len = limit.unwrap_or(tuples.len()); + let schema = input.output_schema().clone(); + let mut tuples: Vec = vec![]; - for tuple in tuples.drain(..len) { - yield tuple; - } + let mut coroutine = build_read(input, transaction); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) { + tuples.push(throw!(tuple)); + } + let mut tuples = throw!(sort(&schema, &sort_fields, tuples)); + let limited_tuples = tuples.drain(..limit.unwrap_or(tuples.len())).collect_vec(); + + for tuple in limited_tuples { + yield Ok(tuple); + } + }, + ) } } diff --git a/src/execution/dql/union.rs b/src/execution/dql/union.rs new file mode 100644 index 00000000..311e5523 --- /dev/null +++ 
b/src/execution/dql/union.rs
@@ -0,0 +1,44 @@
+use crate::execution::{build_read, Executor, ReadExecutor};
+use crate::planner::LogicalPlan;
+use crate::storage::Transaction;
+use std::ops::Coroutine;
+use std::ops::CoroutineState;
+use std::pin::Pin;
+
+pub struct Union {
+    left_input: LogicalPlan,
+    right_input: LogicalPlan,
+}
+
+impl From<(LogicalPlan, LogicalPlan)> for Union {
+    fn from((left_input, right_input): (LogicalPlan, LogicalPlan)) -> Self {
+        Union {
+            left_input,
+            right_input,
+        }
+    }
+}
+
+impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Union {
+    fn execute(self, transaction: &'a T) -> Executor<'a> {
+        Box::new(
+            #[coroutine]
+            move || {
+                let Union {
+                    left_input,
+                    right_input,
+                } = self;
+                let mut coroutine = build_read(left_input, transaction);
+
+                while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) {
+                    yield tuple;
+                }
+                let mut coroutine = build_read(right_input, transaction);
+
+                while let CoroutineState::Yielded(tuple) = Pin::new(&mut coroutine).resume(()) {
+                    yield tuple;
+                }
+            },
+        )
+    }
+}
diff --git a/src/execution/dql/values.rs b/src/execution/dql/values.rs
new file mode 100644
index 00000000..0b72d568
--- /dev/null
+++ b/src/execution/dql/values.rs
@@ -0,0 +1,29 @@
+use crate::execution::{Executor, ReadExecutor};
+use crate::planner::operator::values::ValuesOperator;
+use crate::storage::Transaction;
+use crate::types::tuple::Tuple;
+
+pub struct Values {
+    op: ValuesOperator,
+}
+
+impl From<ValuesOperator> for Values {
+    fn from(op: ValuesOperator) -> Self {
+        Values { op }
+    }
+}
+
+impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Values {
+    fn execute(self, _: &T) -> Executor<'a> {
+        Box::new(
+            #[coroutine]
+            move || {
+                let ValuesOperator { rows, .. } = self.op;
+
+                for values in rows {
+                    yield Ok(Tuple { id: None, values });
+                }
+            },
+        )
+    }
+}
diff --git a/src/execution/marco.rs b/src/execution/marco.rs
new file mode 100644
index 00000000..97ba1586
--- /dev/null
+++ b/src/execution/marco.rs
@@ -0,0 +1,12 @@
+#[macro_export]
+macro_rules!
throw { + ($code:expr) => { + match $code { + Ok(item) => item, + Err(err) => { + yield Err(err); + return; + } + } + }; +} diff --git a/src/execution/mod.rs b/src/execution/mod.rs index 62e058ef..20464993 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -1 +1,207 @@ -pub mod volcano; +pub(crate) mod ddl; +pub(crate) mod dml; +pub(crate) mod dql; +pub(crate) mod marco; + +use self::ddl::add_column::AddColumn; +use self::dql::join::nested_loop_join::NestedLoopJoin; +use crate::errors::DatabaseError; +use crate::execution::ddl::create_index::CreateIndex; +use crate::execution::ddl::create_table::CreateTable; +use crate::execution::ddl::drop_column::DropColumn; +use crate::execution::ddl::drop_table::DropTable; +use crate::execution::ddl::truncate::Truncate; +use crate::execution::dml::analyze::Analyze; +use crate::execution::dml::copy_from_file::CopyFromFile; +use crate::execution::dml::delete::Delete; +use crate::execution::dml::insert::Insert; +use crate::execution::dml::update::Update; +use crate::execution::dql::aggregate::hash_agg::HashAggExecutor; +use crate::execution::dql::aggregate::simple_agg::SimpleAggExecutor; +use crate::execution::dql::describe::Describe; +use crate::execution::dql::dummy::Dummy; +use crate::execution::dql::explain::Explain; +use crate::execution::dql::filter::Filter; +use crate::execution::dql::index_scan::IndexScan; +use crate::execution::dql::join::hash_join::HashJoin; +use crate::execution::dql::limit::Limit; +use crate::execution::dql::projection::Projection; +use crate::execution::dql::seq_scan::SeqScan; +use crate::execution::dql::show_table::ShowTables; +use crate::execution::dql::sort::Sort; +use crate::execution::dql::union::Union; +use crate::execution::dql::values::Values; +use crate::planner::operator::join::JoinCondition; +use crate::planner::operator::{Operator, PhysicalOption}; +use crate::planner::LogicalPlan; +use crate::storage::Transaction; +use crate::types::index::IndexInfo; +use crate::types::tuple::Tuple; +use std::ops::{Coroutine, CoroutineState}; +use std::pin::Pin; + +pub type Executor<'a> = + Box, Return = ()> + 'a + Unpin>; + +pub trait ReadExecutor<'a, T: Transaction + 'a> { + fn execute(self, transaction: &'a T) -> Executor<'a>; +} + +pub trait WriteExecutor<'a, T: Transaction + 'a> { + fn execute_mut(self, transaction: &'a mut T) -> Executor<'a>; +} + +pub fn build_read<'a, T: Transaction + 'a>(plan: LogicalPlan, transaction: &'a T) -> Executor<'a> { + let LogicalPlan { + operator, + mut childrens, + .. + } = plan; + + match operator { + Operator::Dummy => Dummy {}.execute(transaction), + Operator::Aggregate(op) => { + let input = childrens.pop().unwrap(); + + if op.groupby_exprs.is_empty() { + SimpleAggExecutor::from((op, input)).execute(transaction) + } else { + HashAggExecutor::from((op, input)).execute(transaction) + } + } + Operator::Filter(op) => { + let input = childrens.pop().unwrap(); + + Filter::from((op, input)).execute(transaction) + } + Operator::Join(op) => { + let right_input = childrens.pop().unwrap(); + let left_input = childrens.pop().unwrap(); + + match &op.on { + JoinCondition::On { on, .. 
} + if !on.is_empty() && plan.physical_option == Some(PhysicalOption::HashJoin) => + { + HashJoin::from((op, left_input, right_input)).execute(transaction) + } + _ => NestedLoopJoin::from((op, left_input, right_input)).execute(transaction), + } + } + Operator::Project(op) => { + let input = childrens.pop().unwrap(); + + Projection::from((op, input)).execute(transaction) + } + Operator::Scan(op) => { + if let Some(PhysicalOption::IndexScan(IndexInfo { + meta, + range: Some(range), + })) = plan.physical_option + { + IndexScan::from((op, meta, range)).execute(transaction) + } else { + SeqScan::from(op).execute(transaction) + } + } + Operator::Sort(op) => { + let input = childrens.pop().unwrap(); + + Sort::from((op, input)).execute(transaction) + } + Operator::Limit(op) => { + let input = childrens.pop().unwrap(); + + Limit::from((op, input)).execute(transaction) + } + Operator::Values(op) => Values::from(op).execute(transaction), + Operator::Show => ShowTables.execute(transaction), + Operator::Explain => { + let input = childrens.pop().unwrap(); + + Explain::from(input).execute(transaction) + } + Operator::Describe(op) => Describe::from(op).execute(transaction), + Operator::Union(_) => { + let right_input = childrens.pop().unwrap(); + let left_input = childrens.pop().unwrap(); + + Union::from((left_input, right_input)).execute(transaction) + } + _ => unreachable!(), + } +} + +pub fn build_write<'a, T: Transaction + 'a>( + plan: LogicalPlan, + transaction: &'a mut T, +) -> Executor<'a> { + let LogicalPlan { + operator, + mut childrens, + physical_option, + _output_schema_ref, + } = plan; + + match operator { + Operator::Insert(op) => { + let input = childrens.pop().unwrap(); + + Insert::from((op, input)).execute_mut(transaction) + } + Operator::Update(op) => { + let values = childrens.pop().unwrap(); + let input = childrens.pop().unwrap(); + + Update::from((op, input, values)).execute_mut(transaction) + } + Operator::Delete(op) => { + let input = childrens.pop().unwrap(); + + Delete::from((op, input)).execute_mut(transaction) + } + Operator::AddColumn(op) => { + let input = childrens.pop().unwrap(); + AddColumn::from((op, input)).execute_mut(transaction) + } + Operator::DropColumn(op) => { + let input = childrens.pop().unwrap(); + DropColumn::from((op, input)).execute_mut(transaction) + } + Operator::CreateTable(op) => CreateTable::from(op).execute_mut(transaction), + Operator::CreateIndex(op) => { + let input = childrens.pop().unwrap(); + + CreateIndex::from((op, input)).execute_mut(transaction) + } + Operator::DropTable(op) => DropTable::from(op).execute_mut(transaction), + Operator::Truncate(op) => Truncate::from(op).execute_mut(transaction), + Operator::CopyFromFile(op) => CopyFromFile::from(op).execute_mut(transaction), + #[warn(unused_assignments)] + Operator::CopyToFile(_op) => { + todo!() + } + Operator::Analyze(op) => { + let input = childrens.pop().unwrap(); + + Analyze::from((op, input)).execute_mut(transaction) + } + operator => build_read( + LogicalPlan { + operator, + childrens, + physical_option, + _output_schema_ref, + }, + transaction, + ), + } +} + +pub fn try_collect(mut executor: Executor) -> Result, DatabaseError> { + let mut output = Vec::new(); + + while let CoroutineState::Yielded(tuple) = Pin::new(&mut executor).resume(()) { + output.push(tuple?); + } + Ok(output) +} diff --git a/src/execution/volcano/ddl/add_column.rs b/src/execution/volcano/ddl/add_column.rs deleted file mode 100644 index 3c228035..00000000 --- a/src/execution/volcano/ddl/add_column.rs +++ /dev/null 
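The new `src/execution/mod.rs` above replaces the `futures_async_stream` streams with nightly coroutines: an executor is now a boxed `Coroutine` yielding `Result<Tuple, DatabaseError>`, `throw!` turns an `Err` into a yielded error plus an early return, and `try_collect` drives the coroutine with `Pin::new(..).resume(())`. A minimal sketch of that pattern, assuming a nightly toolchain with the `coroutines`, `coroutine_trait`, and `stmt_expr_attributes` features; the `Tuple` and `DatabaseError` types below are illustrative stand-ins, not the crate's real definitions:

```rust
// Sketch only: mirrors the shape of the `Executor<'a>` alias and `try_collect`
// added in `src/execution/mod.rs`; not the crate's actual code.
#![feature(coroutines, coroutine_trait, stmt_expr_attributes)]

use std::ops::{Coroutine, CoroutineState};
use std::pin::Pin;

// Hypothetical stand-ins for the crate's tuple and error types.
type Tuple = Vec<i32>;
#[derive(Debug)]
struct DatabaseError;

// A boxed, movable (Unpin) coroutine that yields tuples or errors and returns ().
type Executor<'a> =
    Box<dyn Coroutine<Yield = Result<Tuple, DatabaseError>, Return = ()> + 'a + Unpin>;

// A toy executor in the style of `Values::execute`: yields each row in turn.
fn values(rows: Vec<Tuple>) -> Executor<'static> {
    Box::new(
        #[coroutine]
        move || {
            for values in rows {
                yield Ok(values);
            }
        },
    )
}

// Same loop as the new `try_collect`: resume until completion, short-circuit on Err.
fn try_collect(mut executor: Executor) -> Result<Vec<Tuple>, DatabaseError> {
    let mut output = Vec::new();
    while let CoroutineState::Yielded(tuple) = Pin::new(&mut executor).resume(()) {
        output.push(tuple?);
    }
    Ok(output)
}

fn main() -> Result<(), DatabaseError> {
    let tuples = try_collect(values(vec![vec![1, 2], vec![3, 4]]))?;
    assert_eq!(tuples.len(), 2);
    Ok(())
}
```

Each executor in the new `dql`/`dml`/`ddl` modules follows the same shape: `Box::new(#[coroutine] move || { ... })` yielding tuples, consumed either by a parent operator's resume loop or by `try_collect` at the top level.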
@@ -1,84 +0,0 @@ -use crate::errors::DatabaseError; -use crate::execution::volcano::{build_read, BoxedExecutor, WriteExecutor}; -use crate::types::tuple::Tuple; -use crate::types::tuple_builder::TupleBuilder; -use crate::types::value::DataValue; -use futures_async_stream::try_stream; -use std::slice; -use std::sync::Arc; - -use crate::planner::LogicalPlan; -use crate::types::index::{Index, IndexType}; -use crate::{planner::operator::alter_table::add_column::AddColumnOperator, storage::Transaction}; - -pub struct AddColumn { - op: AddColumnOperator, - input: LogicalPlan, -} - -impl From<(AddColumnOperator, LogicalPlan)> for AddColumn { - fn from((op, input): (AddColumnOperator, LogicalPlan)) -> Self { - Self { op, input } - } -} - -impl WriteExecutor for AddColumn { - fn execute_mut(self, transaction: &mut T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl AddColumn { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - async fn _execute(mut self, transaction: &mut T) { - let AddColumnOperator { - table_name, - column, - if_not_exists, - } = &self.op; - let mut unique_values = column.desc().is_unique.then(Vec::new); - let mut tuples = Vec::new(); - let schema = self.input.output_schema(); - let mut types = Vec::with_capacity(schema.len() + 1); - - for column_ref in schema.iter() { - types.push(*column_ref.datatype()); - } - types.push(*column.datatype()); - - #[for_await] - for tuple in build_read(self.input, transaction) { - let mut tuple: Tuple = tuple?; - - if let Some(value) = column.default_value()? { - if let Some(unique_values) = &mut unique_values { - unique_values.push((tuple.id.clone().unwrap(), value.clone())); - } - tuple.values.push(value); - } else { - tuple.values.push(Arc::new(DataValue::Null)); - } - tuples.push(tuple); - } - for tuple in tuples { - transaction.append(table_name, tuple, &types, true)?; - } - let col_id = transaction.add_column(table_name, column, *if_not_exists)?; - - // Unique Index - if let (Some(unique_values), Some(unique_meta)) = ( - unique_values, - transaction - .table(table_name.clone()) - .and_then(|table| table.get_unique_index(&col_id)) - .cloned(), - ) { - for (tuple_id, value) in unique_values { - let index = Index::new(unique_meta.id, slice::from_ref(&value), IndexType::Unique); - transaction.add_index(table_name, index, &tuple_id)?; - } - } - - yield TupleBuilder::build_result("1".to_string()); - } -} diff --git a/src/execution/volcano/ddl/create_index.rs b/src/execution/volcano/ddl/create_index.rs deleted file mode 100644 index c5395121..00000000 --- a/src/execution/volcano/ddl/create_index.rs +++ /dev/null @@ -1,81 +0,0 @@ -use crate::execution::volcano::dql::projection::Projection; -use crate::execution::volcano::DatabaseError; -use crate::execution::volcano::{build_read, BoxedExecutor, WriteExecutor}; -use crate::expression::ScalarExpression; -use crate::planner::operator::create_index::CreateIndexOperator; -use crate::planner::LogicalPlan; -use crate::storage::Transaction; -use crate::types::index::Index; -use crate::types::tuple::Tuple; -use crate::types::ColumnId; -use futures_async_stream::try_stream; - -pub struct CreateIndex { - op: CreateIndexOperator, - input: LogicalPlan, -} - -impl From<(CreateIndexOperator, LogicalPlan)> for CreateIndex { - fn from((op, input): (CreateIndexOperator, LogicalPlan)) -> Self { - Self { op, input } - } -} - -impl WriteExecutor for CreateIndex { - fn execute_mut(self, transaction: &mut T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl CreateIndex { - 
#[try_stream(boxed, ok = Tuple, error = DatabaseError)] - async fn _execute(mut self, transaction: &mut T) { - let CreateIndexOperator { - table_name, - index_name, - columns, - if_not_exists, - ty, - } = self.op; - let (column_ids, column_exprs): (Vec, Vec) = columns - .into_iter() - .filter_map(|column| { - column - .id() - .map(|id| (id, ScalarExpression::ColumnRef(column))) - }) - .unzip(); - let schema = self.input.output_schema().clone(); - let index_id = match transaction.add_index_meta(&table_name, index_name, column_ids, ty) { - Ok(index_id) => index_id, - Err(DatabaseError::DuplicateIndex(index_name)) => { - return if if_not_exists { - Ok(()) - } else { - Err(DatabaseError::DuplicateIndex(index_name)) - } - } - err => err?, - }; - let mut index_values = Vec::new(); - - #[for_await] - for tuple in build_read(self.input, transaction) { - let mut tuple: Tuple = tuple?; - - let tuple_id = if let Some(tuple_id) = tuple.id.take() { - tuple_id - } else { - continue; - }; - index_values.push(( - tuple_id, - Projection::projection(&tuple, &column_exprs, &schema)?, - )); - } - for (tuple_id, values) in index_values { - let index = Index::new(index_id, &values, ty); - transaction.add_index(table_name.as_str(), index, &tuple_id)?; - } - } -} diff --git a/src/execution/volcano/ddl/create_table.rs b/src/execution/volcano/ddl/create_table.rs deleted file mode 100644 index cf49b71a..00000000 --- a/src/execution/volcano/ddl/create_table.rs +++ /dev/null @@ -1,37 +0,0 @@ -use crate::errors::DatabaseError; -use crate::execution::volcano::{BoxedExecutor, WriteExecutor}; -use crate::planner::operator::create_table::CreateTableOperator; -use crate::storage::Transaction; -use crate::types::tuple::Tuple; -use crate::types::tuple_builder::TupleBuilder; -use futures_async_stream::try_stream; - -pub struct CreateTable { - op: CreateTableOperator, -} - -impl From for CreateTable { - fn from(op: CreateTableOperator) -> Self { - CreateTable { op } - } -} - -impl WriteExecutor for CreateTable { - fn execute_mut(self, transaction: &mut T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl CreateTable { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &mut T) { - let CreateTableOperator { - table_name, - columns, - if_not_exists, - } = self.op; - let _ = transaction.create_table(table_name.clone(), columns, if_not_exists)?; - - yield TupleBuilder::build_result(format!("{}", table_name)); - } -} diff --git a/src/execution/volcano/ddl/drop_column.rs b/src/execution/volcano/ddl/drop_column.rs deleted file mode 100644 index ebb9ae40..00000000 --- a/src/execution/volcano/ddl/drop_column.rs +++ /dev/null @@ -1,76 +0,0 @@ -use crate::errors::DatabaseError; -use crate::execution::volcano::{build_read, BoxedExecutor, WriteExecutor}; -use crate::planner::operator::alter_table::drop_column::DropColumnOperator; -use crate::planner::LogicalPlan; -use crate::storage::Transaction; -use crate::types::tuple::Tuple; -use crate::types::tuple_builder::TupleBuilder; -use futures_async_stream::try_stream; - -pub struct DropColumn { - op: DropColumnOperator, - input: LogicalPlan, -} - -impl From<(DropColumnOperator, LogicalPlan)> for DropColumn { - fn from((op, input): (DropColumnOperator, LogicalPlan)) -> Self { - Self { op, input } - } -} - -impl WriteExecutor for DropColumn { - fn execute_mut(self, transaction: &mut T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl DropColumn { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - async fn 
_execute(mut self, transaction: &mut T) { - let DropColumnOperator { - table_name, - column_name, - if_exists, - } = self.op; - let tuple_columns = self.input.output_schema(); - - if let Some((column_index, is_primary)) = tuple_columns - .iter() - .enumerate() - .find(|(_, column)| column.name() == column_name) - .map(|(i, column)| (i, column.desc.is_primary)) - { - if is_primary { - Err(DatabaseError::InvalidColumn( - "drop of primary key column is not allowed.".to_owned(), - ))?; - } - let mut tuples = Vec::new(); - let mut types = Vec::with_capacity(tuple_columns.len() - 1); - - for (i, column_ref) in tuple_columns.iter().enumerate() { - if i == column_index { - continue; - } - types.push(*column_ref.datatype()); - } - #[for_await] - for tuple in build_read(self.input, transaction) { - let mut tuple: Tuple = tuple?; - let _ = tuple.values.remove(column_index); - - tuples.push(tuple); - } - for tuple in tuples { - transaction.append(&table_name, tuple, &types, true)?; - } - transaction.drop_column(&table_name, &column_name)?; - - yield TupleBuilder::build_result("1".to_string()); - } else if if_exists { - return Ok(()); - } else { - return Err(DatabaseError::NotFound("drop column", column_name)); - } - } -} diff --git a/src/execution/volcano/ddl/drop_table.rs b/src/execution/volcano/ddl/drop_table.rs deleted file mode 100644 index f80d8fa2..00000000 --- a/src/execution/volcano/ddl/drop_table.rs +++ /dev/null @@ -1,36 +0,0 @@ -use crate::errors::DatabaseError; -use crate::execution::volcano::{BoxedExecutor, WriteExecutor}; -use crate::planner::operator::drop_table::DropTableOperator; -use crate::storage::Transaction; -use crate::types::tuple::Tuple; -use crate::types::tuple_builder::TupleBuilder; -use futures_async_stream::try_stream; - -pub struct DropTable { - op: DropTableOperator, -} - -impl From for DropTable { - fn from(op: DropTableOperator) -> Self { - DropTable { op } - } -} - -impl WriteExecutor for DropTable { - fn execute_mut(self, transaction: &mut T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl DropTable { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &mut T) { - let DropTableOperator { - table_name, - if_exists, - } = self.op; - transaction.drop_table(&table_name, if_exists)?; - - yield TupleBuilder::build_result(format!("{}", table_name)); - } -} diff --git a/src/execution/volcano/ddl/truncate.rs b/src/execution/volcano/ddl/truncate.rs deleted file mode 100644 index 4d72eff8..00000000 --- a/src/execution/volcano/ddl/truncate.rs +++ /dev/null @@ -1,34 +0,0 @@ -use crate::errors::DatabaseError; -use crate::execution::volcano::{BoxedExecutor, WriteExecutor}; -use crate::planner::operator::truncate::TruncateOperator; -use crate::storage::Transaction; -use crate::types::tuple::Tuple; -use crate::types::tuple_builder::TupleBuilder; -use futures_async_stream::try_stream; - -pub struct Truncate { - op: TruncateOperator, -} - -impl From for Truncate { - fn from(op: TruncateOperator) -> Self { - Truncate { op } - } -} - -impl WriteExecutor for Truncate { - fn execute_mut(self, transaction: &mut T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl Truncate { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &mut T) { - let TruncateOperator { table_name } = self.op; - - transaction.drop_data(&table_name)?; - - yield TupleBuilder::build_result(format!("{}", table_name)); - } -} diff --git a/src/execution/volcano/dml/analyze.rs 
b/src/execution/volcano/dml/analyze.rs deleted file mode 100644 index 8e1a6178..00000000 --- a/src/execution/volcano/dml/analyze.rs +++ /dev/null @@ -1,129 +0,0 @@ -use crate::catalog::TableName; -use crate::errors::DatabaseError; -use crate::execution::volcano::dql::projection::Projection; -use crate::execution::volcano::{build_read, BoxedExecutor, WriteExecutor}; -use crate::optimizer::core::histogram::HistogramBuilder; -use crate::optimizer::core::statistics_meta::StatisticsMeta; -use crate::planner::operator::analyze::AnalyzeOperator; -use crate::planner::LogicalPlan; -use crate::storage::Transaction; -use crate::types::index::IndexMetaRef; -use crate::types::tuple::Tuple; -use crate::types::value::{DataValue, Utf8Type}; -use futures_async_stream::try_stream; -use itertools::Itertools; -use sqlparser::ast::CharLengthUnits; -use std::fmt::Formatter; -use std::sync::Arc; -use std::time::{SystemTime, UNIX_EPOCH}; -use std::{fmt, fs}; - -const DEFAULT_NUM_OF_BUCKETS: usize = 100; -const DEFAULT_STATISTICS_META_PATH: &str = "fnck_sql_statistics_metas"; - -pub struct Analyze { - table_name: TableName, - input: LogicalPlan, - index_metas: Vec, -} - -impl From<(AnalyzeOperator, LogicalPlan)> for Analyze { - fn from( - ( - AnalyzeOperator { - table_name, - index_metas, - }, - input, - ): (AnalyzeOperator, LogicalPlan), - ) -> Self { - Analyze { - table_name, - input, - index_metas, - } - } -} - -impl WriteExecutor for Analyze { - fn execute_mut(self, transaction: &mut T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl Analyze { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &mut T) { - let Analyze { - table_name, - mut input, - index_metas, - } = self; - - let schema = input.output_schema().clone(); - let mut builders = Vec::with_capacity(index_metas.len()); - let table = transaction - .table(table_name.clone()) - .cloned() - .ok_or(DatabaseError::TableNotFound)?; - - for index in table.indexes() { - builders.push(( - index.id, - index.column_exprs(&table)?, - HistogramBuilder::new(index, None)?, - )); - } - - #[for_await] - for tuple in build_read(input, transaction) { - let tuple = tuple?; - - for (_, exprs, builder) in builders.iter_mut() { - let values = Projection::projection(&tuple, exprs, &schema)?; - - if values.len() == 1 { - builder.append(&values[0])?; - } else { - builder.append(&Arc::new(DataValue::Tuple(Some(values))))?; - } - } - } - let mut values = Vec::with_capacity(builders.len()); - let ts = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("It's the end of the world!") - .as_secs(); - let dir_path = dirs::config_dir() - .expect("Your system does not have a Config directory!") - .join(DEFAULT_STATISTICS_META_PATH) - .join(table_name.as_str()) - .join(ts.to_string()); - fs::create_dir_all(&dir_path)?; - - for (index_id, _, builder) in builders { - let path: String = dir_path.join(index_id.to_string()).to_string_lossy().into(); - let (histogram, sketch) = builder.build(DEFAULT_NUM_OF_BUCKETS)?; - let meta = StatisticsMeta::new(histogram, sketch); - - meta.to_file(&path)?; - values.push(Arc::new(DataValue::Utf8 { - value: Some(path.clone()), - ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters, - })); - transaction.save_table_meta(&table_name, path, meta)?; - } - yield Tuple { id: None, values }; - } -} - -impl fmt::Display for AnalyzeOperator { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - let indexes = self.index_metas.iter().map(|index| &index.name).join(", "); - - write!(f, 
"Analyze {} -> [{}]", self.table_name, indexes)?; - - Ok(()) - } -} diff --git a/src/execution/volcano/dml/delete.rs b/src/execution/volcano/dml/delete.rs deleted file mode 100644 index 36ae51cc..00000000 --- a/src/execution/volcano/dml/delete.rs +++ /dev/null @@ -1,100 +0,0 @@ -use crate::catalog::TableName; -use crate::errors::DatabaseError; -use crate::execution::volcano::dql::projection::Projection; -use crate::execution::volcano::{build_read, BoxedExecutor, WriteExecutor}; -use crate::expression::ScalarExpression; -use crate::planner::operator::delete::DeleteOperator; -use crate::planner::LogicalPlan; -use crate::storage::Transaction; -use crate::types::index::{Index, IndexId, IndexType}; -use crate::types::tuple::Tuple; -use crate::types::value::ValueRef; -use futures_async_stream::try_stream; -use std::collections::HashMap; - -pub struct Delete { - table_name: TableName, - input: LogicalPlan, -} - -impl From<(DeleteOperator, LogicalPlan)> for Delete { - fn from((DeleteOperator { table_name, .. }, input): (DeleteOperator, LogicalPlan)) -> Self { - Delete { table_name, input } - } -} - -impl WriteExecutor for Delete { - fn execute_mut(self, transaction: &mut T) -> BoxedExecutor { - self._execute(transaction) - } -} - -struct Value { - exprs: Vec, - value_rows: Vec>, - index_ty: IndexType, -} - -impl Delete { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - async fn _execute(self, transaction: &mut T) { - let Delete { - table_name, - mut input, - } = self; - let schema = input.output_schema().clone(); - let table = transaction - .table(table_name.clone()) - .cloned() - .ok_or(DatabaseError::TableNotFound)?; - let mut tuple_ids = Vec::new(); - let mut indexes: HashMap = HashMap::new(); - - #[for_await] - for tuple in build_read(input, transaction) { - let tuple: Tuple = tuple?; - - for index_meta in table.indexes() { - if let Some(Value { - exprs, value_rows, .. - }) = indexes.get_mut(&index_meta.id) - { - value_rows.push(Projection::projection(&tuple, exprs, &schema)?); - } else { - let exprs = index_meta.column_exprs(&table)?; - let values = Projection::projection(&tuple, &exprs, &schema)?; - - indexes.insert( - index_meta.id, - Value { - exprs, - value_rows: vec![values], - index_ty: index_meta.ty, - }, - ); - } - } - tuple_ids.push(tuple.id.unwrap()); - } - for ( - index_id, - Value { - value_rows, - index_ty, - .. 
- }, - ) in indexes - { - for (i, values) in value_rows.into_iter().enumerate() { - transaction.del_index( - &table_name, - &Index::new(index_id, &values, index_ty), - Some(&tuple_ids[i]), - )?; - } - } - for tuple_id in tuple_ids { - transaction.delete(&table_name, tuple_id)?; - } - } -} diff --git a/src/execution/volcano/dml/insert.rs b/src/execution/volcano/dml/insert.rs deleted file mode 100644 index 31cdee23..00000000 --- a/src/execution/volcano/dml/insert.rs +++ /dev/null @@ -1,111 +0,0 @@ -use crate::catalog::TableName; -use crate::errors::DatabaseError; -use crate::execution::volcano::dql::projection::Projection; -use crate::execution::volcano::{build_read, BoxedExecutor, WriteExecutor}; -use crate::planner::operator::insert::InsertOperator; -use crate::planner::LogicalPlan; -use crate::storage::Transaction; -use crate::types::index::Index; -use crate::types::tuple::Tuple; -use crate::types::value::DataValue; -use futures_async_stream::try_stream; -use std::collections::HashMap; -use std::sync::Arc; - -pub struct Insert { - table_name: TableName, - input: LogicalPlan, - is_overwrite: bool, -} - -impl From<(InsertOperator, LogicalPlan)> for Insert { - fn from( - ( - InsertOperator { - table_name, - is_overwrite, - }, - input, - ): (InsertOperator, LogicalPlan), - ) -> Self { - Insert { - table_name, - input, - is_overwrite, - } - } -} - -impl WriteExecutor for Insert { - fn execute_mut(self, transaction: &mut T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl Insert { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &mut T) { - let Insert { - table_name, - mut input, - is_overwrite, - } = self; - let mut tuples = Vec::new(); - let schema = input.output_schema().clone(); - - let pk_index = schema - .iter() - .find(|col| col.desc.is_primary) - .map(|col| col.id()) - .ok_or_else(|| DatabaseError::NotNull)?; - - if let Some(table_catalog) = transaction.table(table_name.clone()).cloned() { - let types = table_catalog.types(); - #[for_await] - for tuple in build_read(input, transaction) { - let Tuple { values, .. 
} = tuple?; - - let mut tuple_map = HashMap::new(); - for (i, value) in values.into_iter().enumerate() { - tuple_map.insert(schema[i].id(), value); - } - let tuple_id = tuple_map - .get(&pk_index) - .cloned() - .ok_or(DatabaseError::NotNull)?; - let mut values = Vec::with_capacity(table_catalog.columns_len()); - - for col in table_catalog.columns() { - let value = { - let mut value = tuple_map.remove(&col.id()); - - if value.is_none() { - value = col.default_value()?; - } - value.unwrap_or_else(|| Arc::new(DataValue::none(col.datatype()))) - }; - if value.is_null() && !col.nullable { - return Err(DatabaseError::NotNull); - } - values.push(value) - } - tuples.push(Tuple { - id: Some(tuple_id), - values, - }); - } - for index_meta in table_catalog.indexes() { - let exprs = index_meta.column_exprs(&table_catalog)?; - - for tuple in tuples.iter() { - let values = Projection::projection(tuple, &exprs, &schema)?; - let index = Index::new(index_meta.id, &values, index_meta.ty); - transaction.add_index(&table_name, index, tuple.id.as_ref().unwrap())?; - } - } - for tuple in tuples { - transaction.append(&table_name, tuple, &types, is_overwrite)?; - } - } - } -} diff --git a/src/execution/volcano/dml/update.rs b/src/execution/volcano/dml/update.rs deleted file mode 100644 index bb4677e7..00000000 --- a/src/execution/volcano/dml/update.rs +++ /dev/null @@ -1,103 +0,0 @@ -use crate::catalog::TableName; -use crate::errors::DatabaseError; -use crate::execution::volcano::dql::projection::Projection; -use crate::execution::volcano::{build_read, BoxedExecutor, WriteExecutor}; -use crate::planner::operator::update::UpdateOperator; -use crate::planner::LogicalPlan; -use crate::storage::Transaction; -use crate::types::index::Index; -use crate::types::tuple::types; -use crate::types::tuple::Tuple; -use futures_async_stream::try_stream; -use std::collections::HashMap; - -pub struct Update { - table_name: TableName, - input: LogicalPlan, - values: LogicalPlan, -} - -impl From<(UpdateOperator, LogicalPlan, LogicalPlan)> for Update { - fn from( - (UpdateOperator { table_name }, input, values): (UpdateOperator, LogicalPlan, LogicalPlan), - ) -> Self { - Update { - table_name, - input, - values, - } - } -} - -impl WriteExecutor for Update { - fn execute_mut(self, transaction: &mut T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl Update { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &mut T) { - let Update { - table_name, - mut input, - mut values, - } = self; - let values_schema = values.output_schema().clone(); - let input_schema = input.output_schema().clone(); - let types = types(&input_schema); - - if let Some(table_catalog) = transaction.table(table_name.clone()).cloned() { - let mut value_map = HashMap::new(); - let mut tuples = Vec::new(); - - // only once - #[for_await] - for tuple in build_read(values, transaction) { - let Tuple { values, .. 
} = tuple?; - for i in 0..values.len() { - value_map.insert(values_schema[i].id(), values[i].clone()); - } - } - #[for_await] - for tuple in build_read(input, transaction) { - let tuple: Tuple = tuple?; - - tuples.push(tuple); - } - let mut index_metas = Vec::new(); - for index_meta in table_catalog.indexes() { - let exprs = index_meta.column_exprs(&table_catalog)?; - - for tuple in tuples.iter() { - let values = Projection::projection(tuple, &exprs, &input_schema)?; - let index = Index::new(index_meta.id, &values, index_meta.ty); - transaction.del_index(&table_name, &index, Some(tuple.id.as_ref().unwrap()))?; - } - index_metas.push((index_meta, exprs)); - } - for mut tuple in tuples { - let mut is_overwrite = true; - - for (i, column) in input_schema.iter().enumerate() { - if let Some(value) = value_map.get(&column.id()) { - if column.desc.is_primary { - let old_key = tuple.id.replace(value.clone()).unwrap(); - - transaction.delete(&table_name, old_key)?; - is_overwrite = false; - } - tuple.values[i] = value.clone(); - } - } - for (index_meta, exprs) in index_metas.iter() { - let values = Projection::projection(&tuple, exprs, &input_schema)?; - let index = Index::new(index_meta.id, &values, index_meta.ty); - transaction.add_index(&table_name, index, tuple.id.as_ref().unwrap())?; - } - - transaction.append(&table_name, tuple, &types, is_overwrite)?; - } - } - } -} diff --git a/src/execution/volcano/dql/aggregate/simple_agg.rs b/src/execution/volcano/dql/aggregate/simple_agg.rs deleted file mode 100644 index fb2db157..00000000 --- a/src/execution/volcano/dql/aggregate/simple_agg.rs +++ /dev/null @@ -1,62 +0,0 @@ -use crate::errors::DatabaseError; -use crate::execution::volcano::dql::aggregate::create_accumulators; -use crate::execution::volcano::{build_read, BoxedExecutor, ReadExecutor}; -use crate::expression::ScalarExpression; -use crate::planner::operator::aggregate::AggregateOperator; -use crate::planner::LogicalPlan; -use crate::storage::Transaction; -use crate::types::tuple::Tuple; -use crate::types::value::ValueRef; -use futures_async_stream::try_stream; -use itertools::Itertools; - -pub struct SimpleAggExecutor { - agg_calls: Vec, - input: LogicalPlan, -} - -impl From<(AggregateOperator, LogicalPlan)> for SimpleAggExecutor { - fn from( - (AggregateOperator { agg_calls, .. }, input): (AggregateOperator, LogicalPlan), - ) -> Self { - SimpleAggExecutor { agg_calls, input } - } -} - -impl ReadExecutor for SimpleAggExecutor { - fn execute(self, transaction: &T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl SimpleAggExecutor { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &T) { - let SimpleAggExecutor { - agg_calls, - mut input, - } = self; - let mut accs = create_accumulators(&agg_calls)?; - let schema = input.output_schema().clone(); - - #[for_await] - for tuple in build_read(input, transaction) { - let tuple = tuple?; - - let values: Vec = agg_calls - .iter() - .map(|expr| match expr { - ScalarExpression::AggCall { args, .. 
} => args[0].eval(&tuple, &schema), - _ => unreachable!(), - }) - .try_collect()?; - - for (acc, value) in accs.iter_mut().zip_eq(values.iter()) { - acc.update_value(value)?; - } - } - let values: Vec = accs.into_iter().map(|acc| acc.evaluate()).try_collect()?; - - yield Tuple { id: None, values }; - } -} diff --git a/src/execution/volcano/dql/describe.rs b/src/execution/volcano/dql/describe.rs deleted file mode 100644 index 814a7145..00000000 --- a/src/execution/volcano/dql/describe.rs +++ /dev/null @@ -1,104 +0,0 @@ -use crate::catalog::{ColumnCatalog, TableName}; -use crate::execution::volcano::DatabaseError; -use crate::execution::volcano::{BoxedExecutor, ReadExecutor}; -use crate::planner::operator::describe::DescribeOperator; -use crate::storage::Transaction; -use crate::types::tuple::Tuple; -use crate::types::value::{DataValue, Utf8Type, ValueRef}; -use futures_async_stream::try_stream; -use lazy_static::lazy_static; -use sqlparser::ast::CharLengthUnits; -use std::sync::Arc; - -lazy_static! { - static ref PRIMARY_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 { - value: Some(String::from("PRIMARY")), - ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters - }); - static ref UNIQUE_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 { - value: Some(String::from("UNIQUE")), - ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters - }); - static ref EMPTY_KEY_TYPE: ValueRef = Arc::new(DataValue::Utf8 { - value: Some(String::from("EMPTY")), - ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters - }); -} - -pub struct Describe { - table_name: TableName, -} - -impl From for Describe { - fn from(op: DescribeOperator) -> Self { - Describe { - table_name: op.table_name, - } - } -} - -impl ReadExecutor for Describe { - fn execute(self, transaction: &T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl Describe { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &T) { - let table = transaction - .table(self.table_name.clone()) - .ok_or(DatabaseError::TableNotFound)?; - let key_fn = |column: &ColumnCatalog| { - if column.desc.is_primary { - PRIMARY_KEY_TYPE.clone() - } else if column.desc.is_unique { - UNIQUE_KEY_TYPE.clone() - } else { - EMPTY_KEY_TYPE.clone() - } - }; - - for column in table.columns() { - let datatype = column.datatype(); - let default = column - .desc - .default - .as_ref() - .map(|expr| format!("{}", expr)) - .unwrap_or_else(|| "null".to_string()); - let values = vec![ - Arc::new(DataValue::Utf8 { - value: Some(column.name().to_string()), - ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters, - }), - Arc::new(DataValue::Utf8 { - value: Some(datatype.to_string()), - ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters, - }), - Arc::new(DataValue::Utf8 { - value: datatype.raw_len().map(|len| len.to_string()), - ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters, - }), - Arc::new(DataValue::Utf8 { - value: Some(column.nullable.to_string()), - ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters, - }), - key_fn(column), - Arc::new(DataValue::Utf8 { - value: Some(default), - ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters, - }), - ]; - yield Tuple { id: None, values }; - } - } -} diff --git a/src/execution/volcano/dql/dummy.rs b/src/execution/volcano/dql/dummy.rs deleted file mode 100644 index c215ff92..00000000 --- a/src/execution/volcano/dql/dummy.rs +++ /dev/null @@ -1,23 +0,0 @@ -use 
crate::errors::DatabaseError; -use crate::execution::volcano::{BoxedExecutor, ReadExecutor}; -use crate::storage::Transaction; -use crate::types::tuple::Tuple; -use futures_async_stream::try_stream; - -pub struct Dummy {} - -impl ReadExecutor for Dummy { - fn execute(self, _: &T) -> BoxedExecutor { - self._execute() - } -} - -impl Dummy { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self) { - yield Tuple { - id: None, - values: vec![], - } - } -} diff --git a/src/execution/volcano/dql/explain.rs b/src/execution/volcano/dql/explain.rs deleted file mode 100644 index 162657ef..00000000 --- a/src/execution/volcano/dql/explain.rs +++ /dev/null @@ -1,38 +0,0 @@ -use crate::errors::DatabaseError; -use crate::execution::volcano::{BoxedExecutor, ReadExecutor}; -use crate::planner::LogicalPlan; -use crate::storage::Transaction; -use crate::types::tuple::Tuple; -use crate::types::value::{DataValue, Utf8Type}; -use futures_async_stream::try_stream; -use sqlparser::ast::CharLengthUnits; -use std::sync::Arc; - -pub struct Explain { - plan: LogicalPlan, -} - -impl From for Explain { - fn from(plan: LogicalPlan) -> Self { - Explain { plan } - } -} - -impl ReadExecutor for Explain { - fn execute(self, _: &T) -> BoxedExecutor { - self._execute() - } -} - -impl Explain { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self) { - let values = vec![Arc::new(DataValue::Utf8 { - value: Some(self.plan.explain(0)), - ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters, - })]; - - yield Tuple { id: None, values }; - } -} diff --git a/src/execution/volcano/dql/filter.rs b/src/execution/volcano/dql/filter.rs deleted file mode 100644 index 49a12dcc..00000000 --- a/src/execution/volcano/dql/filter.rs +++ /dev/null @@ -1,45 +0,0 @@ -use crate::errors::DatabaseError; -use crate::execution::volcano::{build_read, BoxedExecutor, ReadExecutor}; -use crate::expression::ScalarExpression; -use crate::planner::operator::filter::FilterOperator; -use crate::planner::LogicalPlan; -use crate::storage::Transaction; -use crate::types::tuple::Tuple; -use futures_async_stream::try_stream; - -pub struct Filter { - predicate: ScalarExpression, - input: LogicalPlan, -} - -impl From<(FilterOperator, LogicalPlan)> for Filter { - fn from((FilterOperator { predicate, .. }, input): (FilterOperator, LogicalPlan)) -> Self { - Filter { predicate, input } - } -} - -impl ReadExecutor for Filter { - fn execute(self, transaction: &T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl Filter { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &T) { - let Filter { - predicate, - mut input, - } = self; - let schema = input.output_schema().clone(); - - #[for_await] - for tuple in build_read(input, transaction) { - let tuple = tuple?; - - if predicate.eval(&tuple, &schema)?.is_true()? 
{ - yield tuple; - } - } - } -} diff --git a/src/execution/volcano/dql/index_scan.rs b/src/execution/volcano/dql/index_scan.rs deleted file mode 100644 index c2c28def..00000000 --- a/src/execution/volcano/dql/index_scan.rs +++ /dev/null @@ -1,53 +0,0 @@ -use crate::errors::DatabaseError; -use crate::execution::volcano::{BoxedExecutor, ReadExecutor}; -use crate::expression::range_detacher::Range; -use crate::planner::operator::scan::ScanOperator; -use crate::storage::{Iter, Transaction}; -use crate::types::index::IndexMetaRef; -use crate::types::tuple::Tuple; -use futures_async_stream::try_stream; - -pub(crate) struct IndexScan { - op: ScanOperator, - index_by: IndexMetaRef, - ranges: Vec, -} - -impl From<(ScanOperator, IndexMetaRef, Range)> for IndexScan { - fn from((op, index_by, range): (ScanOperator, IndexMetaRef, Range)) -> Self { - let ranges = match range { - Range::SortedRanges(ranges) => ranges, - range => vec![range], - }; - - IndexScan { - op, - index_by, - ranges, - } - } -} - -impl ReadExecutor for IndexScan { - fn execute(self, transaction: &T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl IndexScan { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &T) { - let ScanOperator { - table_name, - columns, - limit, - .. - } = self.op; - let mut iter = - transaction.read_by_index(table_name, limit, columns, self.index_by, self.ranges)?; - - while let Some(tuple) = iter.next_tuple()? { - yield tuple; - } - } -} diff --git a/src/execution/volcano/dql/limit.rs b/src/execution/volcano/dql/limit.rs deleted file mode 100644 index 11543407..00000000 --- a/src/execution/volcano/dql/limit.rs +++ /dev/null @@ -1,59 +0,0 @@ -use crate::errors::DatabaseError; -use crate::execution::volcano::{build_read, BoxedExecutor, ReadExecutor}; -use crate::planner::operator::limit::LimitOperator; -use crate::planner::LogicalPlan; -use crate::storage::Transaction; -use crate::types::tuple::Tuple; -use futures::StreamExt; -use futures_async_stream::try_stream; - -pub struct Limit { - offset: Option, - limit: Option, - input: LogicalPlan, -} - -impl From<(LimitOperator, LogicalPlan)> for Limit { - fn from((LimitOperator { offset, limit }, input): (LimitOperator, LogicalPlan)) -> Self { - Limit { - offset, - limit, - input, - } - } -} - -impl ReadExecutor for Limit { - fn execute(self, transaction: &T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl Limit { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &T) { - let Limit { - offset, - limit, - input, - } = self; - - if limit.is_some() && limit.unwrap() == 0 { - return Ok(()); - } - - let offset_val = offset.unwrap_or(0); - let offset_limit = offset_val + limit.unwrap_or(1) - 1; - - #[for_await] - for (i, tuple) in build_read(input, transaction).enumerate() { - if i < offset_val { - continue; - } else if i > offset_limit { - break; - } - - yield tuple?; - } - } -} diff --git a/src/execution/volcano/dql/seq_scan.rs b/src/execution/volcano/dql/seq_scan.rs deleted file mode 100644 index ad9c671f..00000000 --- a/src/execution/volcano/dql/seq_scan.rs +++ /dev/null @@ -1,39 +0,0 @@ -use crate::errors::DatabaseError; -use crate::execution::volcano::{BoxedExecutor, ReadExecutor}; -use crate::planner::operator::scan::ScanOperator; -use crate::storage::{Iter, Transaction}; -use crate::types::tuple::Tuple; -use futures_async_stream::try_stream; - -pub(crate) struct SeqScan { - op: ScanOperator, -} - -impl From for SeqScan { - fn from(op: 
ScanOperator) -> Self { - SeqScan { op } - } -} - -impl ReadExecutor for SeqScan { - fn execute(self, transaction: &T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl SeqScan { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &T) { - let ScanOperator { - table_name, - columns, - limit, - .. - } = self.op; - let mut iter = transaction.read(table_name, limit, columns)?; - - while let Some(tuple) = iter.next_tuple()? { - yield tuple; - } - } -} diff --git a/src/execution/volcano/dql/show_table.rs b/src/execution/volcano/dql/show_table.rs deleted file mode 100644 index b9862f08..00000000 --- a/src/execution/volcano/dql/show_table.rs +++ /dev/null @@ -1,34 +0,0 @@ -use crate::catalog::TableMeta; -use crate::errors::DatabaseError; -use crate::execution::volcano::{BoxedExecutor, ReadExecutor}; -use crate::storage::Transaction; -use crate::types::tuple::Tuple; -use crate::types::value::{DataValue, Utf8Type}; -use futures_async_stream::try_stream; -use sqlparser::ast::CharLengthUnits; -use std::sync::Arc; - -pub struct ShowTables; - -impl ReadExecutor for ShowTables { - fn execute(self, transaction: &T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl ShowTables { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &T) { - let metas = transaction.table_metas()?; - - for TableMeta { table_name } in metas { - let values = vec![Arc::new(DataValue::Utf8 { - value: Some(table_name.to_string()), - ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters, - })]; - - yield Tuple { id: None, values }; - } - } -} diff --git a/src/execution/volcano/dql/union.rs b/src/execution/volcano/dql/union.rs deleted file mode 100644 index 6f6cf763..00000000 --- a/src/execution/volcano/dql/union.rs +++ /dev/null @@ -1,45 +0,0 @@ -use crate::errors::DatabaseError; -use crate::execution::volcano::{build_read, BoxedExecutor, ReadExecutor}; -use crate::planner::LogicalPlan; -use crate::storage::Transaction; -use crate::types::tuple::Tuple; -use futures_async_stream::try_stream; - -pub struct Union { - left_input: LogicalPlan, - right_input: LogicalPlan, -} - -impl From<(LogicalPlan, LogicalPlan)> for Union { - fn from((left_input, right_input): (LogicalPlan, LogicalPlan)) -> Self { - Union { - left_input, - right_input, - } - } -} - -impl ReadExecutor for Union { - fn execute(self, transaction: &T) -> BoxedExecutor { - self._execute(transaction) - } -} - -impl Union { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self, transaction: &T) { - let Union { - left_input, - right_input, - } = self; - - #[for_await] - for tuple in build_read(left_input, transaction) { - yield tuple?; - } - #[for_await] - for tuple in build_read(right_input, transaction) { - yield tuple?; - } - } -} diff --git a/src/execution/volcano/dql/values.rs b/src/execution/volcano/dql/values.rs deleted file mode 100644 index d276cc01..00000000 --- a/src/execution/volcano/dql/values.rs +++ /dev/null @@ -1,33 +0,0 @@ -use crate::errors::DatabaseError; -use crate::execution::volcano::{BoxedExecutor, ReadExecutor}; -use crate::planner::operator::values::ValuesOperator; -use crate::storage::Transaction; -use crate::types::tuple::Tuple; -use futures_async_stream::try_stream; - -pub struct Values { - op: ValuesOperator, -} - -impl From for Values { - fn from(op: ValuesOperator) -> Self { - Values { op } - } -} - -impl ReadExecutor for Values { - fn execute(self, _: &T) -> BoxedExecutor { - 
self._execute() - } -} - -impl Values { - #[try_stream(boxed, ok = Tuple, error = DatabaseError)] - pub async fn _execute(self) { - let ValuesOperator { rows, .. } = self.op; - - for values in rows { - yield Tuple { id: None, values }; - } - } -} diff --git a/src/execution/volcano/mod.rs b/src/execution/volcano/mod.rs deleted file mode 100644 index 472ace14..00000000 --- a/src/execution/volcano/mod.rs +++ /dev/null @@ -1,205 +0,0 @@ -pub(crate) mod ddl; -pub(crate) mod dml; -pub(crate) mod dql; - -use crate::errors::DatabaseError; -use crate::execution::volcano::ddl::create_index::CreateIndex; -use crate::execution::volcano::ddl::create_table::CreateTable; -use crate::execution::volcano::ddl::drop_column::DropColumn; -use crate::execution::volcano::ddl::drop_table::DropTable; -use crate::execution::volcano::ddl::truncate::Truncate; -use crate::execution::volcano::dml::analyze::Analyze; -use crate::execution::volcano::dml::copy_from_file::CopyFromFile; -use crate::execution::volcano::dml::delete::Delete; -use crate::execution::volcano::dml::insert::Insert; -use crate::execution::volcano::dml::update::Update; -use crate::execution::volcano::dql::aggregate::hash_agg::HashAggExecutor; -use crate::execution::volcano::dql::aggregate::simple_agg::SimpleAggExecutor; -use crate::execution::volcano::dql::describe::Describe; -use crate::execution::volcano::dql::dummy::Dummy; -use crate::execution::volcano::dql::explain::Explain; -use crate::execution::volcano::dql::filter::Filter; -use crate::execution::volcano::dql::index_scan::IndexScan; -use crate::execution::volcano::dql::join::hash_join::HashJoin; -use crate::execution::volcano::dql::limit::Limit; -use crate::execution::volcano::dql::projection::Projection; -use crate::execution::volcano::dql::seq_scan::SeqScan; -use crate::execution::volcano::dql::show_table::ShowTables; -use crate::execution::volcano::dql::sort::Sort; -use crate::execution::volcano::dql::union::Union; -use crate::execution::volcano::dql::values::Values; -use crate::planner::operator::join::JoinCondition; -use crate::planner::operator::{Operator, PhysicalOption}; -use crate::planner::LogicalPlan; -use crate::storage::Transaction; -use crate::types::index::IndexInfo; -use crate::types::tuple::Tuple; -use futures::stream::BoxStream; -use futures::TryStreamExt; - -use self::ddl::add_column::AddColumn; -use self::dql::join::nested_loop_join::NestedLoopJoin; - -pub type BoxedExecutor<'a> = BoxStream<'a, Result>; - -pub trait ReadExecutor { - fn execute(self, transaction: &T) -> BoxedExecutor; -} - -pub trait WriteExecutor { - fn execute_mut(self, transaction: &mut T) -> BoxedExecutor; -} - -pub fn build_read(plan: LogicalPlan, transaction: &T) -> BoxedExecutor { - let LogicalPlan { - operator, - mut childrens, - .. - } = plan; - - match operator { - Operator::Dummy => Dummy {}.execute(transaction), - Operator::Aggregate(op) => { - let input = childrens.pop().unwrap(); - - if op.groupby_exprs.is_empty() { - SimpleAggExecutor::from((op, input)).execute(transaction) - } else { - HashAggExecutor::from((op, input)).execute(transaction) - } - } - Operator::Filter(op) => { - let input = childrens.pop().unwrap(); - - Filter::from((op, input)).execute(transaction) - } - Operator::Join(op) => { - let right_input = childrens.pop().unwrap(); - let left_input = childrens.pop().unwrap(); - - match &op.on { - JoinCondition::On { on, .. 
} - if !on.is_empty() && plan.physical_option == Some(PhysicalOption::HashJoin) => - { - HashJoin::from((op, left_input, right_input)).execute(transaction) - } - _ => NestedLoopJoin::from((op, left_input, right_input)).execute(transaction), - } - } - Operator::Project(op) => { - let input = childrens.pop().unwrap(); - - Projection::from((op, input)).execute(transaction) - } - Operator::Scan(op) => { - if let Some(PhysicalOption::IndexScan(IndexInfo { - meta, - range: Some(range), - })) = plan.physical_option - { - IndexScan::from((op, meta, range)).execute(transaction) - } else { - SeqScan::from(op).execute(transaction) - } - } - Operator::Sort(op) => { - let input = childrens.pop().unwrap(); - - Sort::from((op, input)).execute(transaction) - } - Operator::Limit(op) => { - let input = childrens.pop().unwrap(); - - Limit::from((op, input)).execute(transaction) - } - Operator::Values(op) => Values::from(op).execute(transaction), - Operator::Show => ShowTables.execute(transaction), - Operator::Explain => { - let input = childrens.pop().unwrap(); - - Explain::from(input).execute(transaction) - } - Operator::Describe(op) => Describe::from(op).execute(transaction), - Operator::Union(_) => { - let right_input = childrens.pop().unwrap(); - let left_input = childrens.pop().unwrap(); - - Union::from((left_input, right_input)).execute(transaction) - } - _ => unreachable!(), - } -} - -pub fn build_write(plan: LogicalPlan, transaction: &mut T) -> BoxedExecutor { - let LogicalPlan { - operator, - mut childrens, - physical_option, - _output_schema_ref, - } = plan; - - match operator { - Operator::Insert(op) => { - let input = childrens.pop().unwrap(); - - Insert::from((op, input)).execute_mut(transaction) - } - Operator::Update(op) => { - let values = childrens.pop().unwrap(); - let input = childrens.pop().unwrap(); - - Update::from((op, input, values)).execute_mut(transaction) - } - Operator::Delete(op) => { - let input = childrens.pop().unwrap(); - - Delete::from((op, input)).execute_mut(transaction) - } - Operator::AddColumn(op) => { - let input = childrens.pop().unwrap(); - AddColumn::from((op, input)).execute_mut(transaction) - } - Operator::DropColumn(op) => { - let input = childrens.pop().unwrap(); - DropColumn::from((op, input)).execute_mut(transaction) - } - Operator::CreateTable(op) => CreateTable::from(op).execute_mut(transaction), - Operator::CreateIndex(op) => { - let input = childrens.pop().unwrap(); - - CreateIndex::from((op, input)).execute_mut(transaction) - } - Operator::DropTable(op) => DropTable::from(op).execute_mut(transaction), - Operator::Truncate(op) => Truncate::from(op).execute_mut(transaction), - Operator::CopyFromFile(op) => CopyFromFile::from(op).execute_mut(transaction), - #[warn(unused_assignments)] - Operator::CopyToFile(_op) => { - todo!() - } - Operator::Analyze(op) => { - let input = childrens.pop().unwrap(); - - Analyze::from((op, input)).execute_mut(transaction) - } - operator => build_read( - LogicalPlan { - operator, - childrens, - physical_option, - _output_schema_ref, - }, - transaction, - ), - } -} - -pub async fn try_collect<'a>( - executor: &mut BoxedExecutor<'a>, -) -> Result, DatabaseError> { - let mut output = Vec::new(); - - while let Some(tuple) = executor.try_next().await? 
{ - output.push(tuple); - } - Ok(output) -} diff --git a/src/expression/range_detacher.rs b/src/expression/range_detacher.rs index afc33d4b..a44c96a0 100644 --- a/src/expression/range_detacher.rs +++ b/src/expression/range_detacher.rs @@ -789,7 +789,7 @@ mod test { use crate::planner::operator::filter::FilterOperator; use crate::planner::operator::Operator; use crate::planner::LogicalPlan; - use crate::storage::kipdb::KipTransaction; + use crate::storage::rocksdb::RocksTransaction; use crate::types::value::DataValue; use std::ops::Bound; use std::sync::Arc; @@ -801,7 +801,7 @@ mod test { HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::SimplifyFilter], ) - .find_best::(None)?; + .find_best::(None)?; if let Operator::Filter(filter_op) = best_plan.childrens[0].clone().operator { Ok(Some(filter_op)) } else { @@ -809,24 +809,24 @@ mod test { } } - #[tokio::test] - async fn test_detach_ideal_cases() -> Result<(), DatabaseError> { + #[test] + fn test_detach_ideal_cases() -> Result<(), DatabaseError> { { - let plan = select_sql_run("select * from t1 where c1 = 1").await?; + let plan = select_sql_run("select * from t1 where c1 = 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 = 1 => {}", range); assert_eq!(range, Range::Eq(Arc::new(DataValue::Int32(Some(1))))) } { - let plan = select_sql_run("select * from t1 where c1 != 1").await?; + let plan = select_sql_run("select * from t1 where c1 != 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate); println!("c1 != 1 => {:#?}", range); assert_eq!(range, None) } { - let plan = select_sql_run("select * from t1 where c1 > 1").await?; + let plan = select_sql_run("select * from t1 where c1 > 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 > 1 => c1: {}", range); @@ -839,7 +839,7 @@ mod test { ) } { - let plan = select_sql_run("select * from t1 where c1 >= 1").await?; + let plan = select_sql_run("select * from t1 where c1 >= 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 >= 1 => c1: {}", range); @@ -852,7 +852,7 @@ mod test { ) } { - let plan = select_sql_run("select * from t1 where c1 < 1").await?; + let plan = select_sql_run("select * from t1 where c1 < 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 < 1 => c1: {}", range); @@ -865,7 +865,7 @@ mod test { ) } { - let plan = select_sql_run("select * from t1 where c1 <= 1").await?; + let plan = select_sql_run("select * from t1 where c1 <= 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 <= 1 => c1: {}", range); @@ -878,7 +878,7 @@ mod test { ) } { - let plan = select_sql_run("select * from t1 where c1 < 1 and c1 >= 0").await?; + let plan = select_sql_run("select * from t1 where c1 < 1 and c1 >= 0")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 < 1 and c1 >= 0 => c1: {}", range); @@ -891,7 +891,7 @@ mod test { ) } { - let plan = select_sql_run("select * from t1 where c1 < 1 or c1 >= 0").await?; + let plan = select_sql_run("select * from t1 where c1 < 1 or c1 >= 0")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", 
&0).detach(&op.predicate).unwrap(); println!("c1 < 1 or c1 >= 0 => c1: {}", range); @@ -905,14 +905,14 @@ mod test { } // and & or { - let plan = select_sql_run("select * from t1 where c1 = 1 and c1 = 0").await?; + let plan = select_sql_run("select * from t1 where c1 = 1 and c1 = 0")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 = 1 and c1 = 0 => c1: {}", range); assert_eq!(range, Range::Dummy) } { - let plan = select_sql_run("select * from t1 where c1 = 1 or c1 = 0").await?; + let plan = select_sql_run("select * from t1 where c1 = 1 or c1 = 0")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 = 1 or c1 = 0 => c1: {}", range); @@ -925,14 +925,14 @@ mod test { ) } { - let plan = select_sql_run("select * from t1 where c1 = 1 and c1 = 1").await?; + let plan = select_sql_run("select * from t1 where c1 = 1 and c1 = 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 = 1 and c1 = 1 => c1: {}", range); assert_eq!(range, Range::Eq(Arc::new(DataValue::Int32(Some(1))))) } { - let plan = select_sql_run("select * from t1 where c1 = 1 or c1 = 1").await?; + let plan = select_sql_run("select * from t1 where c1 = 1 or c1 = 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 = 1 or c1 = 1 => c1: {}", range); @@ -940,21 +940,21 @@ mod test { } { - let plan = select_sql_run("select * from t1 where c1 > 1 and c1 = 1").await?; + let plan = select_sql_run("select * from t1 where c1 > 1 and c1 = 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 > 1 and c1 = 1 => c1: {}", range); assert_eq!(range, Range::Dummy) } { - let plan = select_sql_run("select * from t1 where c1 >= 1 and c1 = 1").await?; + let plan = select_sql_run("select * from t1 where c1 >= 1 and c1 = 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 >= 1 and c1 = 1 => c1: {}", range); assert_eq!(range, Range::Eq(Arc::new(DataValue::Int32(Some(1))))) } { - let plan = select_sql_run("select * from t1 where c1 > 1 or c1 = 1").await?; + let plan = select_sql_run("select * from t1 where c1 > 1 or c1 = 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 > 1 or c1 = 1 => c1: {}", range); @@ -967,7 +967,7 @@ mod test { ) } { - let plan = select_sql_run("select * from t1 where c1 >= 1 or c1 = 1").await?; + let plan = select_sql_run("select * from t1 where c1 >= 1 or c1 = 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 >= 1 or c1 = 1 => c1: {}", range); @@ -983,8 +983,7 @@ mod test { { let plan = select_sql_run( "select * from t1 where (c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)", - ) - .await?; + )?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!( @@ -1000,9 +999,9 @@ mod test { ) } { - let plan = - select_sql_run("select * from t1 where (c1 > 0 and c1 < 3) or (c1 > 1 and c1 < 4)") - .await?; + let plan = select_sql_run( + "select * from t1 where (c1 > 0 and c1 < 3) or (c1 > 1 and c1 < 4)", + )?; let op = plan_filter(plan)?.unwrap(); let range = 
RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!( @@ -1021,8 +1020,7 @@ mod test { { let plan = select_sql_run( "select * from t1 where ((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0", - ) - .await?; + )?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!( @@ -1034,8 +1032,7 @@ mod test { { let plan = select_sql_run( "select * from t1 where ((c1 > 0 and c1 < 3) or (c1 > 1 and c1 < 4)) and c1 = 0", - ) - .await?; + )?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!( @@ -1047,8 +1044,7 @@ mod test { { let plan = select_sql_run( "select * from t1 where ((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) or c1 = 0", - ) - .await?; + )?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!( @@ -1069,8 +1065,7 @@ mod test { { let plan = select_sql_run( "select * from t1 where ((c1 > 0 and c1 < 3) or (c1 > 1 and c1 < 4)) or c1 = 0", - ) - .await?; + )?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!( @@ -1087,14 +1082,14 @@ mod test { } { - let plan = select_sql_run("select * from t1 where (((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0) and (c1 >= 0 and c1 <= 2)").await?; + let plan = select_sql_run("select * from t1 where (((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0) and (c1 >= 0 and c1 <= 2)")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("(((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0) and (c1 >= 0 and c1 <= 2) => c1: {}", range); assert_eq!(range, Range::Dummy) } { - let plan = select_sql_run("select * from t1 where (((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0) or (c1 >= 0 and c1 <= 2)").await?; + let plan = select_sql_run("select * from t1 where (((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0) or (c1 >= 0 and c1 <= 2)")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("(((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0) or (c1 >= 0 and c1 <= 2) => c1: {}", range); @@ -1108,7 +1103,7 @@ mod test { } // ranges and ranges { - let plan = select_sql_run("select * from t1 where ((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))").await?; + let plan = select_sql_run("select * from t1 where ((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); @@ -1127,7 +1122,7 @@ mod test { ) } { - let plan = select_sql_run("select * from t1 where ((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))").await?; + let plan = select_sql_run("select * from t1 where ((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); @@ -1147,7 +1142,7 @@ mod test { } // empty { - let plan = 
select_sql_run("select * from t1 where true").await?; + let plan = select_sql_run("select * from t1 where true")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate); println!("empty => c1: {:#?}", range); @@ -1155,21 +1150,21 @@ mod test { } // other column { - let plan = select_sql_run("select * from t1 where c2 = 1").await?; + let plan = select_sql_run("select * from t1 where c2 = 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate); println!("c2 = 1 => c1: {:#?}", range); assert_eq!(range, None) } { - let plan = select_sql_run("select * from t1 where c1 > 1 or c2 > 1").await?; + let plan = select_sql_run("select * from t1 where c1 > 1 or c2 > 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate); println!("c1 > 1 or c2 > 1 => c1: {:#?}", range); assert_eq!(range, None) } { - let plan = select_sql_run("select * from t1 where c1 > c2 or c2 > 1").await?; + let plan = select_sql_run("select * from t1 where c1 > c2 or c2 > 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate); println!("c1 > c2 or c2 > 1 => c1: {:#?}", range); @@ -1179,8 +1174,7 @@ mod test { { let plan = select_sql_run( "select * from t1 where c1 = 5 or (c1 > 5 and (c1 > 6 or c1 < 8) and c1 < 12)", - ) - .await?; + )?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!( @@ -1199,8 +1193,7 @@ mod test { { let plan = select_sql_run( "select * from t1 where ((c2 >= -8 and -4 >= c1) or (c1 >= 0 and 5 > c2)) and ((c2 > 0 and c1 <= 1) or (c1 > -8 and c2 < -6))", - ) - .await?; + )?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!( @@ -1226,18 +1219,18 @@ mod test { } // Tips: `null` should be First - #[tokio::test] - async fn test_detach_null_cases() -> Result<(), DatabaseError> { + #[test] + fn test_detach_null_cases() -> Result<(), DatabaseError> { // eq { - let plan = select_sql_run("select * from t1 where c1 = null").await?; + let plan = select_sql_run("select * from t1 where c1 = null")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 = null => c1: {}", range); assert_eq!(range, Range::Eq(Arc::new(DataValue::Int32(None)))) } { - let plan = select_sql_run("select * from t1 where c1 = null or c1 = 1").await?; + let plan = select_sql_run("select * from t1 where c1 = null or c1 = 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 = null or c1 = 1 => c1: {}", range); @@ -1250,7 +1243,7 @@ mod test { ) } { - let plan = select_sql_run("select * from t1 where c1 = null or c1 < 5").await?; + let plan = select_sql_run("select * from t1 where c1 = null or c1 < 5")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 = null or c1 < 5 => c1: {}", range); @@ -1263,8 +1256,7 @@ mod test { ) } { - let plan = - select_sql_run("select * from t1 where c1 = null or (c1 > 1 and c1 < 5)").await?; + let plan = select_sql_run("select * from t1 where c1 = null or (c1 > 1 and c1 < 5)")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 = null or (c1 > 1 and c1 < 5) => c1: {}", range); @@ -1280,15 +1272,14 
@@ mod test { ) } { - let plan = select_sql_run("select * from t1 where c1 = null and c1 < 5").await?; + let plan = select_sql_run("select * from t1 where c1 = null and c1 < 5")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 = null and c1 < 5 => c1: {}", range); assert_eq!(range, Range::Eq(Arc::new(DataValue::Int32(None)))) } { - let plan = - select_sql_run("select * from t1 where c1 = null and (c1 > 1 and c1 < 5)").await?; + let plan = select_sql_run("select * from t1 where c1 = null and (c1 > 1 and c1 < 5)")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 = null and (c1 > 1 and c1 < 5) => c1: {}", range); @@ -1296,36 +1287,35 @@ mod test { } // noteq { - let plan = select_sql_run("select * from t1 where c1 != null").await?; + let plan = select_sql_run("select * from t1 where c1 != null")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate); println!("c1 != null => c1: {:#?}", range); assert_eq!(range, None) } { - let plan = select_sql_run("select * from t1 where c1 = null or c1 != 1").await?; + let plan = select_sql_run("select * from t1 where c1 = null or c1 != 1")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate); println!("c1 = null or c1 != 1 => c1: {:#?}", range); assert_eq!(range, None) } { - let plan = select_sql_run("select * from t1 where c1 != null or c1 < 5").await?; + let plan = select_sql_run("select * from t1 where c1 != null or c1 < 5")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate); println!("c1 != null or c1 < 5 => c1: {:#?}", range); assert_eq!(range, None) } { - let plan = - select_sql_run("select * from t1 where c1 != null or (c1 > 1 and c1 < 5)").await?; + let plan = select_sql_run("select * from t1 where c1 != null or (c1 > 1 and c1 < 5)")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate); println!("c1 != null or (c1 > 1 and c1 < 5) => c1: {:#?}", range); assert_eq!(range, None) } { - let plan = select_sql_run("select * from t1 where c1 != null and c1 < 5").await?; + let plan = select_sql_run("select * from t1 where c1 != null and c1 < 5")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 != null and c1 < 5 => c1: {}", range); @@ -1338,8 +1328,7 @@ mod test { ) } { - let plan = - select_sql_run("select * from t1 where c1 != null and (c1 > 1 and c1 < 5)").await?; + let plan = select_sql_run("select * from t1 where c1 != null and (c1 > 1 and c1 < 5)")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("c1 != null and (c1 > 1 and c1 < 5) => c1: {}", range); @@ -1352,7 +1341,7 @@ mod test { ) } { - let plan = select_sql_run("select * from t1 where (c1 = null or (c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))").await?; + let plan = select_sql_run("select * from t1 where (c1 = null or (c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("(c1 = null or (c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); 
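
Every hunk in this test module applies the same mechanical conversion: `#[tokio::test] async fn` becomes a plain `#[test] fn`, and the `.await` after `select_sql_run(...)` is dropped, because the storage layer no longer exposes async APIs. A minimal, self-contained sketch of that pattern — the `run` helper below is a stand-in for illustration, not the crate's real `select_sql_run`:

    use std::io;

    // Stand-in for a query helper; after this refactor such helpers return a
    // Result directly instead of a Future that must be awaited.
    fn run(_sql: &str) -> Result<usize, io::Error> {
        Ok(1)
    }

    // Before: #[tokio::test] async fn case() -> ... { let n = run("...").await?; ... }
    // After: a plain synchronous test, with no tokio runtime involved.
    #[test]
    fn case() -> Result<(), io::Error> {
        let rows = run("select * from t1 where c1 = 1")?; // no `.await`
        assert_eq!(rows, 1);
        Ok(())
    }

The same substitution accounts for most of the test churn in the optimizer and storage modules later in this patch.
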
@@ -1372,7 +1361,7 @@ mod test { ) } { - let plan = select_sql_run("select * from t1 where ((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or (c1 = null or (c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))").await?; + let plan = select_sql_run("select * from t1 where ((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or (c1 = null or (c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or (c1 = null or (c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); @@ -1392,7 +1381,7 @@ mod test { ) } { - let plan = select_sql_run("select * from t1 where (c1 = null or (c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))").await?; + let plan = select_sql_run("select * from t1 where (c1 = null or (c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("(c1 = null or (c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); @@ -1411,7 +1400,7 @@ mod test { ) } { - let plan = select_sql_run("select * from t1 where ((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and (c1 = null or (c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))").await?; + let plan = select_sql_run("select * from t1 where ((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and (c1 = null or (c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))")?; let op = plan_filter(plan)?.unwrap(); let range = RangeDetacher::new("t1", &0).detach(&op.predicate).unwrap(); println!("((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and (c1 = null or (c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); diff --git a/src/lib.rs b/src/lib.rs index 964f3625..117d3392 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ #![allow(unused_doc_comments)] #![feature(result_flattening)] #![feature(coroutines)] +#![feature(coroutine_trait)] #![feature(iterator_try_collect)] #![feature(slice_pattern)] #![feature(is_sorted)] @@ -21,3 +22,4 @@ pub mod planner; pub mod storage; pub mod types; mod udf; +pub(crate) mod utils; diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 00000000..209b9b6d --- /dev/null +++ b/src/main.rs @@ -0,0 +1,2 @@ +// TODO +fn main() {} diff --git a/src/marcos/mod.rs b/src/marcos/mod.rs index 8d1b8729..f3b1e5c8 100644 --- a/src/marcos/mod.rs +++ b/src/marcos/mod.rs @@ -63,7 +63,7 @@ macro_rules! implement_from_tuple { /// let fnck_sql = DataBaseBuilder::path("./example") /// .register_function(TestFunction::new()) /// .build() -/// .await?; +/// ?; /// ``` #[macro_export] macro_rules! 
function { diff --git a/src/optimizer/core/histogram.rs b/src/optimizer/core/histogram.rs index f3adb672..4d3e7620 100644 --- a/src/optimizer/core/histogram.rs +++ b/src/optimizer/core/histogram.rs @@ -1,5 +1,5 @@ use crate::errors::DatabaseError; -use crate::execution::volcano::dql::sort::radix_sort; +use crate::execution::dql::sort::radix_sort; use crate::expression::range_detacher::Range; use crate::expression::BinaryOperator; use crate::optimizer::core::cm_sketch::CountMinSketch; diff --git a/src/optimizer/core/memo.rs b/src/optimizer/core/memo.rs index 2df1ee44..1d18bc4a 100644 --- a/src/optimizer/core/memo.rs +++ b/src/optimizer/core/memo.rs @@ -91,7 +91,7 @@ mod tests { use crate::optimizer::rule::implementation::ImplementationRuleImpl; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::PhysicalOption; - use crate::storage::kipdb::KipTransaction; + use crate::storage::rocksdb::RocksTransaction; use crate::storage::{Storage, Transaction}; use crate::types::index::{IndexInfo, IndexMeta, IndexType}; use crate::types::value::DataValue; @@ -102,25 +102,19 @@ mod tests { use std::sync::Arc; use tempfile::TempDir; - #[tokio::test] - async fn test_build_memo() -> Result<(), DatabaseError> { + #[test] + fn test_build_memo() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let database = DataBaseBuilder::path(temp_dir.path()).build().await?; - database - .run("create table t1 (c1 int primary key, c2 int)") - .await?; - database - .run("create table t2 (c3 int primary key, c4 int)") - .await?; + let database = DataBaseBuilder::path(temp_dir.path()).build()?; + database.run("create table t1 (c1 int primary key, c2 int)")?; + database.run("create table t2 (c3 int primary key, c4 int)")?; for i in 0..1000 { - let _ = database - .run(format!("insert into t1 values({}, {})", i, i + 1).as_str()) - .await?; + let _ = database.run(format!("insert into t1 values({}, {})", i, i + 1).as_str())?; } - database.run("analyze table t1").await?; + database.run("analyze table t1")?; - let transaction = database.storage.transaction().await?; + let transaction = database.storage.transaction()?; let functions = Default::default(); let mut binder = Binder::new( BinderContext::new(&transaction, &functions, Arc::new(AtomicUsize::new(0))), @@ -145,7 +139,7 @@ mod tests { NormalizationRuleImpl::PushPredicateIntoScan, ], ) - .find_best::(None)?; + .find_best::(None)?; let graph = HepGraph::new(best_plan); let rules = vec![ ImplementationRuleImpl::Projection, diff --git a/src/optimizer/core/statistics_meta.rs b/src/optimizer/core/statistics_meta.rs index 2b941e78..e5eab3db 100644 --- a/src/optimizer/core/statistics_meta.rs +++ b/src/optimizer/core/statistics_meta.rs @@ -34,10 +34,9 @@ impl<'a, T: Transaction> StatisticMetaLoader<'a, T> { return Ok(Some(statistics_meta)); } if let Some(path) = self.tx.table_meta_path(table_name.as_str(), index_id)? { - let statistics_meta = StatisticsMeta::from_file(path)?; - Ok(Some( - self.cache.get_or_insert(key, |_| Ok(statistics_meta))?, + self.cache + .get_or_insert(key, |_| StatisticsMeta::from_file(path))?, )) } else { Ok(None) @@ -84,6 +83,7 @@ impl StatisticsMeta { .create(true) .write(true) .read(true) + .truncate(false) .open(path)?; bincode::serialize_into(&mut file, self)?; file.flush()?; @@ -96,6 +96,7 @@ impl StatisticsMeta { .create(true) .write(true) .read(true) + .truncate(false) .open(path)?; Ok(bincode::deserialize_from(file)?) 
} diff --git a/src/optimizer/heuristic/graph.rs b/src/optimizer/heuristic/graph.rs index 32f13102..acc46260 100644 --- a/src/optimizer/heuristic/graph.rs +++ b/src/optimizer/heuristic/graph.rs @@ -221,9 +221,9 @@ mod tests { use crate::planner::LogicalPlan; use petgraph::stable_graph::{EdgeIndex, NodeIndex}; - #[tokio::test] - async fn test_graph_for_plan() -> Result<(), DatabaseError> { - let plan = select_sql_run("select * from t1 left join t2 on c1 = c3").await?; + #[test] + fn test_graph_for_plan() -> Result<(), DatabaseError> { + let plan = select_sql_run("select * from t1 left join t2 on c1 = c3")?; let graph = HepGraph::new(plan); assert!(graph @@ -243,9 +243,9 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_graph_add_node() -> Result<(), DatabaseError> { - let plan = select_sql_run("select * from t1 left join t2 on c1 = c3").await?; + #[test] + fn test_graph_add_node() -> Result<(), DatabaseError> { + let plan = select_sql_run("select * from t1 left join t2 on c1 = c3")?; let mut graph = HepGraph::new(plan); graph.add_node(HepNodeId::new(1), None, Operator::Dummy); @@ -271,9 +271,9 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_graph_replace_node() -> Result<(), DatabaseError> { - let plan = select_sql_run("select * from t1 left join t2 on c1 = c3").await?; + #[test] + fn test_graph_replace_node() -> Result<(), DatabaseError> { + let plan = select_sql_run("select * from t1 left join t2 on c1 = c3")?; let mut graph = HepGraph::new(plan); graph.replace_node(HepNodeId::new(1), Operator::Dummy); @@ -283,9 +283,9 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_graph_remove_middle_node_by_single() -> Result<(), DatabaseError> { - let plan = select_sql_run("select * from t1 left join t2 on c1 = c3").await?; + #[test] + fn test_graph_remove_middle_node_by_single() -> Result<(), DatabaseError> { + let plan = select_sql_run("select * from t1 left join t2 on c1 = c3")?; let mut graph = HepGraph::new(plan); graph.remove_node(HepNodeId::new(1), false); @@ -302,9 +302,9 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_graph_remove_middle_node_with_childrens() -> Result<(), DatabaseError> { - let plan = select_sql_run("select * from t1 left join t2 on c1 = c3").await?; + #[test] + fn test_graph_remove_middle_node_with_childrens() -> Result<(), DatabaseError> { + let plan = select_sql_run("select * from t1 left join t2 on c1 = c3")?; let mut graph = HepGraph::new(plan); graph.remove_node(HepNodeId::new(1), true); @@ -314,9 +314,9 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_graph_swap_node() -> Result<(), DatabaseError> { - let plan = select_sql_run("select * from t1 left join t2 on c1 = c3").await?; + #[test] + fn test_graph_swap_node() -> Result<(), DatabaseError> { + let plan = select_sql_run("select * from t1 left join t2 on c1 = c3")?; let mut graph = HepGraph::new(plan); let before_op_0 = graph.operator(HepNodeId::new(0)).clone(); @@ -333,9 +333,9 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_graph_add_root() -> Result<(), DatabaseError> { - let plan = select_sql_run("select * from t1 left join t2 on c1 = c3").await?; + #[test] + fn test_graph_add_root() -> Result<(), DatabaseError> { + let plan = select_sql_run("select * from t1 left join t2 on c1 = c3")?; let mut graph = HepGraph::new(plan); graph.add_root(Operator::Dummy); @@ -349,8 +349,8 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_graph_to_plan() -> Result<(), DatabaseError> { + #[test] + fn test_graph_to_plan() -> Result<(), DatabaseError> { fn 
clear_output_schema_buf(plan: &mut LogicalPlan) { plan._output_schema_ref = None; @@ -358,7 +358,7 @@ mod tests { clear_output_schema_buf(child); } } - let mut plan = select_sql_run("select * from t1 left join t2 on c1 = c3").await?; + let mut plan = select_sql_run("select * from t1 left join t2 on c1 = c3")?; clear_output_schema_buf(&mut plan); let graph = HepGraph::new(plan.clone()); diff --git a/src/optimizer/heuristic/matcher.rs b/src/optimizer/heuristic/matcher.rs index 4522bb4b..a7208771 100644 --- a/src/optimizer/heuristic/matcher.rs +++ b/src/optimizer/heuristic/matcher.rs @@ -65,9 +65,9 @@ mod tests { use crate::planner::operator::Operator; use crate::planner::LogicalPlan; - #[tokio::test] - async fn test_predicate() -> Result<(), DatabaseError> { - let plan = select_sql_run("select * from t1").await?; + #[test] + fn test_predicate() -> Result<(), DatabaseError> { + let plan = select_sql_run("select * from t1")?; let graph = HepGraph::new(plan.clone()); let project_into_table_scan_pattern = Pattern { diff --git a/src/optimizer/rule/normalization/column_pruning.rs b/src/optimizer/rule/normalization/column_pruning.rs index ce9ad9ad..f2ce11dc 100644 --- a/src/optimizer/rule/normalization/column_pruning.rs +++ b/src/optimizer/rule/normalization/column_pruning.rs @@ -200,11 +200,11 @@ mod tests { use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::join::JoinCondition; use crate::planner::operator::Operator; - use crate::storage::kipdb::KipTransaction; + use crate::storage::rocksdb::RocksTransaction; - #[tokio::test] - async fn test_column_pruning() -> Result<(), DatabaseError> { - let plan = select_sql_run("select c1, c3 from t1 left join t2 on c1 = c3").await?; + #[test] + fn test_column_pruning() -> Result<(), DatabaseError> { + let plan = select_sql_run("select c1, c3 from t1 left join t2 on c1 = c3")?; let best_plan = HepOptimizer::new(plan.clone()) .batch( @@ -212,7 +212,7 @@ mod tests { HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::ColumnPruning], ) - .find_best::(None)?; + .find_best::(None)?; assert_eq!(best_plan.childrens.len(), 1); match best_plan.operator { diff --git a/src/optimizer/rule/normalization/combine_operators.rs b/src/optimizer/rule/normalization/combine_operators.rs index 0aff8a08..f816880d 100644 --- a/src/optimizer/rule/normalization/combine_operators.rs +++ b/src/optimizer/rule/normalization/combine_operators.rs @@ -154,14 +154,14 @@ mod tests { use crate::optimizer::heuristic::optimizer::HepOptimizer; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::Operator; - use crate::storage::kipdb::KipTransaction; + use crate::storage::rocksdb::RocksTransaction; use crate::types::value::DataValue; use crate::types::LogicalType; use std::sync::Arc; - #[tokio::test] - async fn test_collapse_project() -> Result<(), DatabaseError> { - let plan = select_sql_run("select c1, c2 from t1").await?; + #[test] + fn test_collapse_project() -> Result<(), DatabaseError> { + let plan = select_sql_run("select c1, c2 from t1")?; let mut optimizer = HepOptimizer::new(plan.clone()).batch( "test_collapse_project".to_string(), @@ -179,7 +179,7 @@ mod tests { optimizer.graph.add_root(new_project_op); - let best_plan = optimizer.find_best::(None)?; + let best_plan = optimizer.find_best::(None)?; if let Operator::Project(op) = &best_plan.operator { assert_eq!(op.exprs.len(), 1); @@ -196,9 +196,9 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_combine_filter() -> Result<(), 
DatabaseError> { - let plan = select_sql_run("select * from t1 where c1 > 1").await?; + #[test] + fn test_combine_filter() -> Result<(), DatabaseError> { + let plan = select_sql_run("select * from t1 where c1 > 1")?; let mut optimizer = HepOptimizer::new(plan.clone()).batch( "test_combine_filter".to_string(), @@ -224,7 +224,7 @@ mod tests { .graph .add_node(HepNodeId::new(0), Some(HepNodeId::new(1)), new_filter_op); - let best_plan = optimizer.find_best::(None)?; + let best_plan = optimizer.find_best::(None)?; if let Operator::Filter(op) = &best_plan.childrens[0].operator { if let ScalarExpression::Binary { op, .. } = &op.predicate { @@ -239,9 +239,9 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_collapse_group_by_agg() -> Result<(), DatabaseError> { - let plan = select_sql_run("select distinct c1, c2 from t1 group by c1, c2").await?; + #[test] + fn test_collapse_group_by_agg() -> Result<(), DatabaseError> { + let plan = select_sql_run("select distinct c1, c2 from t1 group by c1, c2")?; let optimizer = HepOptimizer::new(plan.clone()).batch( "test_collapse_group_by_agg".to_string(), @@ -249,7 +249,7 @@ mod tests { vec![NormalizationRuleImpl::CollapseGroupByAgg], ); - let best_plan = optimizer.find_best::(None)?; + let best_plan = optimizer.find_best::(None)?; if let Operator::Aggregate(_) = &best_plan.childrens[0].operator { if let Operator::Aggregate(_) = &best_plan.childrens[0].childrens[0].operator { diff --git a/src/optimizer/rule/normalization/pushdown_limit.rs b/src/optimizer/rule/normalization/pushdown_limit.rs index b39f711a..40724aad 100644 --- a/src/optimizer/rule/normalization/pushdown_limit.rs +++ b/src/optimizer/rule/normalization/pushdown_limit.rs @@ -192,11 +192,11 @@ mod tests { use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::limit::LimitOperator; use crate::planner::operator::Operator; - use crate::storage::kipdb::KipTransaction; + use crate::storage::rocksdb::RocksTransaction; - #[tokio::test] - async fn test_limit_project_transpose() -> Result<(), DatabaseError> { - let plan = select_sql_run("select c1, c2 from t1 limit 1").await?; + #[test] + fn test_limit_project_transpose() -> Result<(), DatabaseError> { + let plan = select_sql_run("select c1, c2 from t1 limit 1")?; let best_plan = HepOptimizer::new(plan.clone()) .batch( @@ -204,7 +204,7 @@ mod tests { HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::LimitProjectTranspose], ) - .find_best::(None)?; + .find_best::(None)?; if let Operator::Project(_) = &best_plan.operator { } else { @@ -219,9 +219,9 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_eliminate_limits() -> Result<(), DatabaseError> { - let plan = select_sql_run("select c1, c2 from t1 limit 1 offset 1").await?; + #[test] + fn test_eliminate_limits() -> Result<(), DatabaseError> { + let plan = select_sql_run("select c1, c2 from t1 limit 1 offset 1")?; let mut optimizer = HepOptimizer::new(plan.clone()).batch( "test_eliminate_limits".to_string(), @@ -236,7 +236,7 @@ mod tests { optimizer.graph.add_root(Operator::Limit(new_limit_op)); - let best_plan = optimizer.find_best::(None)?; + let best_plan = optimizer.find_best::(None)?; if let Operator::Limit(op) = &best_plan.operator { assert_eq!(op.limit, Some(1)); @@ -252,9 +252,9 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_push_limit_through_join() -> Result<(), DatabaseError> { - let plan = select_sql_run("select * from t1 left join t2 on c1 = c3 limit 1").await?; + #[test] + fn test_push_limit_through_join() -> Result<(), 
DatabaseError> { + let plan = select_sql_run("select * from t1 left join t2 on c1 = c3 limit 1")?; let best_plan = HepOptimizer::new(plan.clone()) .batch( @@ -265,7 +265,7 @@ mod tests { NormalizationRuleImpl::PushLimitThroughJoin, ], ) - .find_best::(None)?; + .find_best::(None)?; if let Operator::Join(_) = &best_plan.childrens[0].childrens[0].operator { } else { @@ -281,9 +281,9 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_push_limit_into_table_scan() -> Result<(), DatabaseError> { - let plan = select_sql_run("select * from t1 limit 1 offset 1").await?; + #[test] + fn test_push_limit_into_table_scan() -> Result<(), DatabaseError> { + let plan = select_sql_run("select * from t1 limit 1 offset 1")?; let best_plan = HepOptimizer::new(plan.clone()) .batch( @@ -294,7 +294,7 @@ mod tests { NormalizationRuleImpl::PushLimitIntoTableScan, ], ) - .find_best::(None)?; + .find_best::(None)?; if let Operator::Scan(op) = &best_plan.childrens[0].operator { assert_eq!(op.limit, (Some(1), Some(1))) diff --git a/src/optimizer/rule/normalization/pushdown_predicates.rs b/src/optimizer/rule/normalization/pushdown_predicates.rs index 10651938..b70b0002 100644 --- a/src/optimizer/rule/normalization/pushdown_predicates.rs +++ b/src/optimizer/rule/normalization/pushdown_predicates.rs @@ -273,16 +273,16 @@ mod tests { use crate::optimizer::heuristic::optimizer::HepOptimizer; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::Operator; - use crate::storage::kipdb::KipTransaction; + use crate::storage::rocksdb::RocksTransaction; use crate::types::value::DataValue; use crate::types::LogicalType; use std::collections::Bound; use std::sync::Arc; - #[tokio::test] - async fn test_push_predicate_into_scan() -> Result<(), DatabaseError> { + #[test] + fn test_push_predicate_into_scan() -> Result<(), DatabaseError> { // 1 - c2 < 0 => c2 > 1 - let plan = select_sql_run("select * from t1 where -(1 - c2) > 0").await?; + let plan = select_sql_run("select * from t1 where -(1 - c2) > 0")?; let best_plan = HepOptimizer::new(plan) .batch( @@ -295,7 +295,7 @@ mod tests { HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::PushPredicateIntoScan], ) - .find_best::(None)?; + .find_best::(None)?; if let Operator::Scan(op) = &best_plan.childrens[0].childrens[0].operator { let mock_range = Range::Scope { @@ -311,11 +311,10 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_push_predicate_through_join_in_left_join() -> Result<(), DatabaseError> { + #[test] + fn test_push_predicate_through_join_in_left_join() -> Result<(), DatabaseError> { let plan = - select_sql_run("select * from t1 left join t2 on c1 = c3 where c1 > 1 and c3 < 2") - .await?; + select_sql_run("select * from t1 left join t2 on c1 = c3 where c1 > 1 and c3 < 2")?; let best_plan = HepOptimizer::new(plan) .batch( @@ -323,7 +322,7 @@ mod tests { HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::PushPredicateThroughJoin], ) - .find_best::(None)?; + .find_best::(None)?; if let Operator::Filter(op) = &best_plan.childrens[0].operator { match op.predicate { @@ -354,11 +353,10 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_push_predicate_through_join_in_right_join() -> Result<(), DatabaseError> { + #[test] + fn test_push_predicate_through_join_in_right_join() -> Result<(), DatabaseError> { let plan = - select_sql_run("select * from t1 right join t2 on c1 = c3 where c1 > 1 and c3 < 2") - .await?; + select_sql_run("select * from t1 right join t2 on c1 = c3 where c1 > 1 and c3 < 2")?; let 
best_plan = HepOptimizer::new(plan) .batch( @@ -366,7 +364,7 @@ mod tests { HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::PushPredicateThroughJoin], ) - .find_best::(None)?; + .find_best::(None)?; if let Operator::Filter(op) = &best_plan.childrens[0].operator { match op.predicate { @@ -397,11 +395,10 @@ mod tests { Ok(()) } - #[tokio::test] - async fn test_push_predicate_through_join_in_inner_join() -> Result<(), DatabaseError> { + #[test] + fn test_push_predicate_through_join_in_inner_join() -> Result<(), DatabaseError> { let plan = - select_sql_run("select * from t1 inner join t2 on c1 = c3 where c1 > 1 and c3 < 2") - .await?; + select_sql_run("select * from t1 inner join t2 on c1 = c3 where c1 > 1 and c3 < 2")?; let best_plan = HepOptimizer::new(plan) .batch( @@ -409,7 +406,7 @@ mod tests { HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::PushPredicateThroughJoin], ) - .find_best::(None)?; + .find_best::(None)?; if let Operator::Join(_) = &best_plan.childrens[0].operator { } else { diff --git a/src/optimizer/rule/normalization/simplification.rs b/src/optimizer/rule/normalization/simplification.rs index 1c4d431f..a048795e 100644 --- a/src/optimizer/rule/normalization/simplification.rs +++ b/src/optimizer/rule/normalization/simplification.rs @@ -121,18 +121,17 @@ mod test { use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::Operator; use crate::planner::LogicalPlan; - use crate::storage::kipdb::KipTransaction; + use crate::storage::rocksdb::RocksTransaction; use crate::types::value::DataValue; use crate::types::{ColumnId, LogicalType}; use std::collections::Bound; use std::sync::Arc; - #[tokio::test] - async fn test_constant_calculation_omitted() -> Result<(), DatabaseError> { + #[test] + fn test_constant_calculation_omitted() -> Result<(), DatabaseError> { // (2 + (-1)) < -(c1 + 1) let plan = - select_sql_run("select c1 + (2 + 1), 2 + 1 from t1 where (2 + (-1)) < -(c1 + 1)") - .await?; + select_sql_run("select c1 + (2 + 1), 2 + 1 from t1 where (2 + (-1)) < -(c1 + 1)")?; let best_plan = HepOptimizer::new(plan) .batch( @@ -143,7 +142,7 @@ mod test { NormalizationRuleImpl::ConstantCalculation, ], ) - .find_best::(None)?; + .find_best::(None)?; if let Operator::Project(project_op) = best_plan.clone().operator { let constant_expr = ScalarExpression::Constant(Arc::new(DataValue::Int32(Some(3)))); if let ScalarExpression::Binary { right_expr, .. 
} = &project_op.exprs[0] { @@ -173,29 +172,29 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_simplify_filter_single_column() -> Result<(), DatabaseError> { + #[test] + fn test_simplify_filter_single_column() -> Result<(), DatabaseError> { // c1 + 1 < -1 => c1 < -2 - let plan_1 = select_sql_run("select * from t1 where -(c1 + 1) > 1").await?; + let plan_1 = select_sql_run("select * from t1 where -(c1 + 1) > 1")?; // 1 - c1 < -1 => c1 > 2 - let plan_2 = select_sql_run("select * from t1 where -(1 - c1) > 1").await?; + let plan_2 = select_sql_run("select * from t1 where -(1 - c1) > 1")?; // c1 < -1 - let plan_3 = select_sql_run("select * from t1 where -c1 > 1").await?; + let plan_3 = select_sql_run("select * from t1 where -c1 > 1")?; // c1 > 0 - let plan_4 = select_sql_run("select * from t1 where c1 + 1 > 1").await?; + let plan_4 = select_sql_run("select * from t1 where c1 + 1 > 1")?; // c1 + 1 < -1 => c1 < -2 - let plan_5 = select_sql_run("select * from t1 where 1 < -(c1 + 1)").await?; + let plan_5 = select_sql_run("select * from t1 where 1 < -(c1 + 1)")?; // 1 - c1 < -1 => c1 > 2 - let plan_6 = select_sql_run("select * from t1 where 1 < -(1 - c1)").await?; + let plan_6 = select_sql_run("select * from t1 where 1 < -(1 - c1)")?; // c1 < -1 - let plan_7 = select_sql_run("select * from t1 where 1 < -c1").await?; + let plan_7 = select_sql_run("select * from t1 where 1 < -c1")?; // c1 > 0 - let plan_8 = select_sql_run("select * from t1 where 1 < c1 + 1").await?; + let plan_8 = select_sql_run("select * from t1 where 1 < c1 + 1")?; // c1 < 24 - let plan_9 = select_sql_run("select * from t1 where (-1 - c1) + 1 > 24").await?; + let plan_9 = select_sql_run("select * from t1 where (-1 - c1) + 1 > 24")?; // c1 < 24 - let plan_10 = select_sql_run("select * from t1 where 24 < (-1 - c1) + 1").await?; + let plan_10 = select_sql_run("select * from t1 where 24 < (-1 - c1) + 1")?; let op = |plan: LogicalPlan| -> Result, DatabaseError> { let best_plan = HepOptimizer::new(plan.clone()) @@ -204,7 +203,7 @@ mod test { HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::SimplifyFilter], ) - .find_best::(None)?; + .find_best::(None)?; if let Operator::Filter(filter_op) = best_plan.childrens[0].clone().operator { Ok(RangeDetacher::new("t1", &0).detach(&filter_op.predicate)) } else { @@ -233,9 +232,9 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_simplify_filter_repeating_column() -> Result<(), DatabaseError> { - let plan = select_sql_run("select * from t1 where -(c1 + 1) > c2").await?; + #[test] + fn test_simplify_filter_repeating_column() -> Result<(), DatabaseError> { + let plan = select_sql_run("select * from t1 where -(c1 + 1) > c2")?; let best_plan = HepOptimizer::new(plan.clone()) .batch( @@ -243,7 +242,7 @@ mod test { HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::SimplifyFilter], ) - .find_best::(None)?; + .find_best::(None)?; if let Operator::Filter(filter_op) = best_plan.childrens[0].clone().operator { let c1_col = ColumnCatalog { summary: ColumnSummary { @@ -315,7 +314,7 @@ mod test { HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::SimplifyFilter], ) - .find_best::(None)?; + .find_best::(None)?; if let Operator::Filter(filter_op) = best_plan.childrens[0].clone().operator { Ok(RangeDetacher::new("t1", &column_id).detach(&filter_op.predicate)) } else { @@ -323,18 +322,16 @@ mod test { } } - #[tokio::test] - async fn test_simplify_filter_multiple_column() -> Result<(), DatabaseError> { + #[test] + fn test_simplify_filter_multiple_column() -> Result<(), 
DatabaseError> { // c1 + 1 < -1 => c1 < -2 - let plan_1 = - select_sql_run("select * from t1 where -(c1 + 1) > 1 and -(1 - c2) > 1").await?; + let plan_1 = select_sql_run("select * from t1 where -(c1 + 1) > 1 and -(1 - c2) > 1")?; // 1 - c1 < -1 => c1 > 2 - let plan_2 = - select_sql_run("select * from t1 where -(1 - c1) > 1 and -(c2 + 1) > 1").await?; + let plan_2 = select_sql_run("select * from t1 where -(1 - c1) > 1 and -(c2 + 1) > 1")?; // c1 < -1 - let plan_3 = select_sql_run("select * from t1 where -c1 > 1 and c2 + 1 > 1").await?; + let plan_3 = select_sql_run("select * from t1 where -c1 > 1 and c2 + 1 > 1")?; // c1 > 0 - let plan_4 = select_sql_run("select * from t1 where c1 + 1 > 1 and -c2 > 1").await?; + let plan_4 = select_sql_run("select * from t1 where c1 + 1 > 1 and -c2 > 1")?; let range_1_c1 = plan_filter(&plan_1, &0)?.unwrap(); let range_1_c2 = plan_filter(&plan_1, &1)?.unwrap(); @@ -408,20 +405,19 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_simplify_filter_multiple_column_in_or() -> Result<(), DatabaseError> { + #[test] + fn test_simplify_filter_multiple_column_in_or() -> Result<(), DatabaseError> { // c1 > c2 or c1 > 1 - let plan_1 = select_sql_run("select * from t1 where c1 > c2 or c1 > 1").await?; + let plan_1 = select_sql_run("select * from t1 where c1 > c2 or c1 > 1")?; assert_eq!(plan_filter(&plan_1, &0)?, None); Ok(()) } - #[tokio::test] - async fn test_simplify_filter_multiple_dispersed_same_column_in_or() -> Result<(), DatabaseError> - { - let plan_1 = select_sql_run("select * from t1 where c1 = 4 and c1 > c2 or c1 > 1").await?; + #[test] + fn test_simplify_filter_multiple_dispersed_same_column_in_or() -> Result<(), DatabaseError> { + let plan_1 = select_sql_run("select * from t1 where c1 = 4 and c1 > c2 or c1 > 1")?; assert_eq!( plan_filter(&plan_1, &0)?, @@ -434,9 +430,9 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_simplify_filter_column_is_null() -> Result<(), DatabaseError> { - let plan_1 = select_sql_run("select * from t1 where c1 is null").await?; + #[test] + fn test_simplify_filter_column_is_null() -> Result<(), DatabaseError> { + let plan_1 = select_sql_run("select * from t1 where c1 is null")?; assert_eq!( plan_filter(&plan_1, &0)?, @@ -446,18 +442,18 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_simplify_filter_column_is_not_null() -> Result<(), DatabaseError> { - let plan_1 = select_sql_run("select * from t1 where c1 is not null").await?; + #[test] + fn test_simplify_filter_column_is_not_null() -> Result<(), DatabaseError> { + let plan_1 = select_sql_run("select * from t1 where c1 is not null")?; assert_eq!(plan_filter(&plan_1, &0)?, None); Ok(()) } - #[tokio::test] - async fn test_simplify_filter_column_in() -> Result<(), DatabaseError> { - let plan_1 = select_sql_run("select * from t1 where c1 in (1, 2, 3)").await?; + #[test] + fn test_simplify_filter_column_in() -> Result<(), DatabaseError> { + let plan_1 = select_sql_run("select * from t1 where c1 in (1, 2, 3)")?; assert_eq!( plan_filter(&plan_1, &0)?, @@ -471,9 +467,9 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_simplify_filter_column_not_in() -> Result<(), DatabaseError> { - let plan_1 = select_sql_run("select * from t1 where c1 not in (1, 2, 3)").await?; + #[test] + fn test_simplify_filter_column_not_in() -> Result<(), DatabaseError> { + let plan_1 = select_sql_run("select * from t1 where c1 not in (1, 2, 3)")?; assert_eq!(plan_filter(&plan_1, &0)?, None); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 4e942bde..8394f25e 100644 --- 
a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1,4 +1,4 @@ -pub mod kipdb; +pub mod rocksdb; mod table_codec; use crate::catalog::{ColumnCatalog, ColumnRef, TableCatalog, TableMeta, TableName}; @@ -10,9 +10,9 @@ use crate::types::index::{Index, IndexId, IndexMetaRef, IndexType}; use crate::types::tuple::{Tuple, TupleId}; use crate::types::value::{DataValue, ValueRef}; use crate::types::{ColumnId, LogicalType}; +use crate::utils::lru::ShardingLruCache; use bytes::Bytes; use itertools::Itertools; -use kip_db::kernel::utils::lru_cache::ShardingLruCache; use std::collections::{Bound, VecDeque}; use std::ops::SubAssign; use std::sync::Arc; @@ -20,18 +20,21 @@ use std::{mem, slice}; pub(crate) type StatisticsMetaCache = ShardingLruCache<(TableName, IndexId), StatisticsMeta>; -pub trait Storage: Sync + Send + Clone + 'static { - type TransactionType: Transaction; +pub trait Storage: Clone { + type TransactionType<'a>: Transaction + where + Self: 'a; - #[allow(async_fn_in_trait)] - async fn transaction(&self) -> Result; + fn transaction(&self) -> Result, DatabaseError>; } /// Optional bounds of the reader, of the form (offset, limit). pub(crate) type Bounds = (Option, Option); -pub trait Transaction: Sync + Send + 'static + Sized { - type IterType<'a>: InnerIter; +pub trait Transaction: Sized { + type IterType<'a>: InnerIter + where + Self: 'a; /// The bounds is applied to the whole data batches, not per batch. /// @@ -335,21 +338,14 @@ pub trait Transaction: Sync + Send + 'static + Sized { } fn table(&self, table_name: TableName) -> Option<&TableCatalog> { - let mut option = self.table_cache().get(&table_name); - - if option.is_none() { - // TODO: unify the data into a `Meta` prefix and use one iteration to collect all data - let (columns, indexes) = self.table_collect(table_name.clone()).ok()?; - - if let Ok(catalog) = TableCatalog::reload(table_name.clone(), columns, indexes) { - option = self - .table_cache() - .get_or_insert(table_name.to_string(), |_| Ok(catalog)) - .ok(); - } - } + self.table_cache() + .get_or_insert(table_name.to_string(), |_| { + // TODO: unify the data into a `Meta` prefix and use one iteration to collect all data + let (columns, indexes) = self.table_collect(table_name.clone())?; - option + TableCatalog::reload(table_name.clone(), columns, indexes) + }) + .ok() } fn table_metas(&self) -> Result, DatabaseError> { @@ -490,8 +486,7 @@ pub trait Transaction: Sync + Send + 'static + Sized { fn table_cache(&self) -> &ShardingLruCache; fn meta_cache(&self) -> &StatisticsMetaCache; - #[allow(async_fn_in_trait)] - async fn commit(self) -> Result<(), DatabaseError>; + fn commit(self) -> Result<(), DatabaseError>; } trait IndexImpl { @@ -563,7 +558,7 @@ impl IndexImplParams<'_, T> { } } -enum IndexResult<'a, T: Transaction> { +enum IndexResult<'a, T: Transaction + 'a> { Tuple(Tuple), Scope(T::IterType<'a>), } @@ -788,7 +783,7 @@ impl IndexImpl for CompositeIndexImpl { } } -pub struct TupleIter<'a, T: Transaction> { +pub struct TupleIter<'a, T: Transaction + 'a> { offset: usize, limit: Option, table_types: Vec, @@ -797,7 +792,7 @@ pub struct TupleIter<'a, T: Transaction> { iter: T::IterType<'a>, } -impl Iter for TupleIter<'_, T> { +impl<'a, T: Transaction + 'a> Iter for TupleIter<'a, T> { fn next_tuple(&mut self) -> Result, DatabaseError> { while self.offset > 0 { let _ = self.iter.try_next()?; @@ -841,7 +836,7 @@ pub struct IndexIter<'a, T: Transaction> { scope_iter: Option>, } -impl IndexIter<'_, T> { +impl<'a, T: Transaction + 'a> IndexIter<'a, T> { fn offset_move(offset: 
&mut usize) -> bool { if *offset > 0 { offset.sub_assign(1); @@ -947,10 +942,10 @@ impl Iter for IndexIter<'_, T> { } } -pub trait InnerIter: Sync + Send { +pub trait InnerIter { fn try_next(&mut self) -> Result, DatabaseError>; } -pub trait Iter: Sync + Send { +pub trait Iter { fn next_tuple(&mut self) -> Result, DatabaseError>; } diff --git a/src/storage/kipdb.rs b/src/storage/rocksdb.rs similarity index 65% rename from src/storage/kipdb.rs rename to src/storage/rocksdb.rs index 40c37f96..81f0921f 100644 --- a/src/storage/kipdb.rs +++ b/src/storage/rocksdb.rs @@ -1,33 +1,35 @@ use crate::catalog::TableCatalog; use crate::errors::DatabaseError; use crate::storage::{InnerIter, StatisticsMetaCache, Storage, Transaction}; +use crate::utils::lru::ShardingLruCache; use bytes::Bytes; -use kip_db::kernel::lsm::iterator::Iter as KipDBIter; -use kip_db::kernel::lsm::mvcc::{CheckType, TransactionIter}; -use kip_db::kernel::lsm::storage::Config; -use kip_db::kernel::lsm::{mvcc, storage}; -use kip_db::kernel::utils::lru_cache::ShardingLruCache; -use std::collections::hash_map::RandomState; +use rocksdb::{DBIteratorWithThreadMode, Direction, IteratorMode, OptimisticTransactionDB}; use std::collections::Bound; +use std::hash::RandomState; use std::path::PathBuf; use std::sync::Arc; #[derive(Clone)] -pub struct KipStorage { - pub inner: Arc, +pub struct RocksStorage { + pub inner: Arc, pub(crate) meta_cache: Arc, pub(crate) table_cache: Arc>, } -impl KipStorage { - pub async fn new(path: impl Into + Send) -> Result { - let storage = - storage::KipStorage::open_with_config(Config::new(path).enable_level_0_memorization()) - .await?; +impl RocksStorage { + pub fn new(path: impl Into + Send) -> Result { + let mut bb = rocksdb::BlockBasedOptions::default(); + bb.set_block_cache(&rocksdb::Cache::new_lru_cache(4 * 1_024 * 1_024 * 1_024)); + + let mut opts = rocksdb::Options::default(); + opts.set_block_based_table_factory(&bb); + opts.create_if_missing(true); + + let storage = OptimisticTransactionDB::open(&opts, path.into())?; let meta_cache = Arc::new(ShardingLruCache::new(128, 16, RandomState::new()).unwrap()); let table_cache = Arc::new(ShardingLruCache::new(128, 16, RandomState::new()).unwrap()); - Ok(KipStorage { + Ok(RocksStorage { inner: Arc::new(storage), meta_cache, table_cache, @@ -35,55 +37,72 @@ impl KipStorage { } } -impl Storage for KipStorage { - type TransactionType = KipTransaction; +impl Storage for RocksStorage { + type TransactionType<'a> + = RocksTransaction<'a> where + Self: 'a; - async fn transaction(&self) -> Result { - let tx = self.inner.new_transaction(CheckType::Optimistic).await; - - Ok(KipTransaction { - tx, - table_cache: Arc::clone(&self.table_cache), + fn transaction(&self) -> Result, DatabaseError> { + Ok(RocksTransaction { + tx: self.inner.transaction(), meta_cache: self.meta_cache.clone(), + table_cache: self.table_cache.clone(), }) } } -pub struct KipTransaction { - tx: mvcc::Transaction, - table_cache: Arc>, - meta_cache: Arc, +pub struct RocksTransaction<'db> { + tx: rocksdb::Transaction<'db, OptimisticTransactionDB>, + pub(crate) meta_cache: Arc, + pub(crate) table_cache: Arc>, } -impl Transaction for KipTransaction { - type IterType<'a> = KipIter<'a>; +impl<'txn> Transaction for RocksTransaction<'txn> { + type IterType<'iter> + = RocksIter<'txn, 'iter> where + Self: 'iter; fn get(&self, key: &[u8]) -> Result, DatabaseError> { - Ok(self.tx.get(key)?) 
- } - - fn range<'a>( - &'a self, - min: Bound<&[u8]>, - max: Bound<&[u8]>, - ) -> Result, DatabaseError> { - Ok(KipIter { - iter: self.tx.iter(min, max)?, - }) + Ok(self.tx.get(key)?.map(Bytes::from)) } fn set(&mut self, key: Bytes, value: Bytes) -> Result<(), DatabaseError> { - self.tx.set(key, value); + self.tx.put(key, value)?; Ok(()) } fn remove(&mut self, key: &[u8]) -> Result<(), DatabaseError> { - self.tx.remove(key)?; + self.tx.delete(key)?; Ok(()) } + // Tips: rocksdb has weak support for `Include` and `Exclude`, so precision will be lost + fn range<'a>( + &'a self, + min: Bound<&[u8]>, + max: Bound<&[u8]>, + ) -> Result, DatabaseError> { + fn bound_to_include(bound: Bound<&[u8]>) -> Option<&[u8]> { + match bound { + Bound::Included(bytes) | Bound::Excluded(bytes) => Some(bytes), + Bound::Unbounded => None, + } + } + + let lower = bound_to_include(min) + .map(|bytes| IteratorMode::From(bytes, Direction::Forward)) + .unwrap_or(IteratorMode::Start); + let iter = self.tx.iterator(lower); + + Ok(RocksIter { + lower: min.map(|bytes| bytes.to_vec()), + upper: max.map(|bytes| bytes.to_vec()), + iter, + }) + } + fn table_cache(&self) -> &ShardingLruCache { self.table_cache.as_ref() } @@ -92,22 +111,35 @@ impl Transaction for KipTransaction { self.meta_cache.as_ref() } - async fn commit(self) -> Result<(), DatabaseError> { - self.tx.commit().await?; - + fn commit(self) -> Result<(), DatabaseError> { + self.tx.commit()?; Ok(()) } } -pub struct KipIter<'a> { - iter: TransactionIter<'a>, +pub struct RocksIter<'txn, 'iter> { + lower: Bound>, + upper: Bound>, + iter: DBIteratorWithThreadMode<'iter, rocksdb::Transaction<'txn, OptimisticTransactionDB>>, } -impl InnerIter for KipIter<'_> { +impl InnerIter for RocksIter<'_, '_> { fn try_next(&mut self) -> Result, DatabaseError> { - while let Some((key, value_option)) = self.iter.try_next()? 
{ - if let Some(value) = value_option { - return Ok(Some((key, value))); + for result in self.iter.by_ref() { + let (key, value) = result?; + let lower_bound_check = match &self.lower { + Bound::Included(ref lower) => key.as_ref() >= lower.as_slice(), + Bound::Excluded(ref lower) => key.as_ref() > lower.as_slice(), + Bound::Unbounded => true, + }; + let upper_bound_check = match &self.upper { + Bound::Included(ref upper) => key.as_ref() <= upper.as_slice(), + Bound::Excluded(ref upper) => key.as_ref() < upper.as_slice(), + Bound::Unbounded => true, + }; + + if lower_bound_check && upper_bound_check { + return Ok(Some((Bytes::from(key), Bytes::from(value)))); } } Ok(None) @@ -120,7 +152,7 @@ mod test { use crate::db::DataBaseBuilder; use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; - use crate::storage::kipdb::KipStorage; + use crate::storage::rocksdb::RocksStorage; use crate::storage::{ IndexImplEnum, IndexImplParams, IndexIter, Iter, PrimaryKeyIndexImpl, Storage, Transaction, }; @@ -133,11 +165,11 @@ mod test { use std::sync::Arc; use tempfile::TempDir; - #[tokio::test] - async fn test_in_kipdb_storage_works_with_data() -> Result<(), DatabaseError> { + #[test] + fn test_in_kipdb_storage_works_with_data() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let storage = KipStorage::new(temp_dir.path()).await?; - let mut transaction = storage.transaction().await?; + let storage = RocksStorage::new(temp_dir.path())?; + let mut transaction = storage.transaction()?; let columns = Arc::new(vec![ Arc::new(ColumnCatalog::new( "c1".to_string(), @@ -207,16 +239,14 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_index_iter_pk() -> Result<(), DatabaseError> { + #[test] + fn test_index_iter_pk() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let fnck_sql = DataBaseBuilder::path(temp_dir.path()).build().await?; + let fnck_sql = DataBaseBuilder::path(temp_dir.path()).build()?; - let _ = fnck_sql.run("create table t1 (a int primary key)").await?; - let _ = fnck_sql - .run("insert into t1 (a) values (0), (1), (2), (3), (4)") - .await?; - let transaction = fnck_sql.storage.transaction().await?; + let _ = fnck_sql.run("create table t1 (a int primary key)")?; + let _ = fnck_sql.run("insert into t1 (a) values (0), (1), (2), (3), (4)")?; + let transaction = fnck_sql.storage.transaction()?; let table_name = Arc::new("t1".to_string()); let table = transaction.table(table_name.clone()).unwrap().clone(); @@ -265,17 +295,13 @@ mod test { Ok(()) } - #[tokio::test] - async fn test_read_by_index() -> Result<(), DatabaseError> { + #[test] + fn test_read_by_index() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); - let fnck_sql = DataBaseBuilder::path(temp_dir.path()).build().await?; - let _ = fnck_sql - .run("create table t1 (a int primary key, b int unique)") - .await?; - let _ = fnck_sql - .run("insert into t1 (a, b) values (0, 0), (1, 1), (2, 2)") - .await?; - let transaction = fnck_sql.storage.transaction().await.unwrap(); + let fnck_sql = DataBaseBuilder::path(temp_dir.path()).build()?; + let _ = fnck_sql.run("create table t1 (a int primary key, b int unique)")?; + let _ = fnck_sql.run("insert into t1 (a, b) values (0, 0), (1, 1), (2, 2)")?; + let transaction = fnck_sql.storage.transaction().unwrap(); let table = transaction .table(Arc::new("t1".to_string())) diff --git 
a/src/utils/bit_vector.rs b/src/utils/bit_vector.rs new file mode 100644 index 00000000..5c93c77e --- /dev/null +++ b/src/utils/bit_vector.rs @@ -0,0 +1,93 @@ +use integer_encoding::FixedInt; +use itertools::Itertools; +use std::slice; + +#[derive(Debug, Default)] +pub struct BitVector { + len: u64, + bit_groups: Vec, +} + +impl BitVector { + pub fn new(len: usize) -> BitVector { + BitVector { + len: len as u64, + bit_groups: vec![0; (len + 7) / 8], + } + } + + pub fn set_bit(&mut self, index: usize, value: bool) { + let byte_index = index / 8; + let bit_index = index % 8; + + if value { + self.bit_groups[byte_index] |= 1 << bit_index; + } else { + self.bit_groups[byte_index] &= !(1 << bit_index); + } + } + + pub fn get_bit(&self, index: usize) -> bool { + self.bit_groups[index / 8] >> (index % 8) & 1 != 0 + } + + pub fn len(&self) -> usize { + self.len as usize + } + + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + pub fn to_raw(&self, bytes: &mut Vec) { + bytes.append(&mut u64::encode_fixed_vec(self.len)); + + for bits in &self.bit_groups { + bytes.append(&mut bits.encode_fixed_vec()); + } + } + + pub fn from_raw(bytes: &[u8]) -> Self { + let len = u64::decode_fixed(&bytes[0..8]); + let bit_groups = bytes[8..] + .iter() + .map(|bit| i8::decode_fixed(slice::from_ref(bit))) + .collect_vec(); + + BitVector { len, bit_groups } + } +} + +#[cfg(test)] +mod tests { + use crate::utils::bit_vector::BitVector; + + #[test] + fn bit_vector_serialization() { + let mut vector = BitVector::new(100); + + vector.set_bit(99, true); + + let mut bytes = Vec::new(); + + vector.to_raw(&mut bytes); + let vector = BitVector::from_raw(&bytes); + + for i in 0..98 { + assert!(!vector.get_bit(i)); + } + assert!(vector.get_bit(99)); + } + + #[test] + fn bit_vector_simple() { + let mut vector = BitVector::new(100); + + vector.set_bit(99, true); + + for i in 0..98 { + assert!(!vector.get_bit(i)); + } + assert!(vector.get_bit(99)); + } +} diff --git a/src/utils/lru.rs b/src/utils/lru.rs new file mode 100644 index 00000000..44d20f85 --- /dev/null +++ b/src/utils/lru.rs @@ -0,0 +1,430 @@ +use crate::errors::DatabaseError; +use parking_lot::Mutex; +use std::borrow::Borrow; +use std::cmp::Ordering; +use std::collections::hash_map::{Iter, RandomState}; +use std::collections::HashMap; +use std::hash::{BuildHasher, Hash, Hasher}; +use std::marker::PhantomData; +use std::ops::{Deref, DerefMut}; +use std::ptr::NonNull; +use std::sync::Arc; + +// Raw pointer wrapper for read-only Node access +// https://course.rs/advance/concurrency-with-threads/send-sync.html#:~:text=%E5%AE%89%E5%85%A8%E7%9A%84%E4%BD%BF%E7%94%A8%E3%80%82-,%E4%B8%BA%E8%A3%B8%E6%8C%87%E9%92%88%E5%AE%9E%E7%8E%B0Send,-%E4%B8%8A%E9%9D%A2%E6%88%91%E4%BB%AC%E6%8F%90%E5%88%B0 +// Thread safety is guaranteed because the pointed-to data is only read +struct NodeReadPtr(NonNull>); + +unsafe impl Send for NodeReadPtr {} +unsafe impl Sync for NodeReadPtr {} + +impl Clone for NodeReadPtr { + fn clone(&self) -> Self { + *self + } +} + +impl Copy for NodeReadPtr {} + +impl Deref for NodeReadPtr { + type Target = NonNull>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for NodeReadPtr { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +unsafe impl Send for ShardingLruCache {} +unsafe impl Sync for ShardingLruCache {} + +pub struct ShardingLruCache { + sharding_vec: Vec>>>, + hasher: S, +} + +struct Node { + key: K, + value: V, + prev: Option>, + next: Option>, +} + +struct KeyRef(NodeReadPtr); + +impl Borrow for KeyRef { + fn borrow(&self) -> &K { + unsafe { &self.0.as_ref().key } + } +} + +impl Hash for
KeyRef { + fn hash(&self, state: &mut H) { + unsafe { self.0.as_ref().key.hash(state) } + } +} + +impl Eq for KeyRef {} + +impl PartialEq for KeyRef { + #[allow(clippy::unconditional_recursion)] + fn eq(&self, other: &Self) -> bool { + unsafe { self.0.as_ref().key.eq(&other.0.as_ref().key) } + } +} + +impl PartialOrd for KeyRef { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for KeyRef { + fn cmp(&self, other: &Self) -> Ordering { + unsafe { self.0.as_ref().key.cmp(&other.0.as_ref().key) } + } +} + +/// LRU cache +/// Implementation based on this Zhihu article: +/// https://zhuanlan.zhihu.com/p/466409120 +pub struct LruCache { + head: Option>, + tail: Option>, + inner: HashMap, NodeReadPtr>, + cap: usize, + marker: PhantomData>, +} + +impl Node { + fn new(key: K, value: V) -> Self { + Self { + key, + value, + prev: None, + next: None, + } + } +} + +impl ShardingLruCache { + #[inline] + pub fn new(cap: usize, sharding_size: usize, hasher: S) -> Result { + let mut sharding_vec = Vec::with_capacity(sharding_size); + if cap % sharding_size != 0 { + return Err(DatabaseError::ShardingNotAlign); + } + let sharding_cap = cap / sharding_size; + for _ in 0..sharding_size { + sharding_vec.push(Arc::new(Mutex::new(LruCache::new(sharding_cap)?))); + } + + Ok(ShardingLruCache { + sharding_vec, + hasher, + }) + } + + #[inline] + pub fn get(&self, key: &K) -> Option<&V> { + self.shard(key) + .lock() + .get_node(key) + .map(|node| unsafe { &node.as_ref().value }) + } + + #[inline] + pub fn put(&self, key: K, value: V) -> Option { + self.shard(&key).lock().put(key, value) + } + + #[inline] + pub fn remove(&self, key: &K) -> Option { + self.shard(key).lock().remove(key) + } + + #[inline] + pub fn is_empty(&self) -> bool { + for lru in &self.sharding_vec { + if !lru.lock().is_empty() { + return false; + } + } + true + } + + #[inline] + pub fn get_or_insert(&self, key: K, fn_once: F) -> Result<&V, DatabaseError> + where + F: FnOnce(&K) -> Result, + { + self.shard(&key) + .lock() + .get_or_insert_node(key, fn_once) + .map(|node| unsafe { &node.as_ref().value }) + } + + fn sharding_size(&self) -> usize { + self.sharding_vec.len() + } + + /// Hash the key, then take the modulus of the hash to select the corresponding shard + fn shard(&self, key: &K) -> Arc>> { + let mut hasher = self.hasher.build_hasher(); + key.hash(&mut hasher); + #[allow(clippy::manual_hash_one)] + Arc::clone(&self.sharding_vec[hasher.finish() as usize % self.sharding_size()]) + } +} + +impl LruCache { + #[inline] + pub fn new(cap: usize) -> Result { + if cap < 1 { + return Err(DatabaseError::CacheSizeOverFlow); + } + + Ok(Self { + head: None, + tail: None, + inner: HashMap::new(), + cap, + marker: PhantomData, + }) + } + + /// Detach the node from the linked list + fn detach(&mut self, mut node: NodeReadPtr) { + unsafe { + match node.as_mut().prev { + Some(mut prev) => { + prev.as_mut().next = node.as_ref().next; + } + None => { + self.head = node.as_ref().next; + } + } + match node.as_mut().next { + Some(mut next) => { + next.as_mut().prev = node.as_ref().prev; + } + None => { + self.tail = node.as_ref().prev; + } + } + + node.as_mut().prev = None; + node.as_mut().next = None; + } + } + + /// Attach the node at the head of the list + fn attach(&mut self, mut node: NodeReadPtr) { + match self.head { + Some(mut head) => { + unsafe { + head.as_mut().prev = Some(node); + node.as_mut().next = Some(head); + node.as_mut().prev = None; + } + self.head = Some(node); + } + None => { + unsafe { + node.as_mut().prev = None; + node.as_mut().next = None; + } + self.head = Some(node); + self.tail = Some(node); + } + } + } + + /// Check the capacity and evict the tail node if necessary + fn expulsion(&mut self)
{ + if let Some(tail) = self.tail { + if self.inner.len() >= self.cap { + self.detach(tail); + let _ignore = self.inner.remove(&KeyRef(tail)); + } + } + } + + #[inline] + pub fn put(&mut self, key: K, value: V) -> Option { + let node = NodeReadPtr(Box::leak(Box::new(Node::new(key, value))).into()); + let old_node = self.inner.remove(&KeyRef(node)).map(|node| { + self.detach(node); + node + }); + self.expulsion(); + self.attach(node); + let _ignore1 = self.inner.insert(KeyRef(node), node); + old_node.map(|node| unsafe { + let node: Box> = Box::from_raw(node.as_ptr()); + node.value + }) + } + + #[allow(dead_code)] + fn get_node(&mut self, key: &K) -> Option> { + if let Some(node) = self.inner.get(key) { + let node = *node; + self.detach(node); + self.attach(node); + Some(node) + } else { + None + } + } + + #[inline] + pub fn get(&mut self, key: &K) -> Option<&V> { + if let Some(node) = self.inner.get(key) { + let node = *node; + self.detach(node); + self.attach(node); + unsafe { Some(&node.as_ref().value) } + } else { + None + } + } + + #[inline] + pub fn remove(&mut self, key: &K) -> Option { + self.inner.remove(key).map(|node| { + self.detach(node); + unsafe { + let node: Box> = Box::from_raw(node.as_ptr()); + node.value + } + }) + } + + fn get_or_insert_node( + &mut self, + key: K, + fn_once: F, + ) -> Result, DatabaseError> + where + F: FnOnce(&K) -> Result, + { + if let Some(node) = self.inner.get(&key) { + let node = *node; + self.detach(node); + self.attach(node); + Ok(node) + } else { + let value = fn_once(&key)?; + let node = NodeReadPtr(Box::leak(Box::new(Node::new(key, value))).into()); + let _ignore = self.inner.remove(&KeyRef(node)).map(|node| { + self.detach(node); + node + }); + self.expulsion(); + self.attach(node); + let _ignore1 = self.inner.insert(KeyRef(node), node); + Ok(node) + } + } + + #[inline] + pub fn get_or_insert(&mut self, key: K, fn_once: F) -> Result<&V, DatabaseError> + where + F: FnOnce(&K) -> Result, + { + self.get_or_insert_node(key, fn_once) + .map(|node| unsafe { &node.as_ref().value }) + } + + #[inline] + pub fn len(&self) -> usize { + self.inner.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + #[inline] + pub fn iter(&self) -> LruCacheIter { + LruCacheIter { + inner: self.inner.iter(), + } + } +} + +pub struct LruCacheIter<'a, K, V> { + inner: Iter<'a, KeyRef, NodeReadPtr>, +} + +impl<'a, K, V> Iterator for LruCacheIter<'a, K, V> { + type Item = (&'a K, &'a V); + + #[inline] + fn next(&mut self) -> Option { + self.inner + .next() + .map(|(_, node)| unsafe { (&node.as_ref().key, &node.as_ref().value) }) + } +} + +impl Drop for LruCache { + #[inline] + fn drop(&mut self) { + while let Some(node) = self.head.take() { + unsafe { + self.head = node.as_ref().next; + drop(Box::from_raw(node.as_ptr())) + } + } + } +} + +#[cfg(test)] +mod tests { + use crate::utils::lru::{LruCache, ShardingLruCache}; + use std::collections::hash_map::RandomState; + use std::collections::HashSet; + + #[test] + fn test_lru_cache() { + let mut lru = LruCache::new(3).unwrap(); + assert!(lru.is_empty()); + assert_eq!(lru.put(1, 10), None); + assert_eq!(lru.put(2, 20), None); + assert_eq!(lru.put(3, 30), None); + assert_eq!(lru.get(&1), Some(&10)); + assert_eq!(lru.put(2, 200), Some(20)); + assert_eq!(lru.put(4, 40), None); + assert_eq!(lru.get(&2), Some(&200)); + assert_eq!(lru.get(&3), None); + + assert_eq!(lru.get_or_insert(9, |_| Ok(9)).unwrap(), &9); + + assert_eq!(lru.len(), 3); + assert!(!lru.is_empty()); + + let mut set = 
HashSet::from([(&9, &9), (&2, &200), (&4, &40)]); + + for item in lru.iter() { + assert!(set.remove(&item)) + } + } + + #[test] + fn test_sharding_cache() { + let lru = ShardingLruCache::new(4, 2, RandomState::default()).unwrap(); + assert!(lru.is_empty()); + assert_eq!(lru.put(1, 10), None); + assert_eq!(lru.get(&1), Some(&10)); + assert!(!lru.is_empty()); + assert_eq!(lru.get_or_insert(9, |_| Ok(9)).unwrap(), &9); + } +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs new file mode 100644 index 00000000..81efc1b4 --- /dev/null +++ b/src/utils/mod.rs @@ -0,0 +1,2 @@ +pub(crate) mod bit_vector; +pub(crate) mod lru; diff --git a/tests/sqllogictest/Cargo.toml b/tests/sqllogictest/Cargo.toml index e0258e6b..5391b82f 100644 --- a/tests/sqllogictest/Cargo.toml +++ b/tests/sqllogictest/Cargo.toml @@ -5,10 +5,7 @@ edition = "2021" [dependencies] "fnck_sql" = { path = "../.." } -glob = { version = "0.3.1" } -async-trait = { version = "0.1.77" } -tokio = { version = "1.36.0" } -sqllogictest = { version = "0.14.0" } -tokio-test = { version = "0.4.3" } -tempfile = { version = "3.10.1" } -clap = { version = "4.5.2" } \ No newline at end of file +glob = { version = "0.3" } +sqllogictest = { version = "0.14" } +tempfile = { version = "3.10" } +clap = { version = "4" } \ No newline at end of file diff --git a/tests/sqllogictest/src/lib.rs b/tests/sqllogictest/src/lib.rs index ccc367cd..c31c43b6 100644 --- a/tests/sqllogictest/src/lib.rs +++ b/tests/sqllogictest/src/lib.rs @@ -1,21 +1,20 @@ use fnck_sql::db::Database; use fnck_sql::errors::DatabaseError; -use fnck_sql::storage::kipdb::KipStorage; -use sqllogictest::{AsyncDB, DBOutput, DefaultColumnType}; +use fnck_sql::storage::rocksdb::RocksStorage; +use sqllogictest::{DBOutput, DefaultColumnType, DB}; use std::time::Instant; pub struct SQLBase { - pub db: Database, + pub db: Database, } -#[async_trait::async_trait] -impl AsyncDB for SQLBase { +impl DB for SQLBase { type Error = DatabaseError; type ColumnType = DefaultColumnType; - async fn run(&mut self, sql: &str) -> Result, Self::Error> { + fn run(&mut self, sql: &str) -> Result, Self::Error> { let start = Instant::now(); - let (schema, tuples) = self.db.run(sql).await?; + let (schema, tuples) = self.db.run(sql)?; println!("|— Input SQL: {}", sql); println!(" |— time spent: {:?}", start.elapsed()); diff --git a/tests/sqllogictest/src/main.rs b/tests/sqllogictest/src/main.rs index fb9e5645..419d0cd9 100644 --- a/tests/sqllogictest/src/main.rs +++ b/tests/sqllogictest/src/main.rs @@ -16,8 +16,7 @@ struct Args { path: String, } -#[tokio::main] -async fn main() { +fn main() { let args = Args::parse(); let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("..").join(".."); @@ -39,11 +38,10 @@ async fn main() { let db = DataBaseBuilder::path(temp_dir.path()) .build() - .await .expect("init db error"); let mut tester = Runner::new(SQLBase { db }); - if let Err(err) = tester.run_file_async(filepath).await { + if let Err(err) = tester.run_file(filepath) { panic!("test error: {}", err); } println!("-> Pass!\n"); From c7501e8dfa93822822704f874b4603162923f73c Mon Sep 17 00:00:00 2001 From: Kould Date: Wed, 7 Aug 2024 20:04:18 +0800 Subject: [PATCH 2/4] chore: codefmt --- src/expression/evaluator.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/expression/evaluator.rs b/src/expression/evaluator.rs index daac5783..33312dac 100644 --- a/src/expression/evaluator.rs +++ b/src/expression/evaluator.rs @@ -230,6 +230,7 @@ impl ScalarExpression { trim_what_expr, trim_where, } 
=> { + let mut value = None; if let Some(string) = DataValue::clone(expr.eval(tuple, schema)?.as_ref()) .cast(&LogicalType::Varchar(None, CharLengthUnits::Characters))? .utf8() @@ -258,18 +259,13 @@ impl ScalarExpression { }; let string_trimmed = trim_regex.replace_all(&string, "$1").to_string(); - Ok(Arc::new(DataValue::Utf8 { - value: Some(string_trimmed), - ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters, - })) - } else { - Ok(Arc::new(DataValue::Utf8 { - value: None, - ty: Utf8Type::Variable(None), - unit: CharLengthUnits::Characters, - })) + value = Some(string_trimmed) } + Ok(Arc::new(DataValue::Utf8 { + value, + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, + })) } ScalarExpression::Reference { pos, .. } => { return Ok(tuple From 8b10c99181d020f14daa3f824fd55f00a831cf95 Mon Sep 17 00:00:00 2001 From: Kould Date: Wed, 7 Aug 2024 20:10:27 +0800 Subject: [PATCH 3/4] chore: codefmt --- src/execution/dql/aggregate/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/execution/dql/aggregate/mod.rs b/src/execution/dql/aggregate/mod.rs index 28af5aee..28d77363 100644 --- a/src/execution/dql/aggregate/mod.rs +++ b/src/execution/dql/aggregate/mod.rs @@ -7,9 +7,7 @@ mod sum; use crate::errors::DatabaseError; use crate::execution::dql::aggregate::avg::AvgAccumulator; -use crate::execution::dql::aggregate::count::{ - CountAccumulator, DistinctCountAccumulator, -}; +use crate::execution::dql::aggregate::count::{CountAccumulator, DistinctCountAccumulator}; use crate::execution::dql::aggregate::min_max::MinMaxAccumulator; use crate::execution::dql::aggregate::sum::{DistinctSumAccumulator, SumAccumulator}; use crate::expression::agg::AggKind; From 0e6d49a4c5fdb6347a606da0e10300fba86d73f1 Mon Sep 17 00:00:00 2001 From: Kould Date: Wed, 7 Aug 2024 20:12:23 +0800 Subject: [PATCH 4/4] docs: add llvm --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e9301704..f6aa998f 100755 --- a/README.md +++ b/README.md @@ -40,15 +40,13 @@ FnckSQL individual developers independently implemented LSM KV-based SQL DBMS ou Welcome to our WebSite, Power By FnckSQL: **http://www.kipdata.site/** ### Quick Started -Tips: Install rust toolchain first. +Tips: Install rust toolchain and llvm first. Clone the repository ``` shell git clone https://github.com/KipData/FnckSQL.git ``` -![start](./static/images/start.gif) -then use `psql` to enter sql -![pg](./static/images/pg.gif) + Using FnckSQL in code ```rust let fnck_sql = DataBaseBuilder::path("./data")