Skip to content

Commit

Permalink
Optimize Sort Executor (#112)
Browse files Browse the repository at this point in the history
* feat: valid column & table name

* perf: use advance calculation of OrderBy Values to avoid double calculations

* test: add test case for issue #110

* code fmt

* perf: reconstruction Sort operator reconstruction using radix sort + memcomparable encode

ref https://duckdb.org/2021/08/27/external-sorting.html

* code fmt

* fix: nulls first default true

* test: add multi-null value case

* code fmt

* perf: filter Key after sorting

* code fmt

* fix: index out of bounds
  • Loading branch information
KKould authored Dec 25, 2023
1 parent a6f3809 commit 9cc8c89
Show file tree
Hide file tree
Showing 21 changed files with 204 additions and 220 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ ahash = "0.8.3"
lazy_static = "1.4.0"
comfy-table = "7.0.1"
bytes = "1.5.0"
kip_db = "0.1.2-alpha.19"
kip_db = "0.1.2-alpha.20"
rust_decimal = "1"
csv = "1"
regex = "1.10.2"
Expand Down
2 changes: 1 addition & 1 deletion src/binder/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ impl<'a, T: Transaction> Binder<'a, T> {
return_orderby.push(SortField::new(
expr,
asc.map_or(true, |asc| asc),
nulls_first.map_or(false, |first| first),
nulls_first.map_or(true, |first| first),
));
}
Some(return_orderby)
Expand Down
12 changes: 9 additions & 3 deletions src/binder/alter_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use sqlparser::ast::{AlterTableOperation, ObjectName};

use std::sync::Arc;

use super::Binder;
use super::{is_valid_identifier, Binder};
use crate::binder::{lower_case_name, split_name, BindError};
use crate::planner::operator::alter_table::add_column::AddColumnOperator;
use crate::planner::operator::alter_table::drop_column::DropColumnOperator;
Expand All @@ -17,7 +17,7 @@ impl<'a, T: Transaction> Binder<'a, T> {
name: &ObjectName,
operation: &AlterTableOperation,
) -> Result<LogicalPlan, BindError> {
let table_name: Arc<String> = Arc::new(split_name(&lower_case_name(name))?.1.to_string());
let table_name: Arc<String> = Arc::new(split_name(&lower_case_name(name))?.to_string());

if let Some(table) = self.context.table(table_name.clone()) {
let plan = match operation {
Expand All @@ -27,12 +27,18 @@ impl<'a, T: Transaction> Binder<'a, T> {
column_def,
} => {
let plan = ScanOperator::build(table_name.clone(), table);
let column = self.bind_column(column_def)?;

if !is_valid_identifier(column.name()) {
return Err(BindError::InvalidColumn(
"illegal column naming".to_string(),
));
}
LogicalPlan {
operator: Operator::AddColumn(AddColumnOperator {
table_name,
if_not_exists: *if_not_exists,
column: self.bind_column(column_def)?,
column,
}),
childrens: vec![plan],
}
Expand Down
12 changes: 10 additions & 2 deletions src/binder/create_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use sqlparser::ast::{ColumnDef, ColumnOption, ObjectName, TableConstraint};
use std::collections::HashSet;
use std::sync::Arc;

use super::Binder;
use super::{is_valid_identifier, Binder};
use crate::binder::{lower_case_name, split_name, BindError};
use crate::catalog::{ColumnCatalog, ColumnDesc};
use crate::expression::ScalarExpression;
Expand All @@ -24,9 +24,12 @@ impl<'a, T: Transaction> Binder<'a, T> {
if_not_exists: bool,
) -> Result<LogicalPlan, BindError> {
let name = lower_case_name(name);
let (_, name) = split_name(&name)?;
let name = split_name(&name)?;
let table_name = Arc::new(name.to_string());

if !is_valid_identifier(&table_name) {
return Err(BindError::InvalidTable("illegal table naming".to_string()));
}
{
// check duplicated column names
let mut set = HashSet::new();
Expand All @@ -35,6 +38,11 @@ impl<'a, T: Transaction> Binder<'a, T> {
if !set.insert(col_name.clone()) {
return Err(BindError::AmbiguousColumn(col_name.to_string()));
}
if !is_valid_identifier(col_name) {
return Err(BindError::InvalidColumn(
"illegal column naming".to_string(),
));
}
}
}
let mut columns: Vec<ColumnCatalog> = columns
Expand Down
2 changes: 1 addition & 1 deletion src/binder/delete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ impl<'a, T: Transaction> Binder<'a, T> {
) -> Result<LogicalPlan, BindError> {
if let TableFactor::Table { name, alias, .. } = &from.relation {
let name = lower_case_name(name);
let (_, name) = split_name(&name)?;
let name = split_name(&name)?;
let (table_name, mut plan) =
self._bind_single_table_ref(None, name, Self::trans_alias(alias))?;

Expand Down
2 changes: 1 addition & 1 deletion src/binder/drop_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ impl<'a, T: Transaction> Binder<'a, T> {
if_exists: &bool,
) -> Result<LogicalPlan, BindError> {
let name = lower_case_name(name);
let (_, name) = split_name(&name)?;
let name = split_name(&name)?;
let table_name = Arc::new(name.to_string());

let plan = LogicalPlan {
Expand Down
2 changes: 1 addition & 1 deletion src/binder/insert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ impl<'a, T: Transaction> Binder<'a, T> {
is_overwrite: bool,
) -> Result<LogicalPlan, BindError> {
let name = lower_case_name(&name);
let (_, name) = split_name(&name)?;
let name = split_name(&name)?;
let table_name = Arc::new(name.to_string());

if let Some(table) = self.context.table(table_name.clone()) {
Expand Down
31 changes: 23 additions & 8 deletions src/binder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ mod update;
use sqlparser::ast::{Ident, ObjectName, ObjectType, SetExpr, Statement};
use std::collections::BTreeMap;

use crate::catalog::{CatalogError, TableCatalog, TableName, DEFAULT_SCHEMA_NAME};
use crate::catalog::{CatalogError, TableCatalog, TableName};
use crate::expression::ScalarExpression;
use crate::planner::operator::join::JoinType;
use crate::planner::LogicalPlan;
Expand Down Expand Up @@ -199,11 +199,10 @@ fn lower_case_name(name: &ObjectName) -> ObjectName {
}

/// Split an object name into `(schema name, table name)`.
fn split_name(name: &ObjectName) -> Result<(&str, &str), BindError> {
fn split_name(name: &ObjectName) -> Result<&str, BindError> {
Ok(match name.0.as_slice() {
[table] => (DEFAULT_SCHEMA_NAME, &table.value),
[schema, table] => (&schema.value, &table.value),
_ => return Err(BindError::InvalidTableName(name.0.clone())),
[table] => &table.value,
_ => return Err(BindError::InvalidTable(name.to_string())),
})
}

Expand All @@ -213,8 +212,6 @@ pub enum BindError {
UnsupportedStmt(String),
#[error("invalid table {0}")]
InvalidTable(String),
#[error("invalid table name: {0:?}")]
InvalidTableName(Vec<Ident>),
#[error("invalid column {0}")]
InvalidColumn(String),
#[error("ambiguous column {0}")]
Expand All @@ -237,9 +234,15 @@ pub enum BindError {
UnsupportedCopySource(String),
}

pub(crate) fn is_valid_identifier(s: &str) -> bool {
s.chars().all(|c| c.is_alphanumeric() || c == '_')
&& !s.chars().next().unwrap_or_default().is_numeric()
&& !s.chars().all(|c| c == '_')
}

#[cfg(test)]
pub mod test {
use crate::binder::{Binder, BinderContext};
use crate::binder::{is_valid_identifier, Binder, BinderContext};
use crate::catalog::{ColumnCatalog, ColumnDesc};
use crate::execution::ExecutorError;
use crate::planner::LogicalPlan;
Expand Down Expand Up @@ -308,4 +311,16 @@ pub mod test {

Ok(binder.bind(&stmt[0])?)
}

#[test]
pub fn test_valid_identifier() {
assert!(is_valid_identifier("valid_table"));
assert!(is_valid_identifier("valid_column"));
assert!(is_valid_identifier("_valid_column"));
assert!(is_valid_identifier("valid_column_1"));

assert!(!is_valid_identifier("invalid_name&"));
assert!(!is_valid_identifier("1_invalid_name"));
assert!(!is_valid_identifier("____"));
}
}
12 changes: 4 additions & 8 deletions src/binder/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@ use crate::{
use super::Binder;

use crate::binder::BindError;
use crate::catalog::{
ColumnCatalog, TableCatalog, TableName, DEFAULT_DATABASE_NAME, DEFAULT_SCHEMA_NAME,
};
use crate::catalog::{ColumnCatalog, TableCatalog, TableName};
use crate::execution::executor::dql::join::joins_nullable;
use crate::expression::BinaryOperator;
use crate::planner::operator::join::JoinCondition;
Expand Down Expand Up @@ -155,11 +153,9 @@ impl<'a, T: Transaction> Binder<'a, T> {
.map(|ident| Ident::new(ident.value.to_lowercase()))
.collect_vec();

let (_database, _schema, table): (&str, &str, &str) = match obj_name.as_slice() {
[table] => (DEFAULT_DATABASE_NAME, DEFAULT_SCHEMA_NAME, &table.value),
[schema, table] => (DEFAULT_DATABASE_NAME, &schema.value, &table.value),
[database, schema, table] => (&database.value, &schema.value, &table.value),
_ => return Err(BindError::InvalidTableName(obj_name)),
let table: &str = match obj_name.as_slice() {
[table] => &table.value,
_ => return Err(BindError::InvalidTable(obj_name.iter().join(","))),
};

let (table, plan) =
Expand Down
2 changes: 1 addition & 1 deletion src/binder/truncate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::sync::Arc;
impl<'a, T: Transaction> Binder<'a, T> {
pub(crate) fn bind_truncate(&mut self, name: &ObjectName) -> Result<LogicalPlan, BindError> {
let name = lower_case_name(name);
let (_, name) = split_name(&name)?;
let name = split_name(&name)?;
let table_name = Arc::new(name.to_string());

let plan = LogicalPlan {
Expand Down
2 changes: 1 addition & 1 deletion src/binder/update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ impl<'a, T: Transaction> Binder<'a, T> {
) -> Result<LogicalPlan, BindError> {
if let TableFactor::Table { name, .. } = &to.relation {
let name = lower_case_name(name);
let (_, name) = split_name(&name)?;
let name = split_name(&name)?;
let table_name = Arc::new(name.to_string());

let mut plan = self.bind_table_ref(slice::from_ref(to))?;
Expand Down
9 changes: 0 additions & 9 deletions src/catalog/mod.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,9 @@
// Module: catalog
use std::sync::Arc;

pub(crate) use self::column::*;
pub(crate) use self::root::*;
pub(crate) use self::table::*;

/// The type of catalog reference.
pub type RootRef = Arc<RootCatalog>;

pub(crate) static DEFAULT_DATABASE_NAME: &str = "kipsql";
pub(crate) static DEFAULT_SCHEMA_NAME: &str = "kipsql";

mod column;
mod root;
mod table;

#[derive(thiserror::Error, Debug)]
Expand Down
86 changes: 0 additions & 86 deletions src/catalog/root.rs

This file was deleted.

Loading

0 comments on commit 9cc8c89

Please sign in to comment.