Skip to content

Commit

Permalink
Refactor Column Pruning and support where case: like and not like
Browse files Browse the repository at this point in the history
…, `is not null` (#93)

* refactor(column_pruning): reconstruct column clipping using Column detection under self-direction

* feat(simplify): add `constant_calculation` compute constant calculations ahead of time to avoid double calculations

* feat: support like operator

* feat: support is not null operator

* ci: nightly version

* config: nightly version update

* ci: config toolchain
  • Loading branch information
KKould authored Nov 11, 2023
1 parent 3dbe4a8 commit 09c1042
Show file tree
Hide file tree
Showing 39 changed files with 676 additions and 463 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ integer-encoding = "3.0.4"
strum_macros = "0.24"
ordered-float = "3.0"
petgraph = "0.6.3"
futures-async-stream = "0.2.6"
futures-async-stream = "0.2.9"
futures = "0.3.25"
ahash = "0.8.3"
lazy_static = "1.4.0"
Expand All @@ -39,6 +39,7 @@ bytes = "1.5.0"
kip_db = "0.1.2-alpha.17"
rust_decimal = "1"
csv = "1"
regex = "1.10.2"

[dev-dependencies]
tokio-test = "0.4.2"
Expand Down
27 changes: 24 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
# KipSQL
<pre align="center">
Built by @KipData

██╗ ██╗██╗██████╗ ███████╗ ██████╗ ██╗
██║ ██╔╝██║██╔══██╗██╔════╝██╔═══██╗██║
█████╔╝ ██║██████╔╝███████╗██║ ██║██║
██╔═██╗ ██║██╔═══╝ ╚════██║██║▄▄ ██║██║
██║ ██╗██║██║ ███████║╚██████╔╝███████╗
╚═╝ ╚═╝╚═╝╚═╝ ╚══════╝ ╚══▀▀═╝ ╚══════╝
-----------------------------------
Embedded SQL DBMS
</pre>
<br/>

### Architecture
Welcome to our website, powered by KipSQL:
**http://www.kipdata.site/**

> Lightweight SQL calculation engine, as the SQL layer of KipDB, implemented with TalentPlan's TinySQL as the reference standard
### Architecture

![architecture](./static/images/architecture.png)

### Get Started
#### Adding the dependency
``` toml
kip-sql = "0.0.1-alpha.0"
```
Expand Down Expand Up @@ -79,6 +94,12 @@ implement_from_tuple!(Post, (
- not null
- null
- unique
- primary key
- SQL where options
- is null
- is not null
- like
- not like
- Supports index type
- Unique Index
- Supports multiple primary key types
Expand Down
2 changes: 1 addition & 1 deletion rust-toolchain
Original file line number Diff line number Diff line change
@@ -1 +1 @@
nightly-2023-10-13
nightly
26 changes: 22 additions & 4 deletions src/binder/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,20 @@ impl<'a, T: Transaction> Binder<'a, T> {
expr: &mut ScalarExpression,
is_select: bool,
) -> Result<(), BindError> {
let ref_columns = expr.referenced_columns();

match expr {
ScalarExpression::AggCall {
ty: return_type, ..
} => {
let ty = return_type.clone();
if is_select {
let index = self.context.input_ref_index(InputRefType::AggCall);
let input_ref = ScalarExpression::InputRef { index, ty };
let input_ref = ScalarExpression::InputRef {
index,
ty,
ref_columns,
};
match std::mem::replace(expr, input_ref) {
ScalarExpression::AggCall {
kind,
Expand All @@ -124,14 +130,21 @@ impl<'a, T: Transaction> Binder<'a, T> {
.find_position(|agg_expr| agg_expr == &expr)
.ok_or_else(|| BindError::AggMiss(format!("{:?}", expr)))?;

let _ = std::mem::replace(expr, ScalarExpression::InputRef { index, ty });
let _ = std::mem::replace(
expr,
ScalarExpression::InputRef {
index,
ty,
ref_columns,
},
);
}
}

ScalarExpression::TypeCast { expr, .. } => {
self.visit_column_agg_expr(expr, is_select)?
}
ScalarExpression::IsNull { expr } => self.visit_column_agg_expr(expr, is_select)?,
ScalarExpression::IsNull { expr, .. } => self.visit_column_agg_expr(expr, is_select)?,
ScalarExpression::Unary { expr, .. } => self.visit_column_agg_expr(expr, is_select)?,
ScalarExpression::Alias { expr, .. } => self.visit_column_agg_expr(expr, is_select)?,
ScalarExpression::Binary {
Expand Down Expand Up @@ -228,13 +241,15 @@ impl<'a, T: Transaction> Binder<'a, T> {
}) {
let index = self.context.input_ref_index(InputRefType::GroupBy);
let mut select_item = &mut select_list[i];
let ref_columns = select_item.referenced_columns();
let return_type = select_item.return_type();

self.context.group_by_exprs.push(std::mem::replace(
&mut select_item,
ScalarExpression::InputRef {
index,
ty: return_type,
ref_columns,
},
));
return;
Expand All @@ -243,6 +258,8 @@ impl<'a, T: Transaction> Binder<'a, T> {

if let Some(i) = select_list.iter().position(|column| column == expr) {
let expr = &mut select_list[i];
let ref_columns = expr.referenced_columns();

match expr {
ScalarExpression::Constant(_) | ScalarExpression::ColumnRef { .. } => {
self.context.group_by_exprs.push(expr.clone())
Expand All @@ -255,6 +272,7 @@ impl<'a, T: Transaction> Binder<'a, T> {
ScalarExpression::InputRef {
index,
ty: expr.return_type(),
ref_columns,
},
))
}
Expand Down Expand Up @@ -300,7 +318,7 @@ impl<'a, T: Transaction> Binder<'a, T> {
}

ScalarExpression::TypeCast { expr, .. } => self.validate_having_orderby(expr),
ScalarExpression::IsNull { expr } => self.validate_having_orderby(expr),
ScalarExpression::IsNull { expr, .. } => self.validate_having_orderby(expr),
ScalarExpression::Unary { expr, .. } => self.validate_having_orderby(expr),
ScalarExpression::Binary {
left_expr,
Expand Down
4 changes: 2 additions & 2 deletions src/binder/create_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,13 @@ mod tests {
match plan1.operator {
Operator::CreateTable(op) => {
assert_eq!(op.table_name, Arc::new("t1".to_string()));
assert_eq!(op.columns[0].name, "id".to_string());
assert_eq!(op.columns[0].name(), "id");
assert_eq!(op.columns[0].nullable, false);
assert_eq!(
op.columns[0].desc,
ColumnDesc::new(LogicalType::Integer, true, false)
);
assert_eq!(op.columns[1].name, "name".to_string());
assert_eq!(op.columns[1].name(), "name");
assert_eq!(op.columns[1].nullable, true);
assert_eq!(
op.columns[1].desc,
Expand Down
34 changes: 32 additions & 2 deletions src/binder/expr.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::binder::BindError;
use crate::expression;
use crate::expression::agg::AggKind;
use itertools::Itertools;
use sqlparser::ast::{
Expand All @@ -25,13 +26,41 @@ impl<'a, T: Transaction> Binder<'a, T> {
Expr::Function(func) => self.bind_agg_call(func),
Expr::Nested(expr) => self.bind_expr(expr),
Expr::UnaryOp { expr, op } => self.bind_unary_op_internal(expr, op),
Expr::IsNull(expr) => self.bind_is_null(expr),
Expr::Like {
negated,
expr,
pattern,
..
} => self.bind_like(*negated, expr, pattern),
Expr::IsNull(expr) => self.bind_is_null(expr, false),
Expr::IsNotNull(expr) => self.bind_is_null(expr, true),
_ => {
todo!()
}
}
}

/// Binds a SQL `LIKE` / `NOT LIKE` predicate into a boolean binary expression.
///
/// `expr` (the value being tested) is bound first and becomes the left
/// operand; `pattern` is bound second and becomes the right operand. The
/// operator is `BinaryOperator::NotLike` when `negated` is `true`, and
/// `BinaryOperator::Like` otherwise. The resulting expression is always typed
/// as `LogicalType::Boolean`.
///
/// # Errors
///
/// Returns the `BindError` produced by `bind_expr` if either operand fails
/// to bind.
pub fn bind_like(
    &mut self,
    negated: bool,
    expr: &Expr,
    pattern: &Expr,
) -> Result<ScalarExpression, BindError> {
    // Keep the binding order (value, then pattern) so the first failure wins.
    let value = self.bind_expr(expr)?;
    let like_pattern = self.bind_expr(pattern)?;

    let op = if !negated {
        expression::BinaryOperator::Like
    } else {
        expression::BinaryOperator::NotLike
    };

    Ok(ScalarExpression::Binary {
        op,
        left_expr: Box::new(value),
        right_expr: Box::new(like_pattern),
        ty: LogicalType::Boolean,
    })
}

pub fn bind_column_ref_from_identifiers(
&mut self,
idents: &[Ident],
Expand Down Expand Up @@ -199,8 +228,9 @@ impl<'a, T: Transaction> Binder<'a, T> {
})
}

fn bind_is_null(&mut self, expr: &Expr) -> Result<ScalarExpression, BindError> {
fn bind_is_null(&mut self, expr: &Expr, negated: bool) -> Result<ScalarExpression, BindError> {
Ok(ScalarExpression::IsNull {
negated,
expr: Box::new(self.bind_expr(expr)?),
})
}
Expand Down
15 changes: 7 additions & 8 deletions src/binder/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,9 +343,7 @@ impl<'a, T: Transaction> Binder<'a, T> {
select_list: Vec<ScalarExpression>,
) -> LogicalPlan {
LogicalPlan {
operator: Operator::Project(ProjectOperator {
columns: select_list,
}),
operator: Operator::Project(ProjectOperator { exprs: select_list }),
childrens: vec![children],
}
}
Expand Down Expand Up @@ -431,7 +429,8 @@ impl<'a, T: Transaction> Binder<'a, T> {

for column in select_items {
if let ScalarExpression::ColumnRef(col) = column {
if let Some(nullable) = table_force_nullable.get(col.table_name.as_ref().unwrap()) {
if let Some(nullable) = table_force_nullable.get(col.table_name().as_ref().unwrap())
{
let mut new_col = ColumnCatalog::clone(col);
new_col.nullable = *nullable;

Expand Down Expand Up @@ -504,12 +503,12 @@ impl<'a, T: Transaction> Binder<'a, T> {
// example: foo = bar
(ScalarExpression::ColumnRef(l), ScalarExpression::ColumnRef(r)) => {
// reorder left and right joins keys to pattern: (left, right)
if left_schema.contains_column(&l.name)
&& right_schema.contains_column(&r.name)
if left_schema.contains_column(l.name())
&& right_schema.contains_column(r.name())
{
accum.push((left, right));
} else if left_schema.contains_column(&r.name)
&& right_schema.contains_column(&l.name)
} else if left_schema.contains_column(r.name())
&& right_schema.contains_column(l.name())
{
accum.push((right, left));
} else {
Expand Down
47 changes: 37 additions & 10 deletions src/catalog/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::catalog::TableName;
use crate::expression::ScalarExpression;
use serde::{Deserialize, Serialize};
use sqlparser::ast::{ColumnDef, ColumnOption};
use std::hash::Hash;
use std::sync::Arc;

use crate::types::{ColumnId, LogicalType};
Expand All @@ -10,14 +11,19 @@ pub type ColumnRef = Arc<ColumnCatalog>;

#[derive(Debug, Clone, Serialize, Deserialize, Hash, Eq, PartialEq)]
pub struct ColumnCatalog {
pub id: Option<ColumnId>,
pub name: String,
pub table_name: Option<TableName>,
pub summary: ColumnSummary,
pub nullable: bool,
pub desc: ColumnDesc,
pub ref_expr: Option<ScalarExpression>,
}

/// Identifying metadata of a column, grouped into one value: an optional id,
/// the column name, and an optional table name.
///
/// `ColumnCatalog` embeds this as its `summary` field and exposes the
/// individual parts through its `id()`, `name()` and `table_name()` accessors.
#[derive(Debug, Clone, Serialize, Deserialize, Hash, Eq, PartialEq)]
pub struct ColumnSummary {
/// Column id; `ColumnCatalog::new` starts with `None`, `new_dummy` uses `Some(0)`.
pub id: Option<ColumnId>,
/// Column name.
pub name: String,
/// Name of the associated table, if one has been set (both `ColumnCatalog`
/// constructors start with `None`).
pub table_name: Option<TableName>,
}

impl ColumnCatalog {
pub(crate) fn new(
column_name: String,
Expand All @@ -26,9 +32,11 @@ impl ColumnCatalog {
ref_expr: Option<ScalarExpression>,
) -> ColumnCatalog {
ColumnCatalog {
id: None,
name: column_name,
table_name: None,
summary: ColumnSummary {
id: None,
name: column_name,
table_name: None,
},
nullable,
desc: column_desc,
ref_expr,
Expand All @@ -37,20 +45,39 @@ impl ColumnCatalog {

pub(crate) fn new_dummy(column_name: String) -> ColumnCatalog {
ColumnCatalog {
id: Some(0),
name: column_name,
table_name: None,
summary: ColumnSummary {
id: Some(0),
name: column_name,
table_name: None,
},
nullable: false,
desc: ColumnDesc::new(LogicalType::Varchar(None), false, false),
ref_expr: None,
}
}

/// Returns a reference to this column's `ColumnSummary` (the `summary` field).
pub(crate) fn summary(&self) -> &ColumnSummary {
&self.summary
}

/// Returns the column id stored in `summary`, or `None` when no id is set.
pub(crate) fn id(&self) -> Option<ColumnId> {
self.summary.id
}

/// Returns a clone of the table name stored in `summary`, or `None` when no
/// table name is set.
// NOTE(review): each call clones the `TableName`; presumably this is a cheap
// `Arc` clone — confirm against the `TableName` alias in `crate::catalog`.
pub(crate) fn table_name(&self) -> Option<TableName> {
self.summary.table_name.clone()
}

/// Returns the column name as a string slice borrowed from `summary`.
pub(crate) fn name(&self) -> &str {
&self.summary.name
}

/// Returns a reference to the column's logical type (`desc.column_datatype`).
pub(crate) fn datatype(&self) -> &LogicalType {
&self.desc.column_datatype
}

pub fn desc(&self) -> &ColumnDesc {
#[allow(dead_code)]
pub(crate) fn desc(&self) -> &ColumnDesc {
&self.desc
}
}
Expand Down
Loading

0 comments on commit 09c1042

Please sign in to comment.