Skip to content

Commit

Permalink
Implement indexes (currently only unique indexes are supported) (#65)
Browse files Browse the repository at this point in the history
* feat: implement `DataValue::to_index_key`

* feat: implement `Index` and encoding in `TableCodec`

* feat: when creating a table, detect unique fields and create a unique index for each of them

* feat: when inserting data, check whether each field has a corresponding unique index; if so, verify that the value does not already exist and insert the index entry

* feat: processing of unique indexes when performing update and delete

* feat: processing of unique indexes when truncating a table

* style: rename Table -> Transaction, Transaction -> Iter

* feat: added `ScalarExpression::convert_binary`

used to extract the constant binary expression information corresponding to Column in the condition of the where clause

* style: code optimization

* feat: Implement RBO rule -> `SimplifyFilter`

* test: add test case for simplification.rs

* feat: add RBO Rule`SimplifyFilter`

* perf: `ConstantBinary::scope_aggregation`

Fusion of marginal values with values within a range

* feat: implement `ConstantBinary::rearrange`

constant folding -> expression extraction -> aggregation (and) -> rearrangement (or)

* fix: `ConstantBinary::scope_aggregation` selection of Eq/NotEq and Scope

* fix: `ConstantBinary::scope_aggregation` now aggregates only a single Eq condition

* feat: add RBO Rule`PushPredicateIntoScan`

* feat: implement `IndexScan`

* feat: implement offset and limit for `IndexScan`

* fix: many bugs

- RBO Rule: `PushProjectThroughChild`: fixed the problem of missing fields when pushing down
- RBO Rule: `PushLimitThroughJoin`: fixed the problem that the limit is exceeded when the Join's on condition produces multiple matching rows for the same key

* fix: resolve merge conflicts

* style: code format

* fix: check or in `c1 > c2 or c1 > 1`

* docs: supplementary index-related documentation

* perf: `ScalarExpression::check_or` optimize implementation

* feat: implemented for Decimal type `DataValue::to_index_key`

* perf: Optimized `DataValue` conversion to bitwise sequence

* perf: optimized `DataValue::Utf8` convert to encoding of primary/unique key

* refactor: reconstruct the Key encoding of each structure of TableCodec

#68
  • Loading branch information
KKould authored Sep 30, 2023
1 parent a696e2a commit 6691037
Show file tree
Hide file tree
Showing 45 changed files with 3,020 additions and 665 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ Storage Support:
![demo](./static/images/demo.png)

### Features
- SQL field options
- not null
- null
- unique
- Supports index type
- Unique Index
- Supports multiple primary key types
- Tinyint
- UTinyint
Expand All @@ -63,6 +69,8 @@ Storage Support:
- [x] Truncate
- DQL
- [x] Select
- SeqScan
- IndexScan
- [x] Where
- [x] Distinct
- [x] Alias
Expand Down
15 changes: 11 additions & 4 deletions src/binder/create_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,15 @@ impl<S: Storage> Binder<S> {
.map(|col| ColumnCatalog::from(col.clone()))
.collect_vec();

if columns.iter().find(|col| col.desc.is_primary).is_none() {
return Err(BindError::InvalidTable("At least one primary key field exists".to_string()));
let primary_key_count = columns
.iter()
.filter(|col| col.desc.is_primary)
.count();

if primary_key_count != 1 {
return Err(BindError::InvalidTable(
"The primary key field must exist and have at least one".to_string()
));
}

let plan = LogicalPlan {
Expand Down Expand Up @@ -75,10 +82,10 @@ mod tests {
assert_eq!(op.table_name, Arc::new("t1".to_string()));
assert_eq!(op.columns[0].name, "id".to_string());
assert_eq!(op.columns[0].nullable, false);
assert_eq!(op.columns[0].desc, ColumnDesc::new(LogicalType::Integer, true));
assert_eq!(op.columns[0].desc, ColumnDesc::new(LogicalType::Integer, true, false));
assert_eq!(op.columns[1].name, "name".to_string());
assert_eq!(op.columns[1].nullable, true);
assert_eq!(op.columns[1].desc, ColumnDesc::new(LogicalType::Varchar(Some(10)), false));
assert_eq!(op.columns[1].desc, ColumnDesc::new(LogicalType::Varchar(Some(10)), false, false));
}
_ => unreachable!()
}
Expand Down
2 changes: 1 addition & 1 deletion src/binder/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ impl<S: Storage> Binder<S> {
let table_catalog = self
.context
.storage
.table_catalog(table)
.table(table)
.await
.ok_or_else(|| BindError::InvalidTable(table.to_string()))?;

Expand Down
2 changes: 1 addition & 1 deletion src/binder/insert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ impl<S: Storage> Binder<S> {
let (_, name) = split_name(&name)?;
let table_name = Arc::new(name.to_string());

if let Some(table) = self.context.storage.table_catalog(&table_name).await {
if let Some(table) = self.context.storage.table(&table_name).await {
let mut columns = Vec::new();

if idents.is_empty() {
Expand Down
9 changes: 4 additions & 5 deletions src/binder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,16 +193,16 @@ pub mod test {
let _ = storage.create_table(
Arc::new("t1".to_string()),
vec![
ColumnCatalog::new("c1".to_string(), false, ColumnDesc::new(Integer, true)),
ColumnCatalog::new("c2".to_string(), false, ColumnDesc::new(Integer, false)),
ColumnCatalog::new("c1".to_string(), false, ColumnDesc::new(Integer, true, false)),
ColumnCatalog::new("c2".to_string(), false, ColumnDesc::new(Integer, false, true)),
]
).await?;

let _ = storage.create_table(
Arc::new("t2".to_string()),
vec![
ColumnCatalog::new("c3".to_string(), false, ColumnDesc::new(Integer, true)),
ColumnCatalog::new("c4".to_string(), false, ColumnDesc::new(Integer, false)),
ColumnCatalog::new("c3".to_string(), false, ColumnDesc::new(Integer, true, false)),
ColumnCatalog::new("c4".to_string(), false, ColumnDesc::new(Integer, false, false)),
]
).await?;

Expand All @@ -211,7 +211,6 @@ pub mod test {

pub async fn select_sql_run(sql: &str) -> Result<LogicalPlan, ExecutorError> {
let temp_dir = TempDir::new().expect("unable to create temporary working directory");

let storage = build_test_catalog(temp_dir.path()).await?;
let binder = Binder::new(BinderContext::new(storage));
let stmt = crate::parser::parse_sql(sql)?;
Expand Down
8 changes: 4 additions & 4 deletions src/binder/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ impl<S: Storage> Binder<S> {
let table_catalog = self
.context
.storage
.table_catalog(&table_name)
.table(&table_name)
.await
.ok_or_else(|| BindError::InvalidTable(format!("bind table {}", table)))?;

Expand Down Expand Up @@ -215,7 +215,7 @@ impl<S: Storage> Binder<S> {
for table_name in self.context.bind_table.keys().cloned() {
let table = self.context
.storage
.table_catalog(&table_name)
.table(&table_name)
.await
.ok_or_else(|| BindError::InvalidTable(table_name.to_string()))?;
for col in table.all_columns() {
Expand Down Expand Up @@ -244,12 +244,12 @@ impl<S: Storage> Binder<S> {
let (right_table, right) = self.bind_single_table_ref(relation, Some(join_type)).await?;

let left_table = self.context.storage
.table_catalog(&left_table)
.table(&left_table)
.await
.cloned()
.ok_or_else(|| BindError::InvalidTable(format!("Left: {} not found", left_table)))?;
let right_table = self.context.storage
.table_catalog(&right_table)
.table(&right_table)
.await
.cloned()
.ok_or_else(|| BindError::InvalidTable(format!("Right: {} not found", right_table)))?;
Expand Down
28 changes: 18 additions & 10 deletions src/catalog/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ use serde::{Deserialize, Serialize};
use sqlparser::ast::{ColumnDef, ColumnOption};
use crate::catalog::TableName;

use crate::types::{ColumnId, IdGenerator, LogicalType};
use crate::types::{ColumnId, LogicalType};

pub type ColumnRef = Arc<ColumnCatalog>;

#[derive(Debug, Clone, Serialize, Deserialize, Hash, Eq, PartialEq)]
pub struct ColumnCatalog {
pub id: ColumnId,
pub id: Option<ColumnId>,
pub name: String,
pub table_name: Option<TableName>,
pub nullable: bool,
Expand All @@ -19,7 +19,7 @@ pub struct ColumnCatalog {
impl ColumnCatalog {
pub(crate) fn new(column_name: String, nullable: bool, column_desc: ColumnDesc) -> ColumnCatalog {
ColumnCatalog {
id: IdGenerator::build(),
id: None,
name: column_name,
table_name: None,
nullable,
Expand All @@ -29,11 +29,11 @@ impl ColumnCatalog {

pub(crate) fn new_dummy(column_name: String)-> ColumnCatalog {
ColumnCatalog {
id: 0,
id: Some(0),
name: column_name,
table_name: None,
nullable: false,
desc: ColumnDesc::new(LogicalType::Varchar(None), false),
desc: ColumnDesc::new(LogicalType::Varchar(None), false, false),
}
}

Expand All @@ -51,6 +51,7 @@ impl From<ColumnDef> for ColumnCatalog {
let column_name = column_def.name.to_string();
let mut column_desc = ColumnDesc::new(
LogicalType::try_from(column_def.data_type).unwrap(),
false,
false
);
let mut nullable = false;
Expand All @@ -60,10 +61,15 @@ impl From<ColumnDef> for ColumnCatalog {
match option_def.option {
ColumnOption::Null => nullable = true,
ColumnOption::NotNull => (),
ColumnOption::Unique { is_primary: true } => {
column_desc.is_primary = true;
// Skip other options when using primary key
break;
ColumnOption::Unique { is_primary } => {
if is_primary {
column_desc.is_primary = true;
nullable = false;
// Skip other options when using primary key
break;
} else {
column_desc.is_unique = true;
}
},
_ => todo!()
}
Expand All @@ -78,13 +84,15 @@ impl From<ColumnDef> for ColumnCatalog {
pub struct ColumnDesc {
pub(crate) column_datatype: LogicalType,
pub(crate) is_primary: bool,
pub(crate) is_unique: bool,
}

impl ColumnDesc {
pub(crate) const fn new(column_datatype: LogicalType, is_primary: bool) -> ColumnDesc {
pub(crate) const fn new(column_datatype: LogicalType, is_primary: bool, is_unique: bool) -> ColumnDesc {
ColumnDesc {
column_datatype,
is_primary,
is_unique,
}
}
}
4 changes: 2 additions & 2 deletions src/catalog/root.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ mod tests {
let col0 = ColumnCatalog::new(
"a".to_string(),
false,
ColumnDesc::new(LogicalType::Integer, false),
ColumnDesc::new(LogicalType::Integer, false, false),
);
let col1 = ColumnCatalog::new(
"b".to_string(),
false,
ColumnDesc::new(LogicalType::Boolean, false),
ColumnDesc::new(LogicalType::Boolean, false, false),
);
let col_catalogs = vec![col0, col1];

Expand Down
38 changes: 34 additions & 4 deletions src/catalog/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::sync::Arc;

use crate::catalog::{CatalogError, ColumnCatalog, ColumnRef};
use crate::types::ColumnId;
use crate::types::index::{IndexMeta, IndexMetaRef};

pub type TableName = Arc<String>;

Expand All @@ -12,9 +13,16 @@ pub struct TableCatalog {
/// Mapping from column names to column ids
column_idxs: BTreeMap<String, ColumnId>,
pub(crate) columns: BTreeMap<ColumnId, ColumnRef>,
pub indexes: Vec<IndexMetaRef>
}

impl TableCatalog {
pub(crate) fn get_unique_index(&self, col_id: &ColumnId) -> Option<&IndexMetaRef> {
self.indexes
.iter()
.find(|meta| meta.is_unique && &meta.column_ids[0] == col_id)
}

pub(crate) fn get_column_by_id(&self, id: &ColumnId) -> Option<&ColumnRef> {
self.columns.get(id)
}
Expand Down Expand Up @@ -54,23 +62,34 @@ impl TableCatalog {
return Err(CatalogError::Duplicated("column", col.name.clone()));
}

let col_id = col.id;
let col_id = self.columns.len() as u32;

col.id = Some(col_id);
col.table_name = Some(self.name.clone());
self.column_idxs.insert(col.name.clone(), col_id);
self.columns.insert(col_id, Arc::new(col));

Ok(col_id)
}

pub(crate) fn add_index_meta(&mut self, mut index: IndexMeta) -> &IndexMeta {
let index_id = self.indexes.len();

index.id = index_id as u32;
self.indexes.push(Arc::new(index));

&self.indexes[index_id]
}

pub(crate) fn new(
name: TableName,
columns: Vec<ColumnCatalog>,
columns: Vec<ColumnCatalog>
) -> Result<TableCatalog, CatalogError> {
let mut table_catalog = TableCatalog {
name,
column_idxs: BTreeMap::new(),
columns: BTreeMap::new(),
indexes: vec![],
};

for col_catalog in columns.into_iter() {
Expand All @@ -79,6 +98,17 @@ impl TableCatalog {

Ok(table_catalog)
}

pub(crate) fn new_with_indexes(
name: TableName,
columns: Vec<ColumnCatalog>,
indexes: Vec<IndexMetaRef>
) -> Result<TableCatalog, CatalogError> {
let mut catalog = TableCatalog::new(name, columns)?;
catalog.indexes = indexes;

Ok(catalog)
}
}

#[cfg(test)]
Expand All @@ -93,8 +123,8 @@ mod tests {
// | 1 | true |
// | 2 | false |
fn test_table_catalog() {
let col0 = ColumnCatalog::new("a".into(), false, ColumnDesc::new(LogicalType::Integer, false));
let col1 = ColumnCatalog::new("b".into(), false, ColumnDesc::new(LogicalType::Boolean, false));
let col0 = ColumnCatalog::new("a".into(), false, ColumnDesc::new(LogicalType::Integer, false, false));
let col1 = ColumnCatalog::new("b".into(), false, ColumnDesc::new(LogicalType::Boolean, false, false));
let col_catalogs = vec![col0, col1];
let table_catalog = TableCatalog::new(Arc::new("test".to_string()), col_catalogs).unwrap();

Expand Down
Loading

0 comments on commit 6691037

Please sign in to comment.