Skip to content

Commit

Permalink
wip sql: add schema goldentests
Browse files Browse the repository at this point in the history
  • Loading branch information
erikgrinaker committed Jun 29, 2024
1 parent 0787f87 commit 9d2ce53
Show file tree
Hide file tree
Showing 11 changed files with 345 additions and 40 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions src/encoding/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
use super::{bincode, Key as _};
use crate::raft;
use crate::sql;
use crate::storage::mvcc;

use itertools::Itertools as _;
Expand Down Expand Up @@ -148,3 +149,27 @@ impl<I: Formatter> Formatter for MVCC<I> {
}
}
}

/// Formats SQL keys/values. Keys that can't be decoded as SQL engine keys are
/// formatted using the Raw formatter instead.
pub struct SQL;

impl Formatter for SQL {
    /// Formats a SQL engine key as `sql:` followed by the key's debug
    /// representation. Keys that fail to decode are formatted as raw keys.
    fn key(key: &[u8]) -> String {
        let Ok(key) = sql::engine::Key::decode(key) else { return Raw::key(key) };
        format!("sql:{key:?}")
    }

    /// Formats a SQL engine value. The key determines how the value is
    /// decoded. If either the key or the value can't be decoded, the value
    /// is formatted as raw bytes.
    fn value(key: &[u8], value: &[u8]) -> String {
        // The value is not a key, so fall back to raw byte formatting (the same
        // fallback used below when the value itself fails to decode).
        let Ok(key) = sql::engine::Key::decode(key) else { return Raw::bytes(value) };
        match key {
            sql::engine::Key::Table(_) => {
                let Ok(table) = bincode::deserialize::<sql::types::Table>(value) else {
                    return Raw::bytes(value);
                };
                // The table schema is displayed across multiple lines. Collapse
                // each newline and any following whitespace into a single space
                // so the schema fits on one line.
                let re = regex::Regex::new(r#"\n\s*"#).expect("regex failed");
                re.replace_all(&table.to_string(), " ").into_owned()
            }
            // Formatting of other key types is not implemented yet.
            _ => todo!(),
        }
    }
}
6 changes: 3 additions & 3 deletions src/sql/engine/local.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use std::collections::{HashMap, HashSet};
/// node-local SQL storage.
pub struct Local<E: storage::Engine + 'static> {
/// The local MVCC storage engine.
pub(super) mvcc: mvcc::MVCC<E>,
pub(crate) mvcc: mvcc::MVCC<E>,
}

impl<E: storage::Engine> Local<E> {
Expand Down Expand Up @@ -410,8 +410,8 @@ impl<E: storage::Engine> Catalog for Transaction<E> {
/// table/column names, so this is fine.
///
/// Uses Cow to allow encoding borrowed values but decoding owned values.
#[derive(Deserialize, Serialize)]
enum Key<'a> {
#[derive(Debug, Deserialize, Serialize)]
pub enum Key<'a> {
/// A table schema by table name.
Table(Cow<'a, str>),
/// An index entry, by table name, index name, and index value.
Expand Down
2 changes: 1 addition & 1 deletion src/sql/engine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ mod raft;
mod session;

pub use engine::{Catalog, Engine, IndexScan, Transaction};
pub use local::Local;
pub use local::{Key, Local};
pub use raft::{Raft, Status};
pub use session::{Session, StatementResult};
138 changes: 137 additions & 1 deletion src/sql/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,160 @@ pub mod types;

#[cfg(test)]
mod tests {
use crate::encoding::format::{self, Formatter as _};
use crate::sql::engine::{Engine, Local};
use crate::sql::planner::{Planner, Scope};
use crate::sql::types::Value;
use crate::storage;
use crate::storage::engine::test::{Emit, Mirror, Operation};
use crate::storage::Engine as _;
use crossbeam::channel::Receiver;
use itertools::Itertools as _;
use std::error::Error;
use std::fmt::Write as _;
use std::result::Result;
use test_each_file::test_each_path;

use super::engine::{Catalog as _, Session};
use super::parser::Parser;

// Run goldenscript tests in src/sql/testscripts.
test_each_path! { in "src/sql/testscripts/expressions" as expressions => test_goldenscript }
test_each_path! { in "src/sql/testscripts/schema" as schema => test_goldenscript }
test_each_path! { in "src/sql/testscripts/expressions" as expressions => test_goldenscript_expr }

/// Runs the SQL goldenscript at the given path against a fresh local engine.
fn test_goldenscript(path: &std::path::Path) {
    // Engine operations are emitted to op_rx, for output by the runner.
    let (op_tx, op_rx) = crossbeam::channel::unbounded();

    // Use both a BitCask and a Memory engine, and mirror operations across
    // them. The BitCask engine is stored in a temporary directory.
    let tempdir = tempfile::TempDir::with_prefix("toydb").expect("tempdir failed");
    let bitcask_path = tempdir.path().join("bitcask");
    let mirror = Mirror::new(
        storage::BitCask::new(bitcask_path).expect("bitcask failed"),
        storage::Memory::new(),
    );

    // The engine is constructed here and passed to the runner by reference,
    // since the runner's session borrows the engine.
    let engine = Local::new(Emit::new(mirror, op_tx));
    let mut runner = SQLRunner::new(&engine, op_rx);

    goldenscript::run(&mut runner, path).expect("goldenscript failed")
}

/// Runs the expression goldenscript at the given path using a new
/// ExpressionRunner.
fn test_goldenscript_expr(path: &std::path::Path) {
    let mut runner = ExpressionRunner::new();
    goldenscript::run(&mut runner, path).expect("goldenscript failed");
}

/// A SQL test runner. Executes goldenscript commands against a SQL session
/// on a test engine.
struct SQLRunner<'a> {
/// The engine under test. Used to scan the raw storage contents.
engine: &'a TestEngine,
/// A session on the engine. Used to execute statements and read schemas.
session: Session<'a, TestEngine>,
/// Receives the storage engine operations emitted by the engine.
op_rx: Receiver<Operation>,
}

/// The engine type used by SQLRunner: a local SQL engine on top of an
/// operation-emitting wrapper around mirrored BitCask and Memory engines.
type TestEngine = Local<Emit<Mirror<storage::BitCask, storage::Memory>>>;

impl<'a> SQLRunner<'a> {
    /// Creates a runner for the given engine, opening a new session on it.
    /// Engine operations are received via op_rx.
    fn new(engine: &'a TestEngine, op_rx: Receiver<Operation>) -> Self {
        Self { engine, session: engine.session(), op_rx }
    }
}

/// Goldenscript runner implementation. Handles the `dump` and `schema` runner
/// commands, and executes any other command as a SQL statement.
impl<'a> goldenscript::Runner for SQLRunner<'a> {
/// Runs a single goldenscript command and returns its output. Errors are
/// returned to the caller, including errors from statement execution.
fn run(&mut self, command: &goldenscript::Command) -> Result<String, Box<dyn Error>> {
let mut output = String::new();

// Handle runner commands.
match command.name.as_str() {
// dump
//
// Outputs every key/value pair in the storage engine, one per line, both
// formatted as MVCC/SQL data and as raw bytes. Takes no arguments.
"dump" => {
command.consume_args().reject_rest()?;
let mut engine = self.engine.mvcc.engine.lock().expect("mutex failed");
let mut iter = engine.scan(..);
while let Some((key, value)) = iter.next().transpose()? {
writeln!(
output,
"{} [{}]",
format::MVCC::<format::SQL>::key_value(&key, &value),
format::Raw::key_value(&key, &value)
)?;
}
return Ok(output);
}

// schema [TABLE...]
//
// Outputs the schemas of the given tables, or of all tables if none are
// given, separated by newlines. Errors if a given table can't be fetched.
"schema" => {
let mut args = command.consume_args();
let tables = args.rest_pos().iter().map(|arg| arg.value.clone()).collect_vec();
args.reject_rest()?;

let schemas = if tables.is_empty() {
self.session.with_txn(true, |txn| txn.list_tables())?
} else {
tables
.into_iter()
.map(|t| self.session.with_txn(true, |txn| txn.must_get_table(&t)))
.collect::<Result<_, _>>()?
};
return Ok(schemas.into_iter().map(|s| s.to_string()).join("\n"));
}

// Otherwise, fall through to SQL execution.
_ => {}
}

// The entire command is the statement to execute. There are no args.
// NOTE(review): the error message says "expressions", but this runner
// executes SQL statements -- confirm whether the wording is intended.
if !command.args.is_empty() {
return Err("expressions should be given as a command with no args".into());
}
let input = &command.name;
// Tags are removed from this copy as they are handled, so that any
// remaining tags can be rejected as unknown below.
let mut tags = command.tags.clone();

// Execute the statement.
let result = self.session.execute(input)?;

// Output the result if requested.
if tags.remove("result") {
writeln!(output, "{result:?}")?;
}

// Output engine ops if requested. This consumes all operations that are
// currently pending on the channel.
if tags.remove("ops") {
while let Ok(op) = self.op_rx.try_recv() {
match op {
Operation::Delete { key } => writeln!(
output,
"storage delete {} [{}]",
format::MVCC::<format::SQL>::key(&key),
format::Raw::key(&key),
)?,
Operation::Flush => writeln!(output, "storage flush")?,
Operation::Set { key, value } => writeln!(
output,
"storage set {} [{}]",
format::MVCC::<format::SQL>::key_value(&key, &value),
format::Raw::key_value(&key, &value),
)?,
}
}
}

// Reject unknown tags.
if let Some(tag) = tags.iter().next() {
return Err(format!("unknown tag {tag}").into());
}

Ok(output)
}

/// Discards any engine operations still pending on the channel (e.g. because
/// the command didn't have the [ops] tag), so that they aren't output by a
/// later command. Always returns empty output.
fn end_command(&mut self, _: &goldenscript::Command) -> Result<String, Box<dyn Error>> {
// Drain unconsumed operations.
while self.op_rx.try_recv().is_ok() {}
Ok(String::new())
}
}

/// A test runner for expressions specifically. Evaluates expressions to
/// values, and can optionally emit the expression tree.
struct ExpressionRunner {
/// A local SQL engine using in-memory storage.
engine: Local<storage::Memory>,
}
Expand Down
46 changes: 46 additions & 0 deletions src/sql/testscripts/schema/create_table
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Tests basic CREATE TABLE functionality.

# The result contains the table name. The table is written to storage.
[result,ops]> CREATE TABLE test (id INTEGER PRIMARY KEY)
---
CreateTable { name: "test" }
storage set mvcc:NextVersion → 2 ["\x00" → "\x02"]
storage set mvcc:TxnActive(1) → "" ["\x01\x00\x00\x00\x00\x00\x00\x00\x01" → ""]
storage set mvcc:TxnWrite(1, sql:Table("test")) → "" ["\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\xfftest\x00\xff\x00\xff\x00\x00" → ""]
storage set mvcc:Version(sql:Table("test"), 1) → CREATE TABLE test ( id INTEGER PRIMARY KEY ) ["\x04\x00\xfftest\x00\xff\x00\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01" → "\x01\x10\x04test\x00\x01\x02id\x01\x00\x00\x01\x00\x00"]
storage delete mvcc:TxnWrite(1, sql:Table("test")) ["\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\xfftest\x00\xff\x00\xff\x00\x00"]
storage delete mvcc:TxnActive(1) ["\x01\x00\x00\x00\x00\x00\x00\x00\x01"]

# Creating a table with multiple columns, indexes and foreign keys
# still only results in a single stored schema entry.
[ops]> CREATE TABLE indexed (id INTEGER PRIMARY KEY, "index" INTEGER INDEX, "unique" INTEGER UNIQUE, test_id INTEGER REFERENCES test)
---
storage set mvcc:NextVersion → 3 ["\x00" → "\x03"]
storage set mvcc:TxnActive(2) → "" ["\x01\x00\x00\x00\x00\x00\x00\x00\x02" → ""]
storage set mvcc:TxnWrite(2, sql:Table("indexed")) → "" ["\x03\x00\x00\x00\x00\x00\x00\x00\x02\x00\xffindexed\x00\xff\x00\xff\x00\x00" → ""]
storage set mvcc:Version(sql:Table("indexed"), 2) → CREATE TABLE indexed ( id INTEGER PRIMARY KEY, "index" INTEGER DEFAULT NULL INDEX, "unique" INTEGER DEFAULT NULL UNIQUE INDEX, test_id INTEGER DEFAULT NULL INDEX REFERENCES test ) ["\x04\x00\xffindexed\x00\xff\x00\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02" → "\x01B\x07indexed\x00\x04\x02id\x01\x00\x00\x01\x00\x00\x05index\x01\x01\x01\x00\x00\x01\x00\x06unique\x01\x01\x01\x00\x01\x01\x00\x07test_id\x01\x01\x01\x00\x00\x01\x01\x04test"]
storage delete mvcc:TxnWrite(2, sql:Table("indexed")) ["\x03\x00\x00\x00\x00\x00\x00\x00\x02\x00\xffindexed\x00\xff\x00\xff\x00\x00"]
storage delete mvcc:TxnActive(2) ["\x01\x00\x00\x00\x00\x00\x00\x00\x02"]

dump
---
mvcc:NextVersion → 3 ["\x00" → "\x03"]
mvcc:Version(sql:Table("indexed"), 2) → CREATE TABLE indexed ( id INTEGER PRIMARY KEY, "index" INTEGER DEFAULT NULL INDEX, "unique" INTEGER DEFAULT NULL UNIQUE INDEX, test_id INTEGER DEFAULT NULL INDEX REFERENCES test ) ["\x04\x00\xffindexed\x00\xff\x00\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02" → "\x01B\x07indexed\x00\x04\x02id\x01\x00\x00\x01\x00\x00\x05index\x01\x01\x01\x00\x00\x01\x00\x06unique\x01\x01\x01\x00\x01\x01\x00\x07test_id\x01\x01\x01\x00\x00\x01\x01\x04test"]
mvcc:Version(sql:Table("test"), 1) → CREATE TABLE test ( id INTEGER PRIMARY KEY ) ["\x04\x00\xfftest\x00\xff\x00\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01" → "\x01\x10\x04test\x00\x01\x02id\x01\x00\x00\x01\x00\x00"]

schema
---
CREATE TABLE indexed (
id INTEGER PRIMARY KEY,
"index" INTEGER DEFAULT NULL INDEX,
"unique" INTEGER DEFAULT NULL UNIQUE INDEX,
test_id INTEGER DEFAULT NULL INDEX REFERENCES test
)
CREATE TABLE test (
id INTEGER PRIMARY KEY
)

# Errors if table already exists.
!> CREATE TABLE test (id INTEGER PRIMARY KEY)
---
Error: invalid input: table test already exists
41 changes: 41 additions & 0 deletions src/sql/testscripts/schema/create_table_datatypes
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Tests CREATE TABLE datatypes.

# Create columns with all datatypes.
> CREATE TABLE datatypes ( \
id INTEGER PRIMARY KEY, \
"bool" BOOL, \
"boolean" BOOLEAN, \
"double" DOUBLE, \
"float" FLOAT, \
"int" INT, \
"integer" INTEGER, \
"string" STRING, \
"text" TEXT, \
"varchar" VARCHAR \
)
schema
---
CREATE TABLE datatypes (
id INTEGER PRIMARY KEY,
"bool" BOOLEAN DEFAULT NULL,
"boolean" BOOLEAN DEFAULT NULL,
"double" FLOAT DEFAULT NULL,
"float" FLOAT DEFAULT NULL,
"int" INTEGER DEFAULT NULL,
"integer" INTEGER DEFAULT NULL,
"string" STRING DEFAULT NULL,
"text" STRING DEFAULT NULL,
"varchar" STRING DEFAULT NULL
)

# Missing datatype errors.
!> CREATE TABLE test (id INTEGER PRIMARY KEY, value)
---
Error: invalid input: unexpected token )

# Unknown datatype errors.
!> CREATE TABLE test (id INTEGER PRIMARY KEY, value FOO)
!> CREATE TABLE test (id INTEGER PRIMARY KEY, value INDEX)
---
Error: invalid input: unexpected token foo
Error: invalid input: unexpected token INDEX
72 changes: 72 additions & 0 deletions src/sql/testscripts/schema/create_table_names
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Tests CREATE TABLE table and column name validation.

# A couple of valid names.
> CREATE TABLE a_123 (a_123 INTEGER PRIMARY KEY)
> CREATE TABLE 表 (身元 INTEGER PRIMARY KEY, 名前 STRING)
schema
---
CREATE TABLE a_123 (
a_123 INTEGER PRIMARY KEY
)
CREATE TABLE 表 (
身元 INTEGER PRIMARY KEY,
名前 STRING DEFAULT NULL
)

# Mixed case is valid, but interpreted as lower case. Quoted identifiers retain
# their case.
> CREATE TABLE mIxEd_cAsE (ÄÅÆ STRING PRIMARY KEY)
> CREATE TABLE "mIxEd_cAsE" ("ÄÅÆ" STRING PRIMARY KEY)
schema mixed_case
schema mIxEd_cAsE
---
CREATE TABLE mixed_case (
äåæ STRING PRIMARY KEY
)
CREATE TABLE mIxEd_cAsE (
ÄÅÆ STRING PRIMARY KEY
)

# Unquoted _, number, keyword, and emoji errors.
!> CREATE TABLE _name (id INTEGER PRIMARY KEY)
!> CREATE TABLE 123 (1 INTEGER PRIMARY KEY)
!> CREATE TABLE table (primary INTEGER PRIMARY KEY)
!> CREATE TABLE 👋 (🆔 INTEGER PRIMARY KEY)
---
Error: invalid input: unexpected character _
Error: invalid input: expected identifier, got 123
Error: invalid input: expected identifier, got TABLE
Error: invalid input: unexpected character 👋

# Double quotes allow them.
> CREATE TABLE "_name" (id INTEGER PRIMARY KEY)
> CREATE TABLE "123" ("1" INTEGER PRIMARY KEY)
> CREATE TABLE "table" ("primary" INTEGER PRIMARY KEY)
> CREATE TABLE "👋" ("🆔" INTEGER PRIMARY KEY)
schema _name 123 table "👋"
---
CREATE TABLE "_name" (
id INTEGER PRIMARY KEY
)
CREATE TABLE "123" (
"1" INTEGER PRIMARY KEY
)
CREATE TABLE "table" (
"primary" INTEGER PRIMARY KEY
)
CREATE TABLE "👋" (
"🆔" INTEGER PRIMARY KEY
)

# "" escapes " in identifiers.
> CREATE TABLE "name with ""quotes""" (id INTEGER PRIMARY KEY);
schema 'name with "quotes"'
---
CREATE TABLE "name with ""quotes""" (
id INTEGER PRIMARY KEY
)

# ' are for string literals, not identifiers.
!> CREATE TABLE 'name' (id INTEGER PRIMARY KEY)
---
Error: invalid input: expected identifier, got name
Loading

0 comments on commit 9d2ce53

Please sign in to comment.