Skip to content

Commit

Permalink
Modify parser&AST to allow orderby/limit/offset on children of setop (#…
Browse files Browse the repository at this point in the history
…401)

* Modify parser&AST to allow orderby/limit/offset on children of setop

* Add docs, update changelog, additional parse tests, minor refactor

---------

Co-authored-by: Alan Cai <[email protected]>
  • Loading branch information
jpschorr and alancai98 authored Jun 28, 2023
1 parent bd7716d commit 5629e88
Show file tree
Hide file tree
Showing 8 changed files with 174 additions and 48 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- *BREAKING:* partiql-logical-planner: moves `NameResolver` to `partiql-ast-passes`
- *BREAKING:* partiql-values: removes `partiql` from value macro_rules; e.g. `partiql_bag` renames to `bag`.
- *BREAKING:* partiql-ast: changed modeling of `Query` and `SetExpr` nodes to support `ORDER BY`, `LIMIT`, `OFFSET` in children of set operators
- Also affects the `Visitor` trait

### Added
- Add ability for partiql-extension-ion extension encoding/decoding of `Value` to/from Ion `Element`
- Add `partiql-types` crate that includes data models for PartiQL Types.
- Add `partiql_ast_passes::static_typer` for type annotating the AST.
- Add ability to parse `ORDER BY`, `LIMIT`, `OFFSET` in children of set operators

### Fixes

Expand Down
6 changes: 3 additions & 3 deletions partiql-ast-passes/src/partiql_typer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,8 @@ mod tests {
use super::*;
use assert_matches::assert_matches;
use partiql_ast::ast;
use partiql_catalog::{PartiqlCatalog, TypeEnvEntry};
use partiql_types::{PartiqlType, StructConstraint, StructField, TypeKind};
use partiql_catalog::PartiqlCatalog;
use partiql_types::{PartiqlType, TypeKind};

#[test]
fn simple_test() {
Expand Down Expand Up @@ -263,7 +263,7 @@ mod tests {

let typer = AstPartiqlTyper::new(catalog);
if let ast::Expr::Query(q) = parsed.ast.as_ref() {
typer.type_nodes(&q)
typer.type_nodes(q)
} else {
panic!("Typing statement other than `Query` are unsupported")
}
Expand Down
12 changes: 9 additions & 3 deletions partiql-ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,14 @@ pub enum ConflictAction {

#[derive(Visit, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Query {
pub struct TopLevelQuery {
pub with: Option<AstNode<WithClause>>,
pub query: AstNode<Query>,
}

#[derive(Visit, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Query {
pub set: AstNode<QuerySet>,
pub order_by: Option<Box<AstNode<OrderByExpr>>>,
pub limit_offset: Option<Box<AstNode<LimitOffsetClause>>>,
Expand Down Expand Up @@ -278,8 +284,8 @@ pub struct SetExpr {
pub setop: SetOperator,
#[visit(skip)]
pub setq: SetQuantifier,
pub lhs: Box<AstNode<QuerySet>>,
pub rhs: Box<AstNode<QuerySet>>,
pub lhs: Box<AstNode<Query>>,
pub rhs: Box<AstNode<Query>>,
}

#[derive(Clone, Debug, PartialEq, Eq)]
Expand Down
6 changes: 6 additions & 0 deletions partiql-ast/src/visit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,12 @@ pub trait Visitor<'ast> {
fn exit_on_conflict(&mut self, _on_conflict: &'ast ast::OnConflict) -> Traverse {
Traverse::Continue
}
fn enter_top_level_query(&mut self, _query: &'ast ast::TopLevelQuery) -> Traverse {
Traverse::Continue
}
fn exit_top_level_query(&mut self, _query: &'ast ast::TopLevelQuery) -> Traverse {
Traverse::Continue
}
fn enter_query(&mut self, _query: &'ast ast::Query) -> Traverse {
Traverse::Continue
}
Expand Down
21 changes: 0 additions & 21 deletions partiql-ast/tests/test_ast.rs

This file was deleted.

49 changes: 42 additions & 7 deletions partiql-parser/src/parse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ mod grammar {

type LalrpopError<'input> =
lpop::ParseError<ByteOffset, lexer::Token<'input>, ParseError<'input, BytePosition>>;
type LalrpopResult<'input> = Result<Box<ast::Expr>, LalrpopError<'input>>;
type LalrpopResult<'input> = Result<ast::AstNode<ast::TopLevelQuery>, LalrpopError<'input>>;
type LalrpopErrorRecovery<'input> =
lpop::ErrorRecovery<ByteOffset, lexer::Token<'input>, ParseError<'input, BytePosition>>;

Expand Down Expand Up @@ -65,7 +65,7 @@ fn parse_partiql_with_state<'input, Id: IdGenerator>(
let lexer = PreprocessingPartiqlLexer::new(s, &mut offsets, &BUILT_INS);
let lexer = CommentSkippingLexer::new(lexer);

let result: LalrpopResult = grammar::QueryParser::new().parse(s, &mut state, lexer);
let result: LalrpopResult = grammar::TopLevelQueryParser::new().parse(s, &mut state, lexer);

let ParserState {
locations, errors, ..
Expand All @@ -87,11 +87,14 @@ fn parse_partiql_with_state<'input, Id: IdGenerator>(
errors.push(ParseError::from(e));
Err(ErrorData { errors, offsets })
}
(Ok(ast), true) => Ok(AstData {
ast,
locations,
offsets,
}),
(Ok(ast), true) => {
let ast = Box::new(ast::Expr::Query(ast.node.query));
Ok(AstData {
ast,
locations,
offsets,
})
}
}
}

Expand Down Expand Up @@ -606,6 +609,38 @@ mod tests {
assert_ne!(l, r2);
assert_ne!(r, r2);
}

#[test]
fn complex_set() {
parse_null_id!(
r#"(SELECT a1 FROM b1 ORDER BY c1 LIMIT d1 OFFSET e1)
OUTER UNION ALL
(SELECT a2 FROM b2 ORDER BY c2 LIMIT d2 OFFSET e2)
ORDER BY c3 LIMIT d3 OFFSET e3"#
);
parse_null_id!(
r#"(SELECT a1 FROM b1 ORDER BY c1 LIMIT d1 OFFSET e1)
OUTER INTERSECT ALL
(SELECT a2 FROM b2 ORDER BY c2 LIMIT d2 OFFSET e2)
ORDER BY c3 LIMIT d3 OFFSET e3"#
);
parse_null_id!(
r#"(SELECT a1 FROM b1 ORDER BY c1 LIMIT d1 OFFSET e1)
OUTER EXCEPT ALL
(SELECT a2 FROM b2 ORDER BY c2 LIMIT d2 OFFSET e2)
ORDER BY c3 LIMIT d3 OFFSET e3"#
);
parse_null_id!(
r#"(
(SELECT a1 FROM b1 ORDER BY c1 LIMIT d1 OFFSET e1)
UNION DISTINCT
(SELECT a2 FROM b2 ORDER BY c2 LIMIT d2 OFFSET e2)
)
OUTER UNION ALL
(SELECT a3 FROM b3 ORDER BY c3 LIMIT d3 OFFSET e3)
ORDER BY c4 LIMIT d4 OFFSET e4"#
);
}
}

mod case_expr {
Expand Down
101 changes: 94 additions & 7 deletions partiql-parser/src/parse/parse_util.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use partiql_ast::ast;

use crate::parse::parser_state::{IdGenerator, ParserState};
use bitflags::bitflags;
use partiql_source_map::location::ByteOffset;

bitflags! {
/// Set of AST node attributes to use as synthesized attributes.
Expand Down Expand Up @@ -59,13 +61,98 @@ pub(crate) enum CallSite {
}

#[inline]
// if this is just a parenthesized expr, lift it out of the query AST, otherwise return input
// e.g. `(1+2)` should be a ExprKind::Expr, not wrapped deep in a ExprKind::Query
pub(crate) fn strip_query(q: Box<ast::Expr>) -> Box<ast::Expr> {
if let ast::Expr::Query(ast::AstNode {
// Removes extra `Query` nesting if it exists, otherwise return the input.
// e.g. `(SELECT a FROM b ORDER BY c LIMIT d OFFSET e)` should be a Query with no additional nesting.
// Put another way: if `q` is a Query(QuerySet::Expr(Query(inner_q), ...), return Query(inner_q).
// Otherwise, return `q`.
pub(crate) fn strip_query(q: ast::AstNode<ast::Query>) -> ast::AstNode<ast::Query> {
let outer_id = q.id;
if let ast::AstNode {
node: ast::QuerySet::Expr(e),
id: inner_id,
} = q.node.set
{
if let ast::Expr::Query(
inner_q @ ast::AstNode {
node: ast::Query { .. },
..
},
) = *e
{
inner_q
} else {
let set = ast::AstNode {
id: inner_id,
node: ast::QuerySet::Expr(e),
};
ast::AstNode {
id: outer_id,
node: ast::Query {
set,
order_by: None,
limit_offset: None,
},
}
}
} else {
q
}
}

#[inline]
// If `qs` is a `QuerySet::Expr(Expr::Query(inner_q))`, return Query(inner_q). Otherwise, return `qs` wrapped
// in a `Query` with `None` as the `OrderBy` and `LimitOffset`
pub(crate) fn strip_query_set<Id>(
qs: ast::AstNode<ast::QuerySet>,
state: &mut ParserState<Id>,
lo: ByteOffset,
hi: ByteOffset,
) -> ast::AstNode<ast::Query>
where
Id: IdGenerator,
{
if let ast::AstNode {
node: ast::QuerySet::Expr(q),
id: inner_id,
} = qs
{
if let ast::Expr::Query(
inner_q @ ast::AstNode {
node: ast::Query { .. },
..
},
) = *q
{
// preserve query including limit/offset & order by if present
inner_q
} else {
let query = ast::Query {
set: ast::AstNode {
id: inner_id,
node: ast::QuerySet::Expr(q),
},
order_by: None,
limit_offset: None,
};
state.node(query, lo..hi)
}
} else {
let query = ast::Query {
set: qs,
order_by: None,
limit_offset: None,
};
state.node(query, lo..hi)
}
}

#[inline]
// If this is just a parenthesized expr, lift it out of the query AST, otherwise return input
// e.g. `(1+2)` should be an `Expr`, not wrapped deep in a `Query`
pub(crate) fn strip_expr(q: ast::AstNode<ast::Query>) -> Box<ast::Expr> {
if let ast::AstNode {
node:
ast::Query {
with: None,
set:
ast::AstNode {
node: ast::QuerySet::Expr(e),
Expand All @@ -75,10 +162,10 @@ pub(crate) fn strip_query(q: Box<ast::Expr>) -> Box<ast::Expr> {
limit_offset: None,
},
..
}) = *q
} = q
{
e
} else {
q
Box::new(ast::Expr::Query(q))
}
}
24 changes: 17 additions & 7 deletions partiql-parser/src/parse/partiql.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,28 @@ use partiql_ast::ast;

use partiql_source_map::location::{ByteOffset, BytePosition, Location, ToLocated};

use crate::parse::parse_util::{strip_query, CallSite, Attrs, Synth};
use crate::parse::parse_util::{strip_expr, strip_query, strip_query_set, CallSite, Attrs, Synth};
use crate::parse::parser_state::{ParserState, IdGenerator};

grammar<'input, 'state, Id>(input: &'input str, state: &'state mut ParserState<'input, Id>) where Id: IdGenerator;


pub(crate) Query: Box<ast::Expr> = {
pub(crate) TopLevelQuery: ast::AstNode<ast::TopLevelQuery> = {
<lo:@L>
<with:WithClause?>
<query:Query>
<hi:@R> => {
state.node(ast::TopLevelQuery { with, query }, lo..hi)
}
}

Query: ast::AstNode<ast::Query> = {
<lo:@L>
<set:QuerySet>
<order_by:OrderByClause?>
<limit_offset:LimitOffsetClause>
<hi:@R> => {
Box::new(ast::Expr::Query( state.node(ast::Query { with, set, order_by, limit_offset }, lo..hi) ) )
state.node(ast::Query { set, order_by, limit_offset }, lo..hi)
}
}

Expand Down Expand Up @@ -94,7 +102,9 @@ WithCycleClause : () = {
// - all set operations are left-associative and are thus expressed as left-self-recursive rules

QuerySet: ast::AstNode<ast::QuerySet> = {
<lo:@L> <lhs:QuerySet> <setop:SetOp> <setq:SetQuantifier> <rhs:SingleQuery> <hi:@R> => {
<lo:@L> <lhs:Query> <setop:SetOp> <setq:SetQuantifier> <rhs:SingleQuery> <hi:@R> => {
let lhs = strip_query(lhs);
let rhs = strip_query_set(rhs, state, lo, hi);
let set_expr = state.node(ast::SetExpr {
setop,
setq,
Expand Down Expand Up @@ -133,7 +143,7 @@ SetQuantifier: ast::SetQuantifier = {
SingleQuery: ast::AstNode<ast::QuerySet> = {
<lo:@L> <expr:ExprQuery> <hi:@R> => {
match *expr {
ast::Expr::Query(ast::AstNode{ node: ast::Query{with: None, set, order_by:None, limit_offset:None} , .. }) => set,
ast::Expr::Query(ast::AstNode{ node: ast::Query{set, order_by:None, limit_offset:None} , .. }) => set,
_ => state.node(ast::QuerySet::Expr( expr ), lo..hi),
}
},
Expand Down Expand Up @@ -907,7 +917,7 @@ ExprTerm: Synth<ast::Expr> = {
}

SubQuery: ast::Expr = {
"(" <q:Query> ")" => *strip_query(q),
"(" <q:Query> ")" => *strip_expr(q),
}

SubQueryAst: ast::AstNode<ast::Expr> = {
Expand Down Expand Up @@ -1050,7 +1060,7 @@ FunctionCallArgs: Vec<ast::AstNode<ast::CallArg>> = {
// Special case subquery when it is the only sub-expression of a function call (e.g., `SELECT AVG(SELECT VALUE price FROM g AS v))... `)
<lo:@L> <subq:SfwQuery> <hi:@R> => {
let qset = state.node(ast::QuerySet::Select(Box::new(subq)), lo..hi);
let query = state.node(ast::Query{ with: None, set: qset, order_by: None, limit_offset:None }, lo..hi);
let query = state.node(ast::Query{ set: qset, order_by: None, limit_offset:None }, lo..hi);
vec![state.node(ast::CallArg::Positional(Box::new(ast::Expr::Query(query))), lo..hi)]
},
}
Expand Down

1 comment on commit 5629e88

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PartiQL (rust) Benchmark

Benchmark suite Current: 5629e88 Previous: bd7716d Ratio
parse-1 5870 ns/iter (± 159) 6001 ns/iter (± 18) 0.98
parse-15 58046 ns/iter (± 1846) 56753 ns/iter (± 155) 1.02
parse-30 109543 ns/iter (± 2610) 110650 ns/iter (± 354) 0.99
compile-1 5608 ns/iter (± 107) 4982 ns/iter (± 29) 1.13
compile-15 41215 ns/iter (± 756) 36153 ns/iter (± 33) 1.14
compile-30 82711 ns/iter (± 1861) 73160 ns/iter (± 42) 1.13
plan-1 22839 ns/iter (± 491) 19469 ns/iter (± 20) 1.17
plan-15 428149 ns/iter (± 6372) 357747 ns/iter (± 896) 1.20
plan-30 837027 ns/iter (± 21035) 723337 ns/iter (± 1558) 1.16
eval-1 26140310 ns/iter (± 662213) 26210844 ns/iter (± 1825101) 1.00
eval-15 140488315 ns/iter (± 2297844) 121760759 ns/iter (± 1415036) 1.15
eval-30 267017773 ns/iter (± 4134560) 229787046 ns/iter (± 1845399) 1.16
join 16619 ns/iter (± 385) 14369 ns/iter (± 27) 1.16
simple 8001 ns/iter (± 82) 6816 ns/iter (± 15) 1.17
simple-no 770 ns/iter (± 15) 650 ns/iter (± 0) 1.18
numbers 170 ns/iter (± 2) 144 ns/iter (± 0) 1.18
parse-simple 868 ns/iter (± 4) 735 ns/iter (± 0) 1.18
parse-ion 2709 ns/iter (± 40) 2660 ns/iter (± 5) 1.02
parse-group 8764 ns/iter (± 108) 8593 ns/iter (± 18) 1.02
parse-complex 22901 ns/iter (± 351) 21988 ns/iter (± 47) 1.04
parse-complex-fexpr 36577 ns/iter (± 596) 34789 ns/iter (± 83) 1.05

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.