Skip to content

Commit

Permalink
Add experimental draft support for GPML-style graph query
Browse files Browse the repository at this point in the history
  • Loading branch information
jpschorr committed Jul 6, 2022
1 parent f57ee41 commit e226bda
Show file tree
Hide file tree
Showing 6 changed files with 538 additions and 5 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- AST for the currently parsed subset of PartiQL
- Tracking of locations in source text for ASTs and Errors
- Conformance tests via test generation from [partiql-tests](https://github.com/partiql/partiql-tests/)
- An experimental (pending [#15](https://github.com/partiql/partiql-docs/issues/15)) embedding of a subset of
the [GPML (Graph Pattern Matching Language)](https://arxiv.org/abs/2112.06217) graph query into the `FROM` clause.
The use within the grammar is based on the assumption of a new graph data type being added to the
specification of data types within PartiQL, and should be considered experimental until the semantics of the graph
data type are specified. The supported subset is:
- basic and abbreviated node and edge patterns (section 4.1 of the GPML paper)
- concatenated path patterns (section 4.2 of the GPML paper)
- path variables (section 4.2 of the GPML paper)
- graph patterns (i.e., comma separated path patterns) (section 4.3 of the GPML paper)
- parenthesized patterns (section 4.4 of the GPML paper)
- path quantifiers (section 4.4 of the GPML paper)
- restrictors and selector (section 5.1 of the GPML paper)
- pre-filters and post-filters (section 5.2 of the GPML paper)


147 changes: 147 additions & 0 deletions partiql-ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use rust_decimal::Decimal as RustDecimal;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::fmt::Display;
use std::num::NonZeroU32;
use std::ops::Range;

/// Provides the required methods for AstNode conversions.
Expand Down Expand Up @@ -235,6 +236,13 @@ pub type CallAst = AstBytePos<Call>;
pub type CaseAst = AstBytePos<Case>;
pub type FromClauseAst = AstBytePos<FromClause>;
pub type FromLetAst = AstBytePos<FromLet>;
pub type GraphMatchAst = AstBytePos<GraphMatch>;
pub type GraphMatchExprAst = AstBytePos<GraphMatchExpr>;
pub type GraphMatchEdgeAst = AstBytePos<GraphMatchEdge>;
pub type GraphMatchNodeAst = AstBytePos<GraphMatchNode>;
pub type GraphMatchPatternAst = AstBytePos<GraphMatchPattern>;
pub type GraphMatchPatternPartAst = AstBytePos<GraphMatchPatternPart>;
pub type GraphMatchQuantifierAst = AstBytePos<GraphMatchQuantifier>;
pub type GroupByExprAst = AstBytePos<GroupByExpr>;
pub type GroupKeyAst = AstBytePos<GroupKey>;
pub type InAst = AstBytePos<In>;
Expand Down Expand Up @@ -655,6 +663,9 @@ pub enum FromClause {
FromLet(FromLetAst),
/// <from_source> JOIN \[INNER | LEFT | RIGHT | FULL\] <from_source> ON <expr>
Join(JoinAst),

/// <expr> MATCH <graph_pattern>
GraphMatch(GraphMatchAst),
}

#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
Expand Down Expand Up @@ -699,6 +710,142 @@ pub enum JoinKind {
Cross,
}

/// A graph match used as a `FROM` source, i.e. `<expr> MATCH <graph_pattern>`.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct GraphMatch {
/// the expression on the left-hand side of `MATCH`
/// (presumably evaluates to the graph being queried; the graph data type is not yet specified)
pub expr: Box<Expr>,
/// the graph match expression (optional selector plus path patterns) on the right-hand side of `MATCH`
pub graph_expr: Box<GraphMatchExprAst>,
}

/// The direction of an edge in a graph pattern.
///
/// | Orientation               | Edge pattern    | Abbreviation |
/// |---------------------------|-----------------|--------------|
/// | Pointing left             | `<-[ spec ]-`   | `<-`         |
/// | Undirected                | `~[ spec ]~`    | `~`          |
/// | Pointing right            | `-[ spec ]->`   | `->`         |
/// | Left or undirected        | `<~[ spec ]~`   | `<~`         |
/// | Undirected or right       | `~[ spec ]~>`   | `~>`         |
/// | Left or right             | `<-[ spec ]->`  | `<->`        |
/// | Left, undirected or right | `-[ spec ]-`    | `-`          |
///
/// Fig. 5. Table of edge patterns:
/// <https://arxiv.org/abs/2112.06217>
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub enum GraphMatchDirection {
/// Pointing left: `<-[ spec ]-` or `<-`
Left,
/// Undirected: `~[ spec ]~` or `~`
Undirected,
/// Pointing right: `-[ spec ]->` or `->`
Right,
/// Left or undirected: `<~[ spec ]~` or `<~`
LeftOrUndirected,
/// Undirected or right: `~[ spec ]~>` or `~>`
UndirectedOrRight,
/// Left or right: `<-[ spec ]->` or `<->`
LeftOrRight,
/// Left, undirected or right: `-[ spec ]-` or `-`
LeftOrUndirectedOrRight,
}

/// A part of a graph pattern: a node, an edge, or a nested sub-pattern.
///
/// A [`GraphMatchPattern`] holds an ordered list of these parts.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub enum GraphMatchPatternPart {
/// A single node in a graph pattern, e.g.: `(x)`.
Node(GraphMatchNodeAst),

/// A single edge in a graph pattern, e.g.: `-[t]->`.
Edge(GraphMatchEdgeAst),

/// A sub-pattern, itself a complete [`GraphMatchPattern`].
Pattern(GraphMatchPatternAst),
}

/// A quantifier for graph edges or patterns. (e.g., the `{2,5}` in `MATCH (x)->{2,5}(y)`)
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct GraphMatchQuantifier {
/// the lower bound of the quantifier, e.g.: `2` in `{2,5}`
pub lower: u32,
/// the optional, non-zero upper bound of the quantifier, e.g.: `5` in `{2,5}`
/// (presumably `None` means there is no upper bound; confirm against the grammar)
pub upper: Option<NonZeroU32>,
}

/// A path restrictor.
///
/// | Keyword        | Description
/// |----------------|--------------
/// | TRAIL          | No repeated edges.
/// | ACYCLIC        | No repeated nodes.
/// | SIMPLE         | No repeated nodes, except that the first and last nodes may be the same.
///
/// Fig. 7. Table of restrictors:
/// <https://arxiv.org/abs/2112.06217>
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub enum GraphMatchRestrictor {
/// `TRAIL`: no repeated edges.
Trail,
/// `ACYCLIC`: no repeated nodes.
Acyclic,
/// `SIMPLE`: no repeated nodes, except that the first and last nodes may be the same.
Simple,
}

/// A single node in a graph pattern, e.g.: `(x:Entity)` in `MATCH (x:Entity)`.
///
/// Every component is optional.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct GraphMatchNode {
/// an optional node pre-filter, e.g.: `WHERE c.name='Alarm'` in `MATCH (c WHERE c.name='Alarm')`
pub prefilter: Option<Box<Expr>>,
/// the optional element variable of the node match, e.g.: `x` in `MATCH (x)`
pub variable: Option<SymbolPrimitive>,
/// the optional label(s) to match for the node, e.g.: `Entity` in `MATCH (x:Entity)`
pub label: Option<Vec<SymbolPrimitive>>,
}

/// A single edge in a graph pattern, e.g.: `-[t:Target]->` in `MATCH (a) -[t:Target]-> (b)`.
///
/// Only the direction is required; every other component is optional.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct GraphMatchEdge {
/// edge direction (see [`GraphMatchDirection`] for the syntax of each orientation)
pub direction: GraphMatchDirection,
/// an optional quantifier for the edge match, e.g.: `{2,5}` in `MATCH (x)->{2,5}(y)`
pub quantifier: Option<GraphMatchQuantifierAst>,
/// an optional edge pre-filter, e.g.: `WHERE t.capacity>100` in `MATCH -[t:hasSupply WHERE t.capacity>100]->`
pub prefilter: Option<Box<Expr>>,
/// the optional element variable of the edge match, e.g.: `t` in `MATCH -[t]->`
pub variable: Option<SymbolPrimitive>,
/// the optional label(s) to match for the edge. e.g.: `Target` in `MATCH -[t:Target]->`
pub label: Option<Vec<SymbolPrimitive>>,
}

/// A single graph match pattern: an ordered sequence of nodes, edges and sub-patterns,
/// together with its optional restrictor, quantifier, pre-filter and path variable.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct GraphMatchPattern {
/// an optional path restrictor for the pattern (`TRAIL`, `ACYCLIC` or `SIMPLE`)
pub restrictor: Option<GraphMatchRestrictor>,
/// an optional quantifier for the entire pattern match
pub quantifier: Option<GraphMatchQuantifierAst>,
/// an optional pattern pre-filter, e.g.: `WHERE a.name=b.name` in `MATCH [(a)->(b) WHERE a.name=b.name]`
pub prefilter: Option<Box<Expr>>,
/// the optional element variable of the pattern, e.g.: `p` in `MATCH p = (a) -[t]-> (b)`
pub variable: Option<SymbolPrimitive>,
/// the ordered pattern parts
pub parts: Vec<GraphMatchPatternPart>,
}

/// A path selector.
///
/// | Keyword          |
/// |------------------|
/// | ANY SHORTEST     |
/// | ALL SHORTEST     |
/// | ANY              |
/// | ANY k            |
/// | SHORTEST k       |
/// | SHORTEST k GROUP |
///
/// Fig. 8. Table of selectors:
/// <https://arxiv.org/abs/2112.06217>
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub enum GraphMatchSelector {
/// `ANY SHORTEST`
AnyShortest,
/// `ALL SHORTEST`
AllShortest,
/// `ANY`
Any,
/// `ANY k`, carrying the non-zero `k`
AnyK(NonZeroU32),
/// `SHORTEST k`, carrying the non-zero `k`
ShortestK(NonZeroU32),
/// `SHORTEST k GROUP`, carrying the non-zero `k`
ShortestKGroup(NonZeroU32),
}

/// A graph match clause as defined in GPML
/// See <https://arxiv.org/abs/2112.06217>
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct GraphMatchExpr {
/// an optional path selector, e.g.: `ANY SHORTEST`
pub selector: Option<GraphMatchSelector>,
/// the path patterns making up the graph pattern
/// (comma separated in the query text, e.g.: `(t) -[:hasChange]-> (dt), (dt) -[:checkPointedBy]-> (t1)`)
pub patterns: Vec<GraphMatchPatternAst>,
}

/// A generic pair of expressions. Used in the `pub struct`, `searched_case`
/// and `simple_case` expr variants above.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
Expand Down
17 changes: 17 additions & 0 deletions partiql-parser/benches/bench_parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,20 @@ const Q_COMPLEX_FEXPR: &str = r#"
AS deltas FROM SOURCE_VIEW_DELTA_FULL_TRANSACTIONS delta_full_transactions
"#;

// Benchmark input: a nested `SELECT` whose inner `FROM` uses a graph `MATCH` with two
// comma-separated path patterns as one of its sources. Parsed by the "parse-complex-match" benchmark.
const Q_COMPLEX_MATCH: &str = r#"
SELECT (
SELECT numRec, data
FROM
(deltaGraph MATCH (t) -[:hasChange]-> (dt), (dt) -[:checkPointedBy]-> (t1)),
(
SELECT foo(u.id), bar(review), rindex
FROM delta.data as u CROSS JOIN UNPIVOT u.reviews as review AT rindex
) as data,
delta.numRec as numRec
)
AS deltas FROM SOURCE_VIEW_DELTA_FULL_TRANSACTIONS delta_full_transactions
"#;

fn parse_bench(c: &mut Criterion) {
fn parse(text: &str) -> ParserResult {
Parser::default().parse(text)
Expand All @@ -45,6 +59,9 @@ fn parse_bench(c: &mut Criterion) {
c.bench_function("parse-complex-fexpr", |b| {
b.iter(|| parse(black_box(Q_COMPLEX_FEXPR)))
});
c.bench_function("parse-complex-match", |b| {
b.iter(|| parse(black_box(Q_COMPLEX_MATCH)))
});
}

criterion_group! {
Expand Down
36 changes: 32 additions & 4 deletions partiql-parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,8 @@ pub enum Token<'input> {
Caret,
#[token(".")]
Period,
#[token("~")]
Tilde,
#[token("||")]
DblPipe,

Expand Down Expand Up @@ -510,10 +512,14 @@ pub enum Token<'input> {
// Keywords
#[regex("(?i:All)")]
All,
#[regex("(?i:Acyclic)")]
Acyclic,
#[regex("(?i:Asc)")]
Asc,
#[regex("(?i:And)")]
And,
#[regex("(?i:Any)")]
Any,
#[regex("(?i:As)")]
As,
#[regex("(?i:At)")]
Expand Down Expand Up @@ -572,6 +578,8 @@ pub enum Token<'input> {
Like,
#[regex("(?i:Limit)")]
Limit,
#[regex("(?i:Match)")]
Match,
#[regex("(?i:Missing)")]
Missing,
#[regex("(?i:Natural)")]
Expand Down Expand Up @@ -602,8 +610,14 @@ pub enum Token<'input> {
Right,
#[regex("(?i:Select)")]
Select,
#[regex("(?i:Simple)")]
Simple,
#[regex("(?i:Shortest)")]
Shortest,
#[regex("(?i:Then)")]
Then,
#[regex("(?i:Trail)")]
Trail,
#[regex("(?i:True)")]
True,
#[regex("(?i:Union)")]
Expand All @@ -628,9 +642,11 @@ impl<'input> Token<'input> {
pub fn is_keyword(&self) -> bool {
matches!(
self,
Token::All
Token::Acyclic
| Token::All
| Token::Asc
| Token::And
| Token::Any
| Token::As
| Token::At
| Token::Between
Expand All @@ -656,6 +672,7 @@ impl<'input> Token<'input> {
| Token::Left
| Token::Like
| Token::Limit
| Token::Match
| Token::Missing
| Token::Natural
| Token::Not
Expand All @@ -671,7 +688,10 @@ impl<'input> Token<'input> {
| Token::Preserve
| Token::Right
| Token::Select
| Token::Simple
| Token::Shortest
| Token::Then
| Token::Trail
| Token::Union
| Token::Unpivot
| Token::Using
Expand Down Expand Up @@ -717,6 +737,7 @@ impl<'input> fmt::Display for Token<'input> {
Token::Slash => write!(f, "/"),
Token::Caret => write!(f, "^"),
Token::Period => write!(f, "."),
Token::Tilde => write!(f, "~"),
Token::DblPipe => write!(f, "||"),
Token::UnquotedIdent(id) => write!(f, "<{}:UNQUOTED_IDENT>", id),
Token::QuotedIdent(id) => write!(f, "<{}:QUOTED_IDENT>", id),
Expand All @@ -729,9 +750,11 @@ impl<'input> fmt::Display for Token<'input> {
Token::EmbeddedIonQuote => write!(f, "<ION>"),
Token::Ion(txt) => write!(f, "<{}:ION>", txt),

Token::All
Token::Acyclic
| Token::All
| Token::Asc
| Token::And
| Token::Any
| Token::As
| Token::At
| Token::Between
Expand Down Expand Up @@ -761,6 +784,7 @@ impl<'input> fmt::Display for Token<'input> {
| Token::Left
| Token::Like
| Token::Limit
| Token::Match
| Token::Missing
| Token::Natural
| Token::Not
Expand All @@ -776,7 +800,10 @@ impl<'input> fmt::Display for Token<'input> {
| Token::Preserve
| Token::Right
| Token::Select
| Token::Simple
| Token::Shortest
| Token::Then
| Token::Trail
| Token::True
| Token::Union
| Token::Unpivot
Expand Down Expand Up @@ -811,7 +838,8 @@ mod tests {
"WiTH Where Value uSiNg Unpivot UNION True Select right Preserve pivoT Outer Order Or \
On Offset Nulls Null Not Natural Missing Limit Like Left Lateral Last Join \
Intersect Is Inner In Having Group From For Full First False Except Escape Desc \
Cross By Between At As And Asc All Values Case When Then Else End";
Cross By Between At As And Asc All Values Case When Then Else End Match Any Shortest \
Trail Acyclic Simple";
let symbols = symbols.split(' ').chain(primitives.split(' '));
let keywords = keywords.split(' ');

Expand All @@ -833,7 +861,7 @@ mod tests {
"<quoted_ident:QUOTED_IDENT>", "IN", "<unquoted_atident:UNQUOTED_ATIDENT>", "HAVING",
"<quoted_atident:QUOTED_ATIDENT>", "GROUP", "FROM", "FOR", "FULL", "FIRST", "FALSE", "EXCEPT",
"ESCAPE", "DESC", "CROSS", "BY", "BETWEEN", "AT", "AS", "AND", "ASC", "ALL", "VALUES",
"CASE", "WHEN", "THEN", "ELSE", "END",
"CASE", "WHEN", "THEN", "ELSE", "END", "MATCH", "ANY", "SHORTEST", "TRAIL", "ACYCLIC", "SIMPLE",
];
let displayed = toks
.into_iter()
Expand Down
Loading

0 comments on commit e226bda

Please sign in to comment.