Skip to content

Commit

Permalink
Refactor Value & Enable Lazy Ion literals. (#523)
Browse files Browse the repository at this point in the history
* Change Lexing/Parsing of embedded docs to not eagerly validate (#507)

This changes the lexer and parser to pass through strings enclosed in backticks un-parsed. (At current, these documents are parsed during lowering).

Since embedded documents may themselves contain backticks, beginning and ending delimiters consist of an arbitrary odd numbers of backticks (e.g., `` ` ``, `` ``` ``, `` ````` `` etc.) that must be paired (e.g., `` `$ion_data_here::[]` ``, `` ```$ion_data_here::[ $string_with_embedded_backtick:"`" ]``` ``, etc.).

As opening and closing delimiters are required to be odd in count of backticks, a contiguous string of backticks that is even is interpreted as an empty document.

* Behavior-preserving refactor of `Value` into a module. (#509)

* Behavior-preserving refactor of Value into a module. (#510)

* Change modeling of Literals in the AST remove ambiguity (#517)

Change parsing and AST-modeling of literals to not share AST structures with non-scalar expressions.

* Change modeling of boxed ion literals to be lazy until evaluator. (#519)

Changes the logical plan to have a distinct `Lit` type to hold literals instead of embedded `Value`

* Refactor lifetimes for new rust warnings
  • Loading branch information
jpschorr authored Dec 5, 2024
1 parent 4a4b143 commit a07bdfc
Show file tree
Hide file tree
Showing 36 changed files with 2,055 additions and 1,611 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Changed
- *BREAKING* partiql-parser: Added a source location to `ParseError::UnexpectedEndOfInput`
- *BREAKING* partiql-ast: Changed the modelling of parsed literals.
- *BREAKING* partiql-logical: Changed the modelling of logical plan literals.
- partiql-eval: Fixed behavior of comparison and `BETWEEN` operations w.r.t. type mismatches

### Added
Expand Down
2 changes: 1 addition & 1 deletion extension/partiql-extension-visualize/src/ast_to_dot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ fn lit_to_str(ast: &ast::Lit) -> String {
Lit::FloatLit(l) => l.to_string(),
Lit::DoubleLit(l) => l.to_string(),
Lit::BoolLit(l) => (if *l { "TRUE" } else { "FALSE" }).to_string(),
Lit::IonStringLit(l) => format!("`{}`", l),
Lit::EmbeddedDocLit(l) => format!("`{}`", l),
Lit::CharStringLit(l) => format!("'{}'", l),
Lit::NationalCharStringLit(l) => format!("'{}'", l),
Lit::BitStringLit(l) => format!("b'{}'", l),
Expand Down
33 changes: 29 additions & 4 deletions partiql-ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ pub enum Lit {
#[visit(skip)]
BoolLit(bool),
#[visit(skip)]
IonStringLit(String),
EmbeddedDocLit(String),
#[visit(skip)]
CharStringLit(String),
#[visit(skip)]
Expand All @@ -454,16 +454,41 @@ pub enum Lit {
#[visit(skip)]
HexStringLit(String),
#[visit(skip)]
StructLit(AstNode<Struct>),
StructLit(AstNode<StructLit>),
#[visit(skip)]
BagLit(AstNode<Bag>),
BagLit(AstNode<BagLit>),
#[visit(skip)]
ListLit(AstNode<List>),
ListLit(AstNode<ListLit>),
/// E.g. `TIME WITH TIME ZONE` in `SELECT TIME WITH TIME ZONE '12:00' FROM ...`
#[visit(skip)]
TypedLit(String, Type),
}

#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LitField {
pub first: String,
pub second: AstNode<Lit>,
}

#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct StructLit {
pub fields: Vec<LitField>,
}

#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct BagLit {
pub values: Vec<Lit>,
}

#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ListLit {
pub values: Vec<Lit>,
}

#[derive(Visit, Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct VarRef {
Expand Down
63 changes: 62 additions & 1 deletion partiql-ast/src/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ impl PrettyDoc for Lit {
Lit::FloatLit(inner) => arena.text(inner.to_string()),
Lit::DoubleLit(inner) => arena.text(inner.to_string()),
Lit::BoolLit(inner) => arena.text(inner.to_string()),
Lit::IonStringLit(inner) => inner.pretty_doc(arena),
Lit::EmbeddedDocLit(inner) => inner.pretty_doc(arena), // TODO better pretty for embedded doc: https://github.com/partiql/partiql-lang-rust/issues/508
Lit::CharStringLit(inner) => inner.pretty_doc(arena),
Lit::NationalCharStringLit(inner) => inner.pretty_doc(arena),
Lit::BitStringLit(inner) => inner.pretty_doc(arena),
Expand Down Expand Up @@ -699,6 +699,38 @@ impl PrettyDoc for StructExprPair {
}
}

impl PrettyDoc for StructLit {
fn pretty_doc<'b, D, A>(&'b self, arena: &'b D) -> DocBuilder<'b, D, A>
where
D: DocAllocator<'b, A>,
D::Doc: Clone,
A: Clone,
{
let wrapped = self.fields.iter().map(|p| unsafe {
let x: &'b StructLitField = std::mem::transmute(p);
x
});
pretty_seq(wrapped, "{", "}", ",", PRETTY_INDENT_MINOR_NEST, arena)
}
}

pub struct StructLitField(pub LitField);

impl PrettyDoc for StructLitField {
fn pretty_doc<'b, D, A>(&'b self, arena: &'b D) -> DocBuilder<'b, D, A>
where
D: DocAllocator<'b, A>,
D::Doc: Clone,
A: Clone,
{
let k = self.0.first.pretty_doc(arena);
let v = self.0.second.pretty_doc(arena);
let sep = arena.text(": ");

k.append(sep).group().append(v).group()
}
}

impl PrettyDoc for Bag {
fn pretty_doc<'b, D, A>(&'b self, arena: &'b D) -> DocBuilder<'b, D, A>
where
Expand Down Expand Up @@ -728,6 +760,35 @@ impl PrettyDoc for List {
}
}

impl PrettyDoc for BagLit {
fn pretty_doc<'b, D, A>(&'b self, arena: &'b D) -> DocBuilder<'b, D, A>
where
D: DocAllocator<'b, A>,
D::Doc: Clone,
A: Clone,
{
pretty_seq(
&self.values,
"<<",
">>",
",",
PRETTY_INDENT_MINOR_NEST,
arena,
)
}
}

impl PrettyDoc for ListLit {
fn pretty_doc<'b, D, A>(&'b self, arena: &'b D) -> DocBuilder<'b, D, A>
where
D: DocAllocator<'b, A>,
D::Doc: Clone,
A: Clone,
{
pretty_seq(&self.values, "[", "]", ",", PRETTY_INDENT_MINOR_NEST, arena)
}
}

impl PrettyDoc for Sexp {
fn pretty_doc<'b, D, A>(&'b self, _arena: &'b D) -> DocBuilder<'b, D, A>
where
Expand Down
75 changes: 44 additions & 31 deletions partiql-eval/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,12 @@ mod tests {
// TODO: once eval conformance tests added and/or modified evaluation API (to support other values
// in evaluator output), change or delete tests using this function
#[track_caller]
fn eval_bin_op(op: BinaryOp, lhs: Value, rhs: Value, expected_first_elem: Value) {
fn eval_bin_op<I: Into<logical::Lit>>(
op: BinaryOp,
lhs: Value,
rhs_lit: I,
expected_first_elem: Value,
) {
let mut plan = LogicalPlan::new();
let scan = plan.add_operator(BindingsOp::Scan(logical::Scan {
expr: ValueExpr::VarRef(
Expand All @@ -187,7 +192,7 @@ mod tests {
"lhs".to_string().into(),
))],
)),
Box::new(ValueExpr::Lit(Box::new(rhs))),
Box::new(ValueExpr::Lit(Box::new(rhs_lit.into()))),
),
)]),
}));
Expand Down Expand Up @@ -643,13 +648,13 @@ mod tests {
#[test]
fn and_or_null() {
#[track_caller]
fn eval_to_null(op: BinaryOp, lhs: Value, rhs: Value) {
fn eval_to_null<I: Into<logical::Lit>>(op: BinaryOp, lhs: I, rhs: I) {
let mut plan = LogicalPlan::new();
let expq = plan.add_operator(BindingsOp::ExprQuery(ExprQuery {
expr: ValueExpr::BinaryExpr(
op,
Box::new(ValueExpr::Lit(Box::new(lhs))),
Box::new(ValueExpr::Lit(Box::new(rhs))),
Box::new(ValueExpr::Lit(Box::new(lhs.into()))),
Box::new(ValueExpr::Lit(Box::new(rhs.into()))),
),
}));

Expand Down Expand Up @@ -697,8 +702,8 @@ mod tests {
"value".to_string().into(),
))],
)),
from: Box::new(ValueExpr::Lit(Box::new(from))),
to: Box::new(ValueExpr::Lit(Box::new(to))),
from: Box::new(ValueExpr::Lit(Box::new(from.into()))),
to: Box::new(ValueExpr::Lit(Box::new(to.into()))),
}),
)]),
}));
Expand Down Expand Up @@ -908,7 +913,7 @@ mod tests {
kind: JoinKind::Left,
left: Box::new(from_lhs),
right: Box::new(from_rhs),
on: Some(ValueExpr::Lit(Box::new(Value::from(true)))),
on: Some(ValueExpr::Lit(Box::new(Value::from(true).into()))),
}));

let sink = lg.add_operator(BindingsOp::Sink);
Expand Down Expand Up @@ -936,17 +941,21 @@ mod tests {
expr: Box::new(path_var("n", "a")),
cases: vec![
(
Box::new(ValueExpr::Lit(Box::new(Value::Integer(1)))),
Box::new(ValueExpr::Lit(Box::new(Value::from("one".to_string())))),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(1).into()))),
Box::new(ValueExpr::Lit(Box::new(
Value::from("one".to_string()).into(),
))),
),
(
Box::new(ValueExpr::Lit(Box::new(Value::Integer(2)))),
Box::new(ValueExpr::Lit(Box::new(Value::from("two".to_string())))),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(2).into()))),
Box::new(ValueExpr::Lit(Box::new(
Value::from("two".to_string()).into(),
))),
),
],
default: Some(Box::new(ValueExpr::Lit(Box::new(Value::from(
"other".to_string(),
))))),
default: Some(Box::new(ValueExpr::Lit(Box::new(
Value::from("other".to_string()).into(),
)))),
}
}

Expand All @@ -957,22 +966,26 @@ mod tests {
Box::new(ValueExpr::BinaryExpr(
BinaryOp::Eq,
Box::new(path_var("n", "a")),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(1)))),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(1).into()))),
)),
Box::new(ValueExpr::Lit(Box::new(Value::from("one".to_string())))),
Box::new(ValueExpr::Lit(Box::new(
Value::from("one".to_string()).into(),
))),
),
(
Box::new(ValueExpr::BinaryExpr(
BinaryOp::Eq,
Box::new(path_var("n", "a")),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(2)))),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(2).into()))),
)),
Box::new(ValueExpr::Lit(Box::new(Value::from("two".to_string())))),
Box::new(ValueExpr::Lit(Box::new(
Value::from("two".to_string()).into(),
))),
),
],
default: Some(Box::new(ValueExpr::Lit(Box::new(Value::from(
"other".to_string(),
))))),
default: Some(Box::new(ValueExpr::Lit(Box::new(
Value::from("other".to_string()).into(),
)))),
}
}

Expand Down Expand Up @@ -1236,7 +1249,7 @@ mod tests {
"lhs".to_string().into(),
))],
)),
rhs: Box::new(ValueExpr::Lit(Box::new(rhs))),
rhs: Box::new(ValueExpr::Lit(Box::new(rhs.into()))),
}),
)]),
}));
Expand Down Expand Up @@ -1387,7 +1400,7 @@ mod tests {
println!("{:?}", &out);
assert_eq!(out, expected);
}
let list = ValueExpr::Lit(Box::new(Value::List(Box::new(list![1, 2, 3]))));
let list = ValueExpr::Lit(Box::new(Value::List(Box::new(list![1, 2, 3])).into()));

// `[1,2,3][0]` -> `1`
let index = ValueExpr::Path(Box::new(list.clone()), vec![PathComponent::Index(0)]);
Expand All @@ -1406,7 +1419,7 @@ mod tests {
test(index, Value::Integer(3));

// `{'a':10}[''||'a']` -> `10`
let tuple = ValueExpr::Lit(Box::new(Value::Tuple(Box::new(tuple![("a", 10)]))));
let tuple = ValueExpr::Lit(Box::new(Value::Tuple(Box::new(tuple![("a", 10)])).into()));
let index_expr = ValueExpr::BinaryExpr(
BinaryOp::Concat,
Box::new(ValueExpr::Lit(Box::new("".into()))),
Expand Down Expand Up @@ -1499,7 +1512,7 @@ mod tests {
expr: ValueExpr::BinaryExpr(
BinaryOp::Mul,
Box::new(va),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(2)))),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(2).into()))),
),
}));

Expand Down Expand Up @@ -1578,7 +1591,7 @@ mod tests {
tuple_expr.values.push(ValueExpr::BinaryExpr(
BinaryOp::Mul,
Box::new(va),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(2)))),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(2).into()))),
));

let project = lg.add_operator(ProjectValue(logical::ProjectValue {
Expand Down Expand Up @@ -1740,7 +1753,7 @@ mod tests {
list_expr.elements.push(ValueExpr::BinaryExpr(
BinaryOp::Mul,
Box::new(va),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(2)))),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(2).into()))),
));

let select_value = lg.add_operator(ProjectValue(logical::ProjectValue {
Expand Down Expand Up @@ -1966,7 +1979,7 @@ mod tests {
"balance".to_string().into(),
))],
)),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(0)))),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(0).into()))),
),
}));

Expand Down Expand Up @@ -2096,7 +2109,7 @@ mod tests {
ValueExpr::BinaryExpr(
BinaryOp::Mul,
Box::new(va),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(2)))),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(2).into()))),
),
)]),
}));
Expand Down Expand Up @@ -2170,7 +2183,7 @@ mod tests {
ValueExpr::BinaryExpr(
BinaryOp::Mul,
Box::new(va),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(2)))),
Box::new(ValueExpr::Lit(Box::new(Value::Integer(2).into()))),
),
)]),
}));
Expand Down
Loading

1 comment on commit a07bdfc

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PartiQL (rust) Benchmark

Benchmark suite Current: a07bdfc Previous: 4a4b143 Ratio
arith_agg-avg 765417 ns/iter (± 9720) 790105 ns/iter (± 11027) 0.97
arith_agg-avg_distinct 857628 ns/iter (± 2290) 866700 ns/iter (± 2429) 0.99
arith_agg-count 823325 ns/iter (± 14147) 833098 ns/iter (± 8364) 0.99
arith_agg-count_distinct 849976 ns/iter (± 1321) 868307 ns/iter (± 4014) 0.98
arith_agg-min 825972 ns/iter (± 45164) 840261 ns/iter (± 5673) 0.98
arith_agg-min_distinct 852656 ns/iter (± 2511) 870972 ns/iter (± 40419) 0.98
arith_agg-max 830428 ns/iter (± 5999) 844735 ns/iter (± 6067) 0.98
arith_agg-max_distinct 860720 ns/iter (± 30809) 878921 ns/iter (± 22966) 0.98
arith_agg-sum 826283 ns/iter (± 12568) 840506 ns/iter (± 7062) 0.98
arith_agg-sum_distinct 856047 ns/iter (± 5027) 868109 ns/iter (± 11415) 0.99
arith_agg-avg-count-min-max-sum 995733 ns/iter (± 12537) 993679 ns/iter (± 4817) 1.00
arith_agg-avg-count-min-max-sum-group_by 1221238 ns/iter (± 30151) 1261944 ns/iter (± 11475) 0.97
arith_agg-avg-count-min-max-sum-group_by-group_as 1843394 ns/iter (± 7066) 1869825 ns/iter (± 11386) 0.99
arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct 1202863 ns/iter (± 32389) 1228616 ns/iter (± 6146) 0.98
arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct-group_by 1478093 ns/iter (± 36300) 1515867 ns/iter (± 115785) 0.98
arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct-group_by-group_as 2072542 ns/iter (± 9664) 2129616 ns/iter (± 9534) 0.97
parse-1 6165 ns/iter (± 25) 6130 ns/iter (± 230) 1.01
parse-15 51563 ns/iter (± 204) 51127 ns/iter (± 2468) 1.01
parse-30 98311 ns/iter (± 355) 99090 ns/iter (± 368) 0.99
compile-1 4246 ns/iter (± 34) 4399 ns/iter (± 26) 0.97
compile-15 30444 ns/iter (± 118) 31298 ns/iter (± 342) 0.97
compile-30 63588 ns/iter (± 140) 64065 ns/iter (± 3328) 0.99
plan-1 70683 ns/iter (± 180) 70921 ns/iter (± 1513) 1.00
plan-15 1101877 ns/iter (± 15617) 1102491 ns/iter (± 48335) 1.00
plan-30 2204988 ns/iter (± 16988) 2193118 ns/iter (± 10706) 1.01
eval-1 11996073 ns/iter (± 261515) 12146862 ns/iter (± 99488) 0.99
eval-15 76529036 ns/iter (± 455327) 76690162 ns/iter (± 364929) 1.00
eval-30 146551997 ns/iter (± 516782) 146088058 ns/iter (± 456686) 1.00
join 9914 ns/iter (± 122) 10025 ns/iter (± 177) 0.99
simple 2486 ns/iter (± 6) 2569 ns/iter (± 5) 0.97
simple-no 460 ns/iter (± 1) 508 ns/iter (± 1) 0.91
numbers 57 ns/iter (± 0) 57 ns/iter (± 0) 1
parse-simple 961 ns/iter (± 4) 971 ns/iter (± 4) 0.99
parse-ion 2621 ns/iter (± 5) 2601 ns/iter (± 92) 1.01
parse-group 7912 ns/iter (± 24) 8076 ns/iter (± 280) 0.98
parse-complex 20359 ns/iter (± 197) 21305 ns/iter (± 71) 0.96
parse-complex-fexpr 27825 ns/iter (± 93) 28541 ns/iter (± 1102) 0.97

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.