Refactor Value & Enable Lazy Ion literals. (#523)

* Change Lexing/Parsing of embedded docs to not eagerly validate (#507) This changes the lexer and parser to pass through strings enclosed in backticks un-parsed. (At current, these documents are parsed during lowering). Since embedded documents may themselves contain backticks, beginning and ending delimiters consist of an arbitrary odd numbers of backticks (e.g., `` ` ``, `` ``` ``, `` ````` `` etc.) that must be paired (e.g., `` `$ion_data_here::[]` ``, `` ```$ion_data_here::[ $string_with_embedded_backtick:"`" ]``` ``, etc.). As opening and closing delimiters are required to be odd in count of backticks, a contiguous string of backticks that is even is interpreted as an empty document. * Behavior-preserving refactor of `Value` into a module. (#509) * Behavior-preserving refactor of Value into a module. (#510) * Change modeling of Literals in the AST remove ambiguity (#517) Change parsing and AST-modeling of literals to not share AST structures with non-scalar expressions. * Change modeling of boxed ion literals to be lazy until evaluator. (#519) Changes the logical plan to have a distinct `Lit` type to hold literals instead of embedded `Value` * Refactor lifetimes for new rust warnings
partiql · Dec 5, 2024 · a07bdfc · a07bdfc · github-actions · Dec 5, 2024
1 parent 4a4b143
commit a07bdfc
Show file tree

Hide file tree

Showing 36 changed files with 2,055 additions and 1,611 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 ### Changed
 - *BREAKING* partiql-parser: Added a source location to `ParseError::UnexpectedEndOfInput`
+- *BREAKING* partiql-ast: Changed the modelling of parsed literals.
+- *BREAKING* partiql-logical: Changed the modelling of logical plan literals.
 - partiql-eval: Fixed behavior of comparison and `BETWEEN` operations w.r.t. type mismatches
 
 ### Added

diff --git a/extension/partiql-extension-visualize/src/ast_to_dot.rs b/extension/partiql-extension-visualize/src/ast_to_dot.rs
@@ -185,7 +185,7 @@ fn lit_to_str(ast: &ast::Lit) -> String {
         Lit::FloatLit(l) => l.to_string(),
         Lit::DoubleLit(l) => l.to_string(),
         Lit::BoolLit(l) => (if *l { "TRUE" } else { "FALSE" }).to_string(),
-        Lit::IonStringLit(l) => format!("`{}`", l),
+        Lit::EmbeddedDocLit(l) => format!("`{}`", l),
         Lit::CharStringLit(l) => format!("'{}'", l),
         Lit::NationalCharStringLit(l) => format!("'{}'", l),
         Lit::BitStringLit(l) => format!("b'{}'", l),

diff --git a/partiql-ast/src/ast.rs b/partiql-ast/src/ast.rs
@@ -444,7 +444,7 @@ pub enum Lit {
     #[visit(skip)]
     BoolLit(bool),
     #[visit(skip)]
-    IonStringLit(String),
+    EmbeddedDocLit(String),
     #[visit(skip)]
     CharStringLit(String),
     #[visit(skip)]
@@ -454,16 +454,41 @@ pub enum Lit {
     #[visit(skip)]
     HexStringLit(String),
     #[visit(skip)]
-    StructLit(AstNode<Struct>),
+    StructLit(AstNode<StructLit>),
     #[visit(skip)]
-    BagLit(AstNode<Bag>),
+    BagLit(AstNode<BagLit>),
     #[visit(skip)]
-    ListLit(AstNode<List>),
+    ListLit(AstNode<ListLit>),
     /// E.g. `TIME WITH TIME ZONE` in `SELECT TIME WITH TIME ZONE '12:00' FROM ...`
     #[visit(skip)]
     TypedLit(String, Type),
 }
 
+#[derive(Clone, Debug, PartialEq)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct LitField {
+    pub first: String,
+    pub second: AstNode<Lit>,
+}
+
+#[derive(Clone, Debug, PartialEq)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct StructLit {
+    pub fields: Vec<LitField>,
+}
+
+#[derive(Clone, Debug, PartialEq)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct BagLit {
+    pub values: Vec<Lit>,
+}
+
+#[derive(Clone, Debug, PartialEq)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct ListLit {
+    pub values: Vec<Lit>,
+}
+
 #[derive(Visit, Clone, Debug, PartialEq, Eq)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct VarRef {

diff --git a/partiql-ast/src/pretty.rs b/partiql-ast/src/pretty.rs
@@ -394,7 +394,7 @@ impl PrettyDoc for Lit {
             Lit::FloatLit(inner) => arena.text(inner.to_string()),
             Lit::DoubleLit(inner) => arena.text(inner.to_string()),
             Lit::BoolLit(inner) => arena.text(inner.to_string()),
-            Lit::IonStringLit(inner) => inner.pretty_doc(arena),
+            Lit::EmbeddedDocLit(inner) => inner.pretty_doc(arena), // TODO better pretty for embedded doc: https://github.com/partiql/partiql-lang-rust/issues/508
             Lit::CharStringLit(inner) => inner.pretty_doc(arena),
             Lit::NationalCharStringLit(inner) => inner.pretty_doc(arena),
             Lit::BitStringLit(inner) => inner.pretty_doc(arena),
@@ -699,6 +699,38 @@ impl PrettyDoc for StructExprPair {
     }
 }
 
+impl PrettyDoc for StructLit {
+    fn pretty_doc<'b, D, A>(&'b self, arena: &'b D) -> DocBuilder<'b, D, A>
+    where
+        D: DocAllocator<'b, A>,
+        D::Doc: Clone,
+        A: Clone,
+    {
+        let wrapped = self.fields.iter().map(|p| unsafe {
+            let x: &'b StructLitField = std::mem::transmute(p);
+            x
+        });
+        pretty_seq(wrapped, "{", "}", ",", PRETTY_INDENT_MINOR_NEST, arena)
+    }
+}
+
+pub struct StructLitField(pub LitField);
+
+impl PrettyDoc for StructLitField {
+    fn pretty_doc<'b, D, A>(&'b self, arena: &'b D) -> DocBuilder<'b, D, A>
+    where
+        D: DocAllocator<'b, A>,
+        D::Doc: Clone,
+        A: Clone,
+    {
+        let k = self.0.first.pretty_doc(arena);
+        let v = self.0.second.pretty_doc(arena);
+        let sep = arena.text(": ");
+
+        k.append(sep).group().append(v).group()
+    }
+}
+
 impl PrettyDoc for Bag {
     fn pretty_doc<'b, D, A>(&'b self, arena: &'b D) -> DocBuilder<'b, D, A>
     where
@@ -728,6 +760,35 @@ impl PrettyDoc for List {
     }
 }
 
+impl PrettyDoc for BagLit {
+    fn pretty_doc<'b, D, A>(&'b self, arena: &'b D) -> DocBuilder<'b, D, A>
+    where
+        D: DocAllocator<'b, A>,
+        D::Doc: Clone,
+        A: Clone,
+    {
+        pretty_seq(
+            &self.values,
+            "<<",
+            ">>",
+            ",",
+            PRETTY_INDENT_MINOR_NEST,
+            arena,
+        )
+    }
+}
+
+impl PrettyDoc for ListLit {
+    fn pretty_doc<'b, D, A>(&'b self, arena: &'b D) -> DocBuilder<'b, D, A>
+    where
+        D: DocAllocator<'b, A>,
+        D::Doc: Clone,
+        A: Clone,
+    {
+        pretty_seq(&self.values, "[", "]", ",", PRETTY_INDENT_MINOR_NEST, arena)
+    }
+}
+
 impl PrettyDoc for Sexp {
     fn pretty_doc<'b, D, A>(&'b self, _arena: &'b D) -> DocBuilder<'b, D, A>
     where

diff --git a/partiql-eval/src/lib.rs b/partiql-eval/src/lib.rs
@@ -162,7 +162,12 @@ mod tests {
     // TODO: once eval conformance tests added and/or modified evaluation API (to support other values
     //  in evaluator output), change or delete tests using this function
     #[track_caller]
-    fn eval_bin_op(op: BinaryOp, lhs: Value, rhs: Value, expected_first_elem: Value) {
+    fn eval_bin_op<I: Into<logical::Lit>>(
+        op: BinaryOp,
+        lhs: Value,
+        rhs_lit: I,
+        expected_first_elem: Value,
+    ) {
         let mut plan = LogicalPlan::new();
         let scan = plan.add_operator(BindingsOp::Scan(logical::Scan {
             expr: ValueExpr::VarRef(
@@ -187,7 +192,7 @@ mod tests {
                             "lhs".to_string().into(),
                         ))],
                     )),
-                    Box::new(ValueExpr::Lit(Box::new(rhs))),
+                    Box::new(ValueExpr::Lit(Box::new(rhs_lit.into()))),
                 ),
             )]),
         }));
@@ -643,13 +648,13 @@ mod tests {
     #[test]
     fn and_or_null() {
         #[track_caller]
-        fn eval_to_null(op: BinaryOp, lhs: Value, rhs: Value) {
+        fn eval_to_null<I: Into<logical::Lit>>(op: BinaryOp, lhs: I, rhs: I) {
             let mut plan = LogicalPlan::new();
             let expq = plan.add_operator(BindingsOp::ExprQuery(ExprQuery {
                 expr: ValueExpr::BinaryExpr(
                     op,
-                    Box::new(ValueExpr::Lit(Box::new(lhs))),
-                    Box::new(ValueExpr::Lit(Box::new(rhs))),
+                    Box::new(ValueExpr::Lit(Box::new(lhs.into()))),
+                    Box::new(ValueExpr::Lit(Box::new(rhs.into()))),
                 ),
             }));
 
@@ -697,8 +702,8 @@ mod tests {
                                 "value".to_string().into(),
                             ))],
                         )),
-                        from: Box::new(ValueExpr::Lit(Box::new(from))),
-                        to: Box::new(ValueExpr::Lit(Box::new(to))),
+                        from: Box::new(ValueExpr::Lit(Box::new(from.into()))),
+                        to: Box::new(ValueExpr::Lit(Box::new(to.into()))),
                     }),
                 )]),
             }));
@@ -908,7 +913,7 @@ mod tests {
             kind: JoinKind::Left,
             left: Box::new(from_lhs),
             right: Box::new(from_rhs),
-            on: Some(ValueExpr::Lit(Box::new(Value::from(true)))),
+            on: Some(ValueExpr::Lit(Box::new(Value::from(true).into()))),
         }));
 
         let sink = lg.add_operator(BindingsOp::Sink);
@@ -936,17 +941,21 @@ mod tests {
             expr: Box::new(path_var("n", "a")),
             cases: vec![
                 (
-                    Box::new(ValueExpr::Lit(Box::new(Value::Integer(1)))),
-                    Box::new(ValueExpr::Lit(Box::new(Value::from("one".to_string())))),
+                    Box::new(ValueExpr::Lit(Box::new(Value::Integer(1).into()))),
+                    Box::new(ValueExpr::Lit(Box::new(
+                        Value::from("one".to_string()).into(),
+                    ))),
                 ),
                 (
-                    Box::new(ValueExpr::Lit(Box::new(Value::Integer(2)))),
-                    Box::new(ValueExpr::Lit(Box::new(Value::from("two".to_string())))),
+                    Box::new(ValueExpr::Lit(Box::new(Value::Integer(2).into()))),
+                    Box::new(ValueExpr::Lit(Box::new(
+                        Value::from("two".to_string()).into(),
+                    ))),
                 ),
             ],
-            default: Some(Box::new(ValueExpr::Lit(Box::new(Value::from(
-                "other".to_string(),
-            ))))),
+            default: Some(Box::new(ValueExpr::Lit(Box::new(
+                Value::from("other".to_string()).into(),
+            )))),
         }
     }
 
@@ -957,22 +966,26 @@ mod tests {
                     Box::new(ValueExpr::BinaryExpr(
                         BinaryOp::Eq,
                         Box::new(path_var("n", "a")),
-                        Box::new(ValueExpr::Lit(Box::new(Value::Integer(1)))),
+                        Box::new(ValueExpr::Lit(Box::new(Value::Integer(1).into()))),
                     )),
-                    Box::new(ValueExpr::Lit(Box::new(Value::from("one".to_string())))),
+                    Box::new(ValueExpr::Lit(Box::new(
+                        Value::from("one".to_string()).into(),
+                    ))),
                 ),
                 (
                     Box::new(ValueExpr::BinaryExpr(
                         BinaryOp::Eq,
                         Box::new(path_var("n", "a")),
-                        Box::new(ValueExpr::Lit(Box::new(Value::Integer(2)))),
+                        Box::new(ValueExpr::Lit(Box::new(Value::Integer(2).into()))),
                     )),
-                    Box::new(ValueExpr::Lit(Box::new(Value::from("two".to_string())))),
+                    Box::new(ValueExpr::Lit(Box::new(
+                        Value::from("two".to_string()).into(),
+                    ))),
                 ),
             ],
-            default: Some(Box::new(ValueExpr::Lit(Box::new(Value::from(
-                "other".to_string(),
-            ))))),
+            default: Some(Box::new(ValueExpr::Lit(Box::new(
+                Value::from("other".to_string()).into(),
+            )))),
         }
     }
 
@@ -1236,7 +1249,7 @@ mod tests {
                             "lhs".to_string().into(),
                         ))],
                     )),
-                    rhs: Box::new(ValueExpr::Lit(Box::new(rhs))),
+                    rhs: Box::new(ValueExpr::Lit(Box::new(rhs.into()))),
                 }),
             )]),
         }));
@@ -1387,7 +1400,7 @@ mod tests {
             println!("{:?}", &out);
             assert_eq!(out, expected);
         }
-        let list = ValueExpr::Lit(Box::new(Value::List(Box::new(list![1, 2, 3]))));
+        let list = ValueExpr::Lit(Box::new(Value::List(Box::new(list![1, 2, 3])).into()));
 
         // `[1,2,3][0]` -> `1`
         let index = ValueExpr::Path(Box::new(list.clone()), vec![PathComponent::Index(0)]);
@@ -1406,7 +1419,7 @@ mod tests {
         test(index, Value::Integer(3));
 
         // `{'a':10}[''||'a']` -> `10`
-        let tuple = ValueExpr::Lit(Box::new(Value::Tuple(Box::new(tuple![("a", 10)]))));
+        let tuple = ValueExpr::Lit(Box::new(Value::Tuple(Box::new(tuple![("a", 10)])).into()));
         let index_expr = ValueExpr::BinaryExpr(
             BinaryOp::Concat,
             Box::new(ValueExpr::Lit(Box::new("".into()))),
@@ -1499,7 +1512,7 @@ mod tests {
             expr: ValueExpr::BinaryExpr(
                 BinaryOp::Mul,
                 Box::new(va),
-                Box::new(ValueExpr::Lit(Box::new(Value::Integer(2)))),
+                Box::new(ValueExpr::Lit(Box::new(Value::Integer(2).into()))),
             ),
         }));
 
@@ -1578,7 +1591,7 @@ mod tests {
         tuple_expr.values.push(ValueExpr::BinaryExpr(
             BinaryOp::Mul,
             Box::new(va),
-            Box::new(ValueExpr::Lit(Box::new(Value::Integer(2)))),
+            Box::new(ValueExpr::Lit(Box::new(Value::Integer(2).into()))),
         ));
 
         let project = lg.add_operator(ProjectValue(logical::ProjectValue {
@@ -1740,7 +1753,7 @@ mod tests {
         list_expr.elements.push(ValueExpr::BinaryExpr(
             BinaryOp::Mul,
             Box::new(va),
-            Box::new(ValueExpr::Lit(Box::new(Value::Integer(2)))),
+            Box::new(ValueExpr::Lit(Box::new(Value::Integer(2).into()))),
         ));
 
         let select_value = lg.add_operator(ProjectValue(logical::ProjectValue {
@@ -1966,7 +1979,7 @@ mod tests {
                         "balance".to_string().into(),
                     ))],
                 )),
-                Box::new(ValueExpr::Lit(Box::new(Value::Integer(0)))),
+                Box::new(ValueExpr::Lit(Box::new(Value::Integer(0).into()))),
             ),
         }));
 
@@ -2096,7 +2109,7 @@ mod tests {
                 ValueExpr::BinaryExpr(
                     BinaryOp::Mul,
                     Box::new(va),
-                    Box::new(ValueExpr::Lit(Box::new(Value::Integer(2)))),
+                    Box::new(ValueExpr::Lit(Box::new(Value::Integer(2).into()))),
                 ),
             )]),
         }));
@@ -2170,7 +2183,7 @@ mod tests {
                 ValueExpr::BinaryExpr(
                     BinaryOp::Mul,
                     Box::new(va),
-                    Box::new(ValueExpr::Lit(Box::new(Value::Integer(2)))),
+                    Box::new(ValueExpr::Lit(Box::new(Value::Integer(2).into()))),
                 ),
             )]),
         }));
Benchmark suite	Current: `a07bdfc`	Previous: `4a4b143`	Ratio
`arith_agg-avg`	`765417` ns/iter (`± 9720`)	`790105` ns/iter (`± 11027`)	`0.97`
`arith_agg-avg_distinct`	`857628` ns/iter (`± 2290`)	`866700` ns/iter (`± 2429`)	`0.99`
`arith_agg-count`	`823325` ns/iter (`± 14147`)	`833098` ns/iter (`± 8364`)	`0.99`
`arith_agg-count_distinct`	`849976` ns/iter (`± 1321`)	`868307` ns/iter (`± 4014`)	`0.98`
`arith_agg-min`	`825972` ns/iter (`± 45164`)	`840261` ns/iter (`± 5673`)	`0.98`
`arith_agg-min_distinct`	`852656` ns/iter (`± 2511`)	`870972` ns/iter (`± 40419`)	`0.98`
`arith_agg-max`	`830428` ns/iter (`± 5999`)	`844735` ns/iter (`± 6067`)	`0.98`
`arith_agg-max_distinct`	`860720` ns/iter (`± 30809`)	`878921` ns/iter (`± 22966`)	`0.98`
`arith_agg-sum`	`826283` ns/iter (`± 12568`)	`840506` ns/iter (`± 7062`)	`0.98`
`arith_agg-sum_distinct`	`856047` ns/iter (`± 5027`)	`868109` ns/iter (`± 11415`)	`0.99`
`arith_agg-avg-count-min-max-sum`	`995733` ns/iter (`± 12537`)	`993679` ns/iter (`± 4817`)	`1.00`
`arith_agg-avg-count-min-max-sum-group_by`	`1221238` ns/iter (`± 30151`)	`1261944` ns/iter (`± 11475`)	`0.97`
`arith_agg-avg-count-min-max-sum-group_by-group_as`	`1843394` ns/iter (`± 7066`)	`1869825` ns/iter (`± 11386`)	`0.99`
`arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct`	`1202863` ns/iter (`± 32389`)	`1228616` ns/iter (`± 6146`)	`0.98`
`arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct-group_by`	`1478093` ns/iter (`± 36300`)	`1515867` ns/iter (`± 115785`)	`0.98`
`arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct-group_by-group_as`	`2072542` ns/iter (`± 9664`)	`2129616` ns/iter (`± 9534`)	`0.97`
`parse-1`	`6165` ns/iter (`± 25`)	`6130` ns/iter (`± 230`)	`1.01`
`parse-15`	`51563` ns/iter (`± 204`)	`51127` ns/iter (`± 2468`)	`1.01`
`parse-30`	`98311` ns/iter (`± 355`)	`99090` ns/iter (`± 368`)	`0.99`
`compile-1`	`4246` ns/iter (`± 34`)	`4399` ns/iter (`± 26`)	`0.97`
`compile-15`	`30444` ns/iter (`± 118`)	`31298` ns/iter (`± 342`)	`0.97`
`compile-30`	`63588` ns/iter (`± 140`)	`64065` ns/iter (`± 3328`)	`0.99`
`plan-1`	`70683` ns/iter (`± 180`)	`70921` ns/iter (`± 1513`)	`1.00`
`plan-15`	`1101877` ns/iter (`± 15617`)	`1102491` ns/iter (`± 48335`)	`1.00`
`plan-30`	`2204988` ns/iter (`± 16988`)	`2193118` ns/iter (`± 10706`)	`1.01`
`eval-1`	`11996073` ns/iter (`± 261515`)	`12146862` ns/iter (`± 99488`)	`0.99`
`eval-15`	`76529036` ns/iter (`± 455327`)	`76690162` ns/iter (`± 364929`)	`1.00`
`eval-30`	`146551997` ns/iter (`± 516782`)	`146088058` ns/iter (`± 456686`)	`1.00`
`join`	`9914` ns/iter (`± 122`)	`10025` ns/iter (`± 177`)	`0.99`
`simple`	`2486` ns/iter (`± 6`)	`2569` ns/iter (`± 5`)	`0.97`
`simple-no`	`460` ns/iter (`± 1`)	`508` ns/iter (`± 1`)	`0.91`
`numbers`	`57` ns/iter (`± 0`)	`57` ns/iter (`± 0`)	`1`
`parse-simple`	`961` ns/iter (`± 4`)	`971` ns/iter (`± 4`)	`0.99`
`parse-ion`	`2621` ns/iter (`± 5`)	`2601` ns/iter (`± 92`)	`1.01`
`parse-group`	`7912` ns/iter (`± 24`)	`8076` ns/iter (`± 280`)	`0.98`
`parse-complex`	`20359` ns/iter (`± 197`)	`21305` ns/iter (`± 71`)	`0.96`
`parse-complex-fexpr`	`27825` ns/iter (`± 93`)	`28541` ns/iter (`± 1102`)	`0.97`