From 203f77cf68404c64702dabc6c7171b4dbcc99ede Mon Sep 17 00:00:00 2001
From: Josh Pschorr <joshps@amazon.com>
Date: Fri, 10 Feb 2023 12:56:06 -0800
Subject: [PATCH] Add `iai` versions of the multi-like evaluation benchmarks

---
 partiql/Cargo.toml                           |   6 +
 partiql/benches/bench_eval_multi_like.rs     | 313 +------------------
 partiql/benches/bench_eval_multi_like_iai.rs | 129 ++++++++
 partiql/benches/multi_like_data.rs           | 313 +++++++++++++++++++
 4 files changed, 451 insertions(+), 310 deletions(-)
 create mode 100644 partiql/benches/bench_eval_multi_like_iai.rs
 create mode 100644 partiql/benches/multi_like_data.rs

diff --git a/partiql/Cargo.toml b/partiql/Cargo.toml
index ec217ead..9f142d64 100644
--- a/partiql/Cargo.toml
+++ b/partiql/Cargo.toml
@@ -32,8 +32,14 @@ partiql-eval = { path = "../partiql-eval" }
 
 itertools = "0.10"
 criterion = "0.4"
+iai = { git = "https://github.com/bheisler/iai" }
 rand = "0.8"
+once_cell = "1.17"
 
 [[bench]]
 name = "bench_eval_multi_like"
 harness = false
+
+[[bench]]
+name = "bench_eval_multi_like_iai"
+harness = false
diff --git a/partiql/benches/bench_eval_multi_like.rs b/partiql/benches/bench_eval_multi_like.rs
index 385569dc..35e4cfcb 100644
--- a/partiql/benches/bench_eval_multi_like.rs
+++ b/partiql/benches/bench_eval_multi_like.rs
@@ -9,6 +9,7 @@ use partiql_eval::eval::EvalPlan;
 use partiql_eval::plan::EvaluatorPlanner;
 use partiql_logical::{BindingsOp, LogicalPlan};
 
+use crate::multi_like_data::{employee_data, QUERY_1, QUERY_15, QUERY_30};
 use partiql_parser::{Parser, ParserResult};
 use partiql_value::{partiql_tuple, Bag, Tuple, Value};
 
@@ -21,315 +22,7 @@ use partiql_value::{partiql_tuple, Bag, Tuple, Value};
 // of queries that  filter against 1, 15, or 30 `OR`ed `LIKE` expressions
 // over 10201 rows of tuples containing an id and a string
 
-fn employee_data() -> Vec<Value> {
-    let name1 = vec![
-        "Bob",
-        "Madden",
-        "Brycen",
-        "Bryanna",
-        "Zayne",
-        "Jocelynn",
-        "Breanna",
-        "Margaret",
-        "Jasmine",
-        "Kenyon",
-        "Aryanna",
-        "Zackery",
-        "Jorden",
-        "Malia",
-        "Raven",
-        "Neveah",
-        "Finley",
-        "Austin",
-        "Jaxson",
-        "Tobias",
-        "Dominique",
-        "Devan",
-        "Colby",
-        "Tanner",
-        "Mckenna",
-        "Kristina",
-        "Cristal",
-        "River",
-        "Taliyah",
-        "Abagail",
-        "Spencer",
-        "Gage",
-        "Ronnie",
-        "Amari",
-        "Jabari",
-        "Alanna",
-        "Anderson",
-        "Saniya",
-        "Baylee",
-        "Elisa",
-        "Savannah",
-        "Jakobe",
-        "Sandra",
-        "Simone",
-        "Frank",
-        "Braedon",
-        "Clark",
-        "Francisco",
-        "Roman",
-        "Matias",
-        "Messi",
-        "Elisha",
-        "Alexander",
-        "Kadence",
-        "Karsyn",
-        "Adonis",
-        "Ishaan",
-        "Trevon",
-        "Ryan",
-        "Jaelynn",
-        "Marilyn",
-        "Emma",
-        "Avah",
-        "Jordan",
-        "Riley",
-        "Amelie",
-        "Denisse",
-        "Darion",
-        "Lydia",
-        "Marley",
-        "Brogan",
-        "Trace",
-        "Maeve",
-        "Elijah",
-        "Kareem",
-        "Erick",
-        "Hope",
-        "Elisabeth",
-        "Antwan",
-        "Francesca",
-        "Layla",
-        "Jase",
-        "Angel",
-        "Addyson",
-        "Mckinley",
-        "Julianna",
-        "Winston",
-        "Royce",
-        "Paola",
-        "Issac",
-        "Zachary",
-        "Niko",
-        "Shania",
-        "Colin",
-        "Jesse",
-        "Pedro",
-        "Cheyenne",
-        "Ashley",
-        "Karli",
-        "Bianca",
-        "Mario",
-    ];
-    let name2 = vec![
-        "Smith",
-        "Oconnell",
-        "Whitehead",
-        "Carrillo",
-        "Parrish",
-        "Monroe",
-        "Summers",
-        "Hurst",
-        "Durham",
-        "Hardin",
-        "Hunt",
-        "Mitchell",
-        "Pennington",
-        "Woodward",
-        "Franklin",
-        "Martinez",
-        "Shepard",
-        "Khan",
-        "Mcfarland",
-        "Frey",
-        "Mckenzie",
-        "Blair",
-        "Mercer",
-        "Callahan",
-        "Cameron",
-        "Gilmore",
-        "Bowers",
-        "Donovan",
-        "Meyers",
-        "Horne",
-        "Rice",
-        "Castillo",
-        "Cain",
-        "Dickson",
-        "Valenzuela",
-        "Silva",
-        "Prince",
-        "Vance",
-        "Berry",
-        "Coffey",
-        "Young",
-        "Walker",
-        "Burch",
-        "Ross",
-        "Mejia",
-        "Zuniga",
-        "Haney",
-        "Jordan",
-        "Love",
-        "Larsen",
-        "Bowman",
-        "Werner",
-        "Greer",
-        "Krause",
-        "Bishop",
-        "Day",
-        "Luna",
-        "Patrick",
-        "Adkins",
-        "Benson",
-        "Mcconnell",
-        "Sanchez",
-        "Villa",
-        "Wu",
-        "Duke",
-        "Fisher",
-        "Hess",
-        "Lawrence",
-        "Perry",
-        "Hardy",
-        "Wyatt",
-        "Mcknight",
-        "Thomas",
-        "Trevino",
-        "Flowers",
-        "Cisneros",
-        "Coleman",
-        "Sanders",
-        "Good",
-        "Newton",
-        "Carpenter",
-        "Garza",
-        "Barber",
-        "Swanson",
-        "Owen",
-        "Anderson",
-        "Bright",
-        "Beck",
-        "Lawson",
-        "Jones",
-        "Davila",
-        "Porter",
-        "Dougherty",
-        "Stevenson",
-        "Malone",
-        "Garrison",
-        "Bates",
-        "Wheeler",
-        "Petty",
-        "Rojas",
-        "Townsend",
-    ];
-
-    // cartesian product of name1 x name2 (e.g., "Bob Smith", ... "Mario Townsend")
-    let combined = name1
-        .iter()
-        .cartesian_product(name2.iter())
-        .map(|(n1, n2)| format!("{n1} {n2}"));
-
-    // seed the rng with a known value to assure same data across runs
-    let mut rng = rand::rngs::StdRng::from_seed([42; 32]);
-    use rand::distributions::Distribution;
-    let chars = rand::distributions::Alphanumeric;
-    let random_size = rand::distributions::uniform::Uniform::from(5..=100);
-
-    // add random string prefix and suffix to each combined name
-    let employee_data: Vec<Value> = combined
-        .enumerate()
-        .map(|(id, person)| {
-            let prefix_size = random_size.sample(&mut rng);
-            let suffix_size = random_size.sample(&mut rng);
-            let prefix: String = (0..prefix_size)
-                .map(|_| rng.sample(chars) as char)
-                .collect();
-            let suffix: String = (0..suffix_size)
-                .map(|_| rng.sample(chars) as char)
-                .collect();
-            let full_name = format!("{prefix} {person} {suffix}");
-            partiql_tuple![("id", id), ("name", full_name)].into()
-        })
-        .collect_vec();
-
-    employee_data
-}
-
-fn data() -> MapBindings<Value> {
-    let data = partiql_tuple![(
-        "hr",
-        partiql_tuple![("employees", Bag::from(employee_data()))]
-    )];
-
-    data.into()
-}
-
-const QUERY_1: &str = "
-            SELECT *
-            FROM hr.employees as emp
-            WHERE lower(emp.name) LIKE '%bob smith%'
-            ";
-
-const QUERY_15: &str = "
-            SELECT *
-            FROM hr.employees as emp
-            WHERE lower(emp.name) LIKE '%bob smith%'
-               OR lower(emp.name) LIKE '%gage swanson%'
-               OR lower(emp.name) LIKE '%riley perry%'
-               OR lower(emp.name) LIKE '%sandra woodward%'
-               OR lower(emp.name) LIKE '%abagail oconnell%'
-               OR lower(emp.name) LIKE '%amari duke%'
-               OR lower(emp.name) LIKE '%elisha wyatt%'
-               OR lower(emp.name) LIKE '%aryanna hess%'
-               OR lower(emp.name) LIKE '%bryanna jones%'
-               OR lower(emp.name) LIKE '%trace gilmore%'
-               OR lower(emp.name) LIKE '%antwan stevenson%'
-               OR lower(emp.name) LIKE '%julianna callahan%'
-               OR lower(emp.name) LIKE '%jaelynn trevino%'
-               OR lower(emp.name) LIKE '%kadence bates%'
-               OR lower(emp.name) LIKE '%jakobe townsend%'
-            ";
-
-const QUERY_30: &str = "
-            SELECT *
-            FROM hr.employees as emp
-            WHERE lower(emp.name) LIKE '%bob smith%'
-               OR lower(emp.name) LIKE '%gage swanson%'
-               OR lower(emp.name) LIKE '%riley perry%'
-               OR lower(emp.name) LIKE '%sandra woodward%'
-               OR lower(emp.name) LIKE '%abagail oconnell%'
-               OR lower(emp.name) LIKE '%amari duke%'
-               OR lower(emp.name) LIKE '%elisha wyatt%'
-               OR lower(emp.name) LIKE '%aryanna hess%'
-               OR lower(emp.name) LIKE '%bryanna jones%'
-               OR lower(emp.name) LIKE '%trace gilmore%'
-               OR lower(emp.name) LIKE '%antwan stevenson%'
-               OR lower(emp.name) LIKE '%julianna callahan%'
-               OR lower(emp.name) LIKE '%jaelynn trevino%'
-               OR lower(emp.name) LIKE '%kadence bates%'
-               OR lower(emp.name) LIKE '%jakobe townsend%'
-               OR lower(emp.name) LIKE '%austin pennington%'
-               OR lower(emp.name) LIKE '%colby woodward%'
-               OR lower(emp.name) LIKE '%brycen blair%'
-               OR lower(emp.name) LIKE '%cristal mercer%'
-               OR lower(emp.name) LIKE '%river gilmore%'
-               OR lower(emp.name) LIKE '%saniya bowers%'
-               OR lower(emp.name) LIKE '%braedon ross%'
-               OR lower(emp.name) LIKE '%clark mejia%'
-               OR lower(emp.name) LIKE '%ryan day%'
-               OR lower(emp.name) LIKE '%marilyn luna%'
-               OR lower(emp.name) LIKE '%avah sanchez%'
-               OR lower(emp.name) LIKE '%amelie wu%'
-               OR lower(emp.name) LIKE '%paola duke%'
-               OR lower(emp.name) LIKE '%jesse trevino%'
-               OR lower(emp.name) LIKE '%bianca cisneros%'
-            ";
+mod multi_like_data;
 
 #[inline]
 fn parse(text: &str) -> ParserResult {
@@ -413,7 +106,7 @@ fn bench_eval(c: &mut Criterion) {
     let compiled_15 = compile(&parse(QUERY_15).unwrap());
     let compiled_30 = compile(&parse(QUERY_30).unwrap());
 
-    let bindings = data();
+    let bindings = employee_data();
 
     c.bench_function("eval-1", |b| {
         b.iter(|| {
diff --git a/partiql/benches/bench_eval_multi_like_iai.rs b/partiql/benches/bench_eval_multi_like_iai.rs
new file mode 100644
index 00000000..8e4eeaa3
--- /dev/null
+++ b/partiql/benches/bench_eval_multi_like_iai.rs
@@ -0,0 +1,129 @@
+use std::time::Duration;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use itertools::Itertools;
+use once_cell::sync::Lazy;
+use rand::{Rng, SeedableRng};
+
+use partiql_eval::env::basic::MapBindings;
+use partiql_eval::eval::EvalPlan;
+use partiql_eval::plan::EvaluatorPlanner;
+use partiql_logical::{BindingsOp, LogicalPlan};
+
+use crate::multi_like_data::{employee_data, QUERY_1, QUERY_15, QUERY_30};
+use partiql_parser::{Parsed, Parser, ParserResult};
+use partiql_value::{partiql_tuple, Bag, Tuple, Value};
+
+// Benchmarks:
+//  - parsing
+//  - compiling
+//  - planning
+//  - evaluation
+//
+// of queries that filter against 1, 15, or 30 `OR`ed `LIKE` expressions
+// over 10201 rows of tuples containing an id and a string.
+
+mod multi_like_data;
+
+#[inline]
+fn parse(text: &str) -> ParserResult {
+    Parser::default().parse(text) // parse `text` with a freshly constructed default `Parser`
+}
+#[inline]
+fn compile(parsed: &partiql_parser::Parsed) -> LogicalPlan<BindingsOp> {
+    partiql_logical_planner::lower(parsed) // lower the parsed query into a logical plan
+}
+#[inline]
+fn plan(logical: &LogicalPlan<BindingsOp>) -> EvalPlan {
+    EvaluatorPlanner::default().compile(logical) // turn the logical plan into an executable `EvalPlan`
+}
+/// Execute `eval` against `bindings`; yields the result value, or `Value::Missing` if execution fails.
+#[inline]
+pub(crate) fn evaluate(mut eval: EvalPlan, bindings: MapBindings<Value>) -> Value {
+    match eval.execute_mut(bindings) {
+        Ok(evaluated) => evaluated.result,
+        Err(_) => Value::Missing,
+    }
+}
+
+/// Benchmark parsing `QUERY_1` (a single `LIKE` predicate); the query text is passed through `black_box` before parsing.
+fn bench_parse_1() -> ParserResult<'static> {
+    parse(black_box(QUERY_1))
+}
+/// Benchmark parsing `QUERY_15` (15 `OR`ed `LIKE` predicates); the query text is passed through `black_box` before parsing.
+fn bench_parse_15() -> ParserResult<'static> {
+    parse(black_box(QUERY_15))
+}
+/// Benchmark parsing `QUERY_30` (30 `OR`ed `LIKE` predicates); the query text is passed through `black_box` before parsing.
+fn bench_parse_30() -> ParserResult<'static> {
+    parse(black_box(QUERY_30))
+}
+
+// Parsed queries, built lazily on first access. NOTE(review): first access happens inside a benchmark fn — confirm how iai accounts for that setup.
+
+static PARSED_1: Lazy<Parsed<'static>> = Lazy::new(|| parse(QUERY_1).unwrap());
+static PARSED_15: Lazy<Parsed<'static>> = Lazy::new(|| parse(QUERY_15).unwrap());
+static PARSED_30: Lazy<Parsed<'static>> = Lazy::new(|| parse(QUERY_30).unwrap());
+
+/// Benchmark lowering the parsed `QUERY_1` (a single `LIKE` predicate) to a logical plan, reading the parse result from the lazy `PARSED_1` static.
+fn bench_compile_1() -> LogicalPlan<BindingsOp> {
+    compile(black_box(&PARSED_1))
+}
+/// Benchmark lowering the parsed `QUERY_15` (15 `OR`ed `LIKE` predicates) to a logical plan, reading the parse result from the lazy `PARSED_15` static.
+fn bench_compile_15() -> LogicalPlan<BindingsOp> {
+    compile(black_box(&PARSED_15))
+}
+/// Benchmark lowering the parsed `QUERY_30` (30 `OR`ed `LIKE` predicates) to a logical plan, reading the parse result from the lazy `PARSED_30` static.
+fn bench_compile_30() -> LogicalPlan<BindingsOp> {
+    compile(black_box(&PARSED_30))
+}
+
+static COMPILED_1: Lazy<LogicalPlan<BindingsOp>> = Lazy::new(|| compile(&PARSED_1)); // built lazily from PARSED_1
+static COMPILED_15: Lazy<LogicalPlan<BindingsOp>> = Lazy::new(|| compile(&PARSED_15)); // built lazily from PARSED_15
+static COMPILED_30: Lazy<LogicalPlan<BindingsOp>> = Lazy::new(|| compile(&PARSED_30)); // built lazily from PARSED_30
+
+/// Benchmark building an `EvalPlan` for `QUERY_1` (a single `LIKE` predicate) from the lazy `COMPILED_1` logical plan.
+fn bench_plan_1() -> EvalPlan {
+    plan(black_box(&COMPILED_1))
+}
+/// Benchmark building an `EvalPlan` for `QUERY_15` (15 `OR`ed `LIKE` predicates) from the lazy `COMPILED_15` logical plan.
+fn bench_plan_15() -> EvalPlan {
+    plan(black_box(&COMPILED_15))
+}
+/// Benchmark building an `EvalPlan` for `QUERY_30` (30 `OR`ed `LIKE` predicates) from the lazy `COMPILED_30` logical plan.
+fn bench_plan_30() -> EvalPlan {
+    plan(black_box(&COMPILED_30))
+}
+/// Benchmark evaluating `QUERY_1` (a single `LIKE` predicate) over the generated `hr.employees` data; returns the query result.
+fn bench_eval_1() -> Value {
+    let bindings = employee_data(); // NOTE(review): data is generated inside the benchmarked fn, so presumably counted in the measurement — confirm
+    let evaluator = plan(black_box(&COMPILED_1)); // planning is also done here, before evaluation
+    evaluate(evaluator, bindings)
+}
+/// Benchmark evaluating `QUERY_15` (15 `OR`ed `LIKE` predicates) over the generated `hr.employees` data; returns the query result.
+fn bench_eval_15() -> Value {
+    let bindings = employee_data(); // NOTE(review): data is generated inside the benchmarked fn, so presumably counted in the measurement — confirm
+    let evaluator = plan(black_box(&COMPILED_15)); // planning is also done here, before evaluation
+    evaluate(evaluator, bindings)
+}
+/// Benchmark evaluating `QUERY_30` (30 `OR`ed `LIKE` predicates) over the generated `hr.employees` data; returns the query result.
+fn bench_eval_30() -> Value {
+    let bindings = employee_data(); // NOTE(review): data is generated inside the benchmarked fn, so presumably counted in the measurement — confirm
+    let evaluator = plan(black_box(&COMPILED_30)); // planning is also done here, before evaluation
+    evaluate(evaluator, bindings)
+}
+
+iai::main!( // hands every benchmark function in this file to iai's `main!` macro
+    bench_parse_1,
+    bench_parse_15,
+    bench_parse_30,
+    bench_compile_1,
+    bench_compile_15,
+    bench_compile_30,
+    bench_plan_1,
+    bench_plan_15,
+    bench_plan_30,
+    bench_eval_1,
+    bench_eval_15,
+    bench_eval_30,
+);
diff --git a/partiql/benches/multi_like_data.rs b/partiql/benches/multi_like_data.rs
new file mode 100644
index 00000000..7bfceb34
--- /dev/null
+++ b/partiql/benches/multi_like_data.rs
@@ -0,0 +1,313 @@
+use itertools::Itertools;
+use partiql_eval::env::basic::MapBindings;
+use partiql_value::{partiql_tuple, Bag, Tuple, Value};
+use rand::{Rng, SeedableRng};
+/// Query selecting the `hr.employees` rows whose lower-cased `name` matches a single `LIKE` pattern.
+pub const QUERY_1: &str = "
+            SELECT *
+            FROM hr.employees as emp
+            WHERE lower(emp.name) LIKE '%bob smith%'
+            ";
+/// Query selecting the `hr.employees` rows whose lower-cased `name` matches any of 15 `OR`ed `LIKE` patterns.
+pub const QUERY_15: &str = "
+            SELECT *
+            FROM hr.employees as emp
+            WHERE lower(emp.name) LIKE '%bob smith%'
+               OR lower(emp.name) LIKE '%gage swanson%'
+               OR lower(emp.name) LIKE '%riley perry%'
+               OR lower(emp.name) LIKE '%sandra woodward%'
+               OR lower(emp.name) LIKE '%abagail oconnell%'
+               OR lower(emp.name) LIKE '%amari duke%'
+               OR lower(emp.name) LIKE '%elisha wyatt%'
+               OR lower(emp.name) LIKE '%aryanna hess%'
+               OR lower(emp.name) LIKE '%bryanna jones%'
+               OR lower(emp.name) LIKE '%trace gilmore%'
+               OR lower(emp.name) LIKE '%antwan stevenson%'
+               OR lower(emp.name) LIKE '%julianna callahan%'
+               OR lower(emp.name) LIKE '%jaelynn trevino%'
+               OR lower(emp.name) LIKE '%kadence bates%'
+               OR lower(emp.name) LIKE '%jakobe townsend%'
+            ";
+/// Query selecting the `hr.employees` rows whose lower-cased `name` matches any of 30 `OR`ed `LIKE` patterns.
+pub const QUERY_30: &str = "
+            SELECT *
+            FROM hr.employees as emp
+            WHERE lower(emp.name) LIKE '%bob smith%'
+               OR lower(emp.name) LIKE '%gage swanson%'
+               OR lower(emp.name) LIKE '%riley perry%'
+               OR lower(emp.name) LIKE '%sandra woodward%'
+               OR lower(emp.name) LIKE '%abagail oconnell%'
+               OR lower(emp.name) LIKE '%amari duke%'
+               OR lower(emp.name) LIKE '%elisha wyatt%'
+               OR lower(emp.name) LIKE '%aryanna hess%'
+               OR lower(emp.name) LIKE '%bryanna jones%'
+               OR lower(emp.name) LIKE '%trace gilmore%'
+               OR lower(emp.name) LIKE '%antwan stevenson%'
+               OR lower(emp.name) LIKE '%julianna callahan%'
+               OR lower(emp.name) LIKE '%jaelynn trevino%'
+               OR lower(emp.name) LIKE '%kadence bates%'
+               OR lower(emp.name) LIKE '%jakobe townsend%'
+               OR lower(emp.name) LIKE '%austin pennington%'
+               OR lower(emp.name) LIKE '%colby woodward%'
+               OR lower(emp.name) LIKE '%brycen blair%'
+               OR lower(emp.name) LIKE '%cristal mercer%'
+               OR lower(emp.name) LIKE '%river gilmore%'
+               OR lower(emp.name) LIKE '%saniya bowers%'
+               OR lower(emp.name) LIKE '%braedon ross%'
+               OR lower(emp.name) LIKE '%clark mejia%'
+               OR lower(emp.name) LIKE '%ryan day%'
+               OR lower(emp.name) LIKE '%marilyn luna%'
+               OR lower(emp.name) LIKE '%avah sanchez%'
+               OR lower(emp.name) LIKE '%amelie wu%'
+               OR lower(emp.name) LIKE '%paola duke%'
+               OR lower(emp.name) LIKE '%jesse trevino%'
+               OR lower(emp.name) LIKE '%bianca cisneros%'
+            ";
+
+/// Build the 10201 employee `Value`s (101 first names x 101 last names), each a `Tuple` of the form
+/// `{id: <num>, name: "<random prefix> <name1> <name2> <random suffix>"}`; the RNG seed is fixed, so output is repeatable.
+pub fn employees() -> Vec<Value> {
+    let name1 = [
+        "Bob",
+        "Madden",
+        "Brycen",
+        "Bryanna",
+        "Zayne",
+        "Jocelynn",
+        "Breanna",
+        "Margaret",
+        "Jasmine",
+        "Kenyon",
+        "Aryanna",
+        "Zackery",
+        "Jorden",
+        "Malia",
+        "Raven",
+        "Neveah",
+        "Finley",
+        "Austin",
+        "Jaxson",
+        "Tobias",
+        "Dominique",
+        "Devan",
+        "Colby",
+        "Tanner",
+        "Mckenna",
+        "Kristina",
+        "Cristal",
+        "River",
+        "Taliyah",
+        "Abagail",
+        "Spencer",
+        "Gage",
+        "Ronnie",
+        "Amari",
+        "Jabari",
+        "Alanna",
+        "Anderson",
+        "Saniya",
+        "Baylee",
+        "Elisa",
+        "Savannah",
+        "Jakobe",
+        "Sandra",
+        "Simone",
+        "Frank",
+        "Braedon",
+        "Clark",
+        "Francisco",
+        "Roman",
+        "Matias",
+        "Messi",
+        "Elisha",
+        "Alexander",
+        "Kadence",
+        "Karsyn",
+        "Adonis",
+        "Ishaan",
+        "Trevon",
+        "Ryan",
+        "Jaelynn",
+        "Marilyn",
+        "Emma",
+        "Avah",
+        "Jordan",
+        "Riley",
+        "Amelie",
+        "Denisse",
+        "Darion",
+        "Lydia",
+        "Marley",
+        "Brogan",
+        "Trace",
+        "Maeve",
+        "Elijah",
+        "Kareem",
+        "Erick",
+        "Hope",
+        "Elisabeth",
+        "Antwan",
+        "Francesca",
+        "Layla",
+        "Jase",
+        "Angel",
+        "Addyson",
+        "Mckinley",
+        "Julianna",
+        "Winston",
+        "Royce",
+        "Paola",
+        "Issac",
+        "Zachary",
+        "Niko",
+        "Shania",
+        "Colin",
+        "Jesse",
+        "Pedro",
+        "Cheyenne",
+        "Ashley",
+        "Karli",
+        "Bianca",
+        "Mario",
+    ];
+    let name2 = [
+        "Smith",
+        "Oconnell",
+        "Whitehead",
+        "Carrillo",
+        "Parrish",
+        "Monroe",
+        "Summers",
+        "Hurst",
+        "Durham",
+        "Hardin",
+        "Hunt",
+        "Mitchell",
+        "Pennington",
+        "Woodward",
+        "Franklin",
+        "Martinez",
+        "Shepard",
+        "Khan",
+        "Mcfarland",
+        "Frey",
+        "Mckenzie",
+        "Blair",
+        "Mercer",
+        "Callahan",
+        "Cameron",
+        "Gilmore",
+        "Bowers",
+        "Donovan",
+        "Meyers",
+        "Horne",
+        "Rice",
+        "Castillo",
+        "Cain",
+        "Dickson",
+        "Valenzuela",
+        "Silva",
+        "Prince",
+        "Vance",
+        "Berry",
+        "Coffey",
+        "Young",
+        "Walker",
+        "Burch",
+        "Ross",
+        "Mejia",
+        "Zuniga",
+        "Haney",
+        "Jordan",
+        "Love",
+        "Larsen",
+        "Bowman",
+        "Werner",
+        "Greer",
+        "Krause",
+        "Bishop",
+        "Day",
+        "Luna",
+        "Patrick",
+        "Adkins",
+        "Benson",
+        "Mcconnell",
+        "Sanchez",
+        "Villa",
+        "Wu",
+        "Duke",
+        "Fisher",
+        "Hess",
+        "Lawrence",
+        "Perry",
+        "Hardy",
+        "Wyatt",
+        "Mcknight",
+        "Thomas",
+        "Trevino",
+        "Flowers",
+        "Cisneros",
+        "Coleman",
+        "Sanders",
+        "Good",
+        "Newton",
+        "Carpenter",
+        "Garza",
+        "Barber",
+        "Swanson",
+        "Owen",
+        "Anderson",
+        "Bright",
+        "Beck",
+        "Lawson",
+        "Jones",
+        "Davila",
+        "Porter",
+        "Dougherty",
+        "Stevenson",
+        "Malone",
+        "Garrison",
+        "Bates",
+        "Wheeler",
+        "Petty",
+        "Rojas",
+        "Townsend",
+    ];
+
+    // cartesian product of name1 x name2, in order (e.g., "Bob Smith", ... "Mario Townsend")
+    let combined = name1
+        .iter()
+        .cartesian_product(name2.iter())
+        .map(|(n1, n2)| format!("{n1} {n2}"));
+
+    // seed the rng with a known value to ensure the same data across runs
+    let mut rng = rand::rngs::StdRng::from_seed([42; 32]);
+    use rand::distributions::Distribution;
+    let chars = rand::distributions::Alphanumeric;
+    let random_size = rand::distributions::uniform::Uniform::from(5..=100);
+
+    // wrap each combined name in a random alphanumeric prefix and suffix (5 to 100 chars each); `id` is the row index
+    let employee_data: Vec<Value> = combined
+        .enumerate()
+        .map(|(id, person)| {
+            let prefix_size = random_size.sample(&mut rng);
+            let suffix_size = random_size.sample(&mut rng);
+            let prefix: String = (0..prefix_size)
+                .map(|_| rng.sample(chars) as char)
+                .collect();
+            let suffix: String = (0..suffix_size)
+                .map(|_| rng.sample(chars) as char)
+                .collect();
+            let full_name = format!("{prefix} {person} {suffix}");
+            partiql_tuple![("id", id), ("name", full_name)].into()
+        })
+        .collect_vec();
+
+    employee_data
+}
+
+/// Wrap the generated employees in bindings shaped as `{hr: {employees: <bag of employee tuples>}}`.
+pub fn employee_data() -> MapBindings<Value> {
+    let hr = partiql_tuple![("employees", Bag::from(employees()))];
+    partiql_tuple![("hr", hr)].into()
+}