From 203f77cf68404c64702dabc6c7171b4dbcc99ede Mon Sep 17 00:00:00 2001 From: Josh Pschorr <joshps@amazon.com> Date: Fri, 10 Feb 2023 12:56:06 -0800 Subject: [PATCH] Add `iai` versions of the multi-like evaluation benchmarks --- partiql/Cargo.toml | 6 + partiql/benches/bench_eval_multi_like.rs | 313 +------------------ partiql/benches/bench_eval_multi_like_iai.rs | 129 ++++++++ partiql/benches/multi_like_data.rs | 313 +++++++++++++++++++ 4 files changed, 451 insertions(+), 310 deletions(-) create mode 100644 partiql/benches/bench_eval_multi_like_iai.rs create mode 100644 partiql/benches/multi_like_data.rs diff --git a/partiql/Cargo.toml b/partiql/Cargo.toml index ec217ead..9f142d64 100644 --- a/partiql/Cargo.toml +++ b/partiql/Cargo.toml @@ -32,8 +32,14 @@ partiql-eval = { path = "../partiql-eval" } itertools = "0.10" criterion = "0.4" +iai = { git = "https://github.com/bheisler/iai" } rand = "0.8" +once_cell = "1.17" [[bench]] name = "bench_eval_multi_like" harness = false + +[[bench]] +name = "bench_eval_multi_like_iai" +harness = false diff --git a/partiql/benches/bench_eval_multi_like.rs b/partiql/benches/bench_eval_multi_like.rs index 385569dc..35e4cfcb 100644 --- a/partiql/benches/bench_eval_multi_like.rs +++ b/partiql/benches/bench_eval_multi_like.rs @@ -9,6 +9,7 @@ use partiql_eval::eval::EvalPlan; use partiql_eval::plan::EvaluatorPlanner; use partiql_logical::{BindingsOp, LogicalPlan}; +use crate::multi_like_data::{employee_data, QUERY_1, QUERY_15, QUERY_30}; use partiql_parser::{Parser, ParserResult}; use partiql_value::{partiql_tuple, Bag, Tuple, Value}; @@ -21,315 +22,7 @@ use partiql_value::{partiql_tuple, Bag, Tuple, Value}; // of queries that filter against 1, 15, or 30 `OR`ed `LIKE` expressions // over 10201 rows of tuples containing an id and a string -fn employee_data() -> Vec<Value> { - let name1 = vec![ - "Bob", - "Madden", - "Brycen", - "Bryanna", - "Zayne", - "Jocelynn", - "Breanna", - "Margaret", - "Jasmine", - "Kenyon", - "Aryanna", - 
"Zackery", - "Jorden", - "Malia", - "Raven", - "Neveah", - "Finley", - "Austin", - "Jaxson", - "Tobias", - "Dominique", - "Devan", - "Colby", - "Tanner", - "Mckenna", - "Kristina", - "Cristal", - "River", - "Taliyah", - "Abagail", - "Spencer", - "Gage", - "Ronnie", - "Amari", - "Jabari", - "Alanna", - "Anderson", - "Saniya", - "Baylee", - "Elisa", - "Savannah", - "Jakobe", - "Sandra", - "Simone", - "Frank", - "Braedon", - "Clark", - "Francisco", - "Roman", - "Matias", - "Messi", - "Elisha", - "Alexander", - "Kadence", - "Karsyn", - "Adonis", - "Ishaan", - "Trevon", - "Ryan", - "Jaelynn", - "Marilyn", - "Emma", - "Avah", - "Jordan", - "Riley", - "Amelie", - "Denisse", - "Darion", - "Lydia", - "Marley", - "Brogan", - "Trace", - "Maeve", - "Elijah", - "Kareem", - "Erick", - "Hope", - "Elisabeth", - "Antwan", - "Francesca", - "Layla", - "Jase", - "Angel", - "Addyson", - "Mckinley", - "Julianna", - "Winston", - "Royce", - "Paola", - "Issac", - "Zachary", - "Niko", - "Shania", - "Colin", - "Jesse", - "Pedro", - "Cheyenne", - "Ashley", - "Karli", - "Bianca", - "Mario", - ]; - let name2 = vec![ - "Smith", - "Oconnell", - "Whitehead", - "Carrillo", - "Parrish", - "Monroe", - "Summers", - "Hurst", - "Durham", - "Hardin", - "Hunt", - "Mitchell", - "Pennington", - "Woodward", - "Franklin", - "Martinez", - "Shepard", - "Khan", - "Mcfarland", - "Frey", - "Mckenzie", - "Blair", - "Mercer", - "Callahan", - "Cameron", - "Gilmore", - "Bowers", - "Donovan", - "Meyers", - "Horne", - "Rice", - "Castillo", - "Cain", - "Dickson", - "Valenzuela", - "Silva", - "Prince", - "Vance", - "Berry", - "Coffey", - "Young", - "Walker", - "Burch", - "Ross", - "Mejia", - "Zuniga", - "Haney", - "Jordan", - "Love", - "Larsen", - "Bowman", - "Werner", - "Greer", - "Krause", - "Bishop", - "Day", - "Luna", - "Patrick", - "Adkins", - "Benson", - "Mcconnell", - "Sanchez", - "Villa", - "Wu", - "Duke", - "Fisher", - "Hess", - "Lawrence", - "Perry", - "Hardy", - "Wyatt", - "Mcknight", - "Thomas", - "Trevino", - 
"Flowers", - "Cisneros", - "Coleman", - "Sanders", - "Good", - "Newton", - "Carpenter", - "Garza", - "Barber", - "Swanson", - "Owen", - "Anderson", - "Bright", - "Beck", - "Lawson", - "Jones", - "Davila", - "Porter", - "Dougherty", - "Stevenson", - "Malone", - "Garrison", - "Bates", - "Wheeler", - "Petty", - "Rojas", - "Townsend", - ]; - - // cartesian product of name1 x name2 (e.g., "Bob Smith", ... "Mario Townsend") - let combined = name1 - .iter() - .cartesian_product(name2.iter()) - .map(|(n1, n2)| format!("{n1} {n2}")); - - // seed the rng with a known value to assure same data across runs - let mut rng = rand::rngs::StdRng::from_seed([42; 32]); - use rand::distributions::Distribution; - let chars = rand::distributions::Alphanumeric; - let random_size = rand::distributions::uniform::Uniform::from(5..=100); - - // add random string prefix and suffix to each combined name - let employee_data: Vec<Value> = combined - .enumerate() - .map(|(id, person)| { - let prefix_size = random_size.sample(&mut rng); - let suffix_size = random_size.sample(&mut rng); - let prefix: String = (0..prefix_size) - .map(|_| rng.sample(chars) as char) - .collect(); - let suffix: String = (0..suffix_size) - .map(|_| rng.sample(chars) as char) - .collect(); - let full_name = format!("{prefix} {person} {suffix}"); - partiql_tuple![("id", id), ("name", full_name)].into() - }) - .collect_vec(); - - employee_data -} - -fn data() -> MapBindings<Value> { - let data = partiql_tuple![( - "hr", - partiql_tuple![("employees", Bag::from(employee_data()))] - )]; - - data.into() -} - -const QUERY_1: &str = " - SELECT * - FROM hr.employees as emp - WHERE lower(emp.name) LIKE '%bob smith%' - "; - -const QUERY_15: &str = " - SELECT * - FROM hr.employees as emp - WHERE lower(emp.name) LIKE '%bob smith%' - OR lower(emp.name) LIKE '%gage swanson%' - OR lower(emp.name) LIKE '%riley perry%' - OR lower(emp.name) LIKE '%sandra woodward%' - OR lower(emp.name) LIKE '%abagail oconnell%' - OR lower(emp.name) LIKE 
'%amari duke%' - OR lower(emp.name) LIKE '%elisha wyatt%' - OR lower(emp.name) LIKE '%aryanna hess%' - OR lower(emp.name) LIKE '%bryanna jones%' - OR lower(emp.name) LIKE '%trace gilmore%' - OR lower(emp.name) LIKE '%antwan stevenson%' - OR lower(emp.name) LIKE '%julianna callahan%' - OR lower(emp.name) LIKE '%jaelynn trevino%' - OR lower(emp.name) LIKE '%kadence bates%' - OR lower(emp.name) LIKE '%jakobe townsend%' - "; - -const QUERY_30: &str = " - SELECT * - FROM hr.employees as emp - WHERE lower(emp.name) LIKE '%bob smith%' - OR lower(emp.name) LIKE '%gage swanson%' - OR lower(emp.name) LIKE '%riley perry%' - OR lower(emp.name) LIKE '%sandra woodward%' - OR lower(emp.name) LIKE '%abagail oconnell%' - OR lower(emp.name) LIKE '%amari duke%' - OR lower(emp.name) LIKE '%elisha wyatt%' - OR lower(emp.name) LIKE '%aryanna hess%' - OR lower(emp.name) LIKE '%bryanna jones%' - OR lower(emp.name) LIKE '%trace gilmore%' - OR lower(emp.name) LIKE '%antwan stevenson%' - OR lower(emp.name) LIKE '%julianna callahan%' - OR lower(emp.name) LIKE '%jaelynn trevino%' - OR lower(emp.name) LIKE '%kadence bates%' - OR lower(emp.name) LIKE '%jakobe townsend%' - OR lower(emp.name) LIKE '%austin pennington%' - OR lower(emp.name) LIKE '%colby woodward%' - OR lower(emp.name) LIKE '%brycen blair%' - OR lower(emp.name) LIKE '%cristal mercer%' - OR lower(emp.name) LIKE '%river gilmore%' - OR lower(emp.name) LIKE '%saniya bowers%' - OR lower(emp.name) LIKE '%braedon ross%' - OR lower(emp.name) LIKE '%clark mejia%' - OR lower(emp.name) LIKE '%ryan day%' - OR lower(emp.name) LIKE '%marilyn luna%' - OR lower(emp.name) LIKE '%avah sanchez%' - OR lower(emp.name) LIKE '%amelie wu%' - OR lower(emp.name) LIKE '%paola duke%' - OR lower(emp.name) LIKE '%jesse trevino%' - OR lower(emp.name) LIKE '%bianca cisneros%' - "; +mod multi_like_data; #[inline] fn parse(text: &str) -> ParserResult { @@ -413,7 +106,7 @@ fn bench_eval(c: &mut Criterion) { let compiled_15 = compile(&parse(QUERY_15).unwrap()); let 
compiled_30 = compile(&parse(QUERY_30).unwrap()); - let bindings = data(); + let bindings = employee_data(); c.bench_function("eval-1", |b| { b.iter(|| { diff --git a/partiql/benches/bench_eval_multi_like_iai.rs b/partiql/benches/bench_eval_multi_like_iai.rs new file mode 100644 index 00000000..8e4eeaa3 --- /dev/null +++ b/partiql/benches/bench_eval_multi_like_iai.rs @@ -0,0 +1,129 @@ +use std::time::Duration; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use itertools::Itertools; +use once_cell::sync::Lazy; +use rand::{Rng, SeedableRng}; + +use partiql_eval::env::basic::MapBindings; +use partiql_eval::eval::EvalPlan; +use partiql_eval::plan::EvaluatorPlanner; +use partiql_logical::{BindingsOp, LogicalPlan}; + +use crate::multi_like_data::{employee_data, QUERY_1, QUERY_15, QUERY_30}; +use partiql_parser::{Parsed, Parser, ParserResult}; +use partiql_value::{partiql_tuple, Bag, Tuple, Value}; + +// Benchmarks: +// - parsing, +// - compiling +// - planning +// - evaluation +// +// of queries that filter against 1, 15, or 30 `OR`ed `LIKE` expressions +// over 10201 rows of tuples containing an id and a string + +mod multi_like_data; + +#[inline] +fn parse(text: &str) -> ParserResult { + Parser::default().parse(text) +} +#[inline] +fn compile(parsed: &partiql_parser::Parsed) -> LogicalPlan<BindingsOp> { + partiql_logical_planner::lower(parsed) +} +#[inline] +fn plan(logical: &LogicalPlan<BindingsOp>) -> EvalPlan { + EvaluatorPlanner::default().compile(logical) +} +#[inline] +pub(crate) fn evaluate(mut eval: EvalPlan, bindings: MapBindings<Value>) -> Value { + if let Ok(out) = eval.execute_mut(bindings) { + out.result + } else { + Value::Missing + } +} + +/// benchmark parsing of query that filters 1 `LIKE` expression over 10201 rows of tuples containing an id and a string +fn bench_parse_1() -> ParserResult<'static> { + parse(black_box(QUERY_1)) +} +/// benchmark parsing of query that filters 15 `OR`ed `LIKE` expressions over 10201 rows of
tuples containing an id and a string +fn bench_parse_15() -> ParserResult<'static> { + parse(black_box(QUERY_15)) +} +/// benchmark parsing of query that filters 30 `OR`ed `LIKE` expressions over 10201 rows of tuples containing an id and a string +fn bench_parse_30() -> ParserResult<'static> { + parse(black_box(QUERY_30)) +} + +//pub(crate) static BUILT_INS: Lazy<FnExprSet<'static>> = Lazy::new(built_ins); + +static PARSED_1: Lazy<Parsed<'static>> = Lazy::new(|| parse(QUERY_1).unwrap()); +static PARSED_15: Lazy<Parsed<'static>> = Lazy::new(|| parse(QUERY_15).unwrap()); +static PARSED_30: Lazy<Parsed<'static>> = Lazy::new(|| parse(QUERY_30).unwrap()); + +/// benchmark compiling of query that filters 1 `LIKE` expression over 10201 rows of tuples containing an id and a string +fn bench_compile_1() -> LogicalPlan<BindingsOp> { + compile(black_box(&PARSED_1)) +} +/// benchmark compiling of query that filters 15 `OR`ed `LIKE` expressions over 10201 rows of tuples containing an id and a string +fn bench_compile_15() -> LogicalPlan<BindingsOp> { + compile(black_box(&PARSED_15)) +} +/// benchmark compiling of query that filters 30 `OR`ed `LIKE` expressions over 10201 rows of tuples containing an id and a string +fn bench_compile_30() -> LogicalPlan<BindingsOp> { + compile(black_box(&PARSED_30)) +} + +static COMPILED_1: Lazy<LogicalPlan<BindingsOp>> = Lazy::new(|| compile(&PARSED_1)); +static COMPILED_15: Lazy<LogicalPlan<BindingsOp>> = Lazy::new(|| compile(&PARSED_15)); +static COMPILED_30: Lazy<LogicalPlan<BindingsOp>> = Lazy::new(|| compile(&PARSED_30)); + +/// benchmark planning of query that filters 1 `LIKE` expression over 10201 rows of tuples containing an id and a string +fn bench_plan_1() -> EvalPlan { + plan(black_box(&COMPILED_1)) +} +/// benchmark planning of query that filters 15 `OR`ed `LIKE` expressions over 10201 rows of tuples containing an id and a string +fn bench_plan_15() -> EvalPlan { + plan(black_box(&COMPILED_15)) +} +/// benchmark planning of query
that filters 30 `OR`ed `LIKE` expressions over 10201 rows of tuples containing an id and a string +fn bench_plan_30() -> EvalPlan { + plan(black_box(&COMPILED_30)) +} +/// benchmark evaluating of query that filters 1 `LIKE` expression over 10201 rows of tuples containing an id and a string (the benchmarked function also builds the bindings via `employee_data()` and plans the query) +fn bench_eval_1() -> Value { + let bindings = employee_data(); + let evaluator = plan(black_box(&COMPILED_1)); + evaluate(evaluator, bindings) +} +/// benchmark evaluating of query that filters 15 `OR`ed `LIKE` expressions over 10201 rows of tuples containing an id and a string (the benchmarked function also builds the bindings via `employee_data()` and plans the query) +fn bench_eval_15() -> Value { + let bindings = employee_data(); + let evaluator = plan(black_box(&COMPILED_15)); + evaluate(evaluator, bindings) +} +/// benchmark evaluating of query that filters 30 `OR`ed `LIKE` expressions over 10201 rows of tuples containing an id and a string (the benchmarked function also builds the bindings via `employee_data()` and plans the query) +fn bench_eval_30() -> Value { + let bindings = employee_data(); + let evaluator = plan(black_box(&COMPILED_30)); + evaluate(evaluator, bindings) +} + +iai::main!( + bench_parse_1, + bench_parse_15, + bench_parse_30, + bench_compile_1, + bench_compile_15, + bench_compile_30, + bench_plan_1, + bench_plan_15, + bench_plan_30, + bench_eval_1, + bench_eval_15, + bench_eval_30, +); diff --git a/partiql/benches/multi_like_data.rs b/partiql/benches/multi_like_data.rs new file mode 100644 index 00000000..7bfceb34 --- /dev/null +++ b/partiql/benches/multi_like_data.rs @@ -0,0 +1,313 @@ +use itertools::Itertools; +use partiql_eval::env::basic::MapBindings; +use partiql_value::{partiql_tuple, Bag, Tuple, Value}; +use rand::{Rng, SeedableRng}; + +pub const QUERY_1: &str = " + SELECT * + FROM hr.employees as emp + WHERE lower(emp.name) LIKE '%bob smith%' + "; + +pub const QUERY_15: &str = " + SELECT * + FROM hr.employees as emp + WHERE lower(emp.name) LIKE '%bob smith%' + OR lower(emp.name) LIKE '%gage swanson%' + OR lower(emp.name) LIKE '%riley perry%' + OR lower(emp.name) LIKE '%sandra woodward%' + OR lower(emp.name) LIKE '%abagail
oconnell%' + OR lower(emp.name) LIKE '%amari duke%' + OR lower(emp.name) LIKE '%elisha wyatt%' + OR lower(emp.name) LIKE '%aryanna hess%' + OR lower(emp.name) LIKE '%bryanna jones%' + OR lower(emp.name) LIKE '%trace gilmore%' + OR lower(emp.name) LIKE '%antwan stevenson%' + OR lower(emp.name) LIKE '%julianna callahan%' + OR lower(emp.name) LIKE '%jaelynn trevino%' + OR lower(emp.name) LIKE '%kadence bates%' + OR lower(emp.name) LIKE '%jakobe townsend%' + "; + +pub const QUERY_30: &str = " + SELECT * + FROM hr.employees as emp + WHERE lower(emp.name) LIKE '%bob smith%' + OR lower(emp.name) LIKE '%gage swanson%' + OR lower(emp.name) LIKE '%riley perry%' + OR lower(emp.name) LIKE '%sandra woodward%' + OR lower(emp.name) LIKE '%abagail oconnell%' + OR lower(emp.name) LIKE '%amari duke%' + OR lower(emp.name) LIKE '%elisha wyatt%' + OR lower(emp.name) LIKE '%aryanna hess%' + OR lower(emp.name) LIKE '%bryanna jones%' + OR lower(emp.name) LIKE '%trace gilmore%' + OR lower(emp.name) LIKE '%antwan stevenson%' + OR lower(emp.name) LIKE '%julianna callahan%' + OR lower(emp.name) LIKE '%jaelynn trevino%' + OR lower(emp.name) LIKE '%kadence bates%' + OR lower(emp.name) LIKE '%jakobe townsend%' + OR lower(emp.name) LIKE '%austin pennington%' + OR lower(emp.name) LIKE '%colby woodward%' + OR lower(emp.name) LIKE '%brycen blair%' + OR lower(emp.name) LIKE '%cristal mercer%' + OR lower(emp.name) LIKE '%river gilmore%' + OR lower(emp.name) LIKE '%saniya bowers%' + OR lower(emp.name) LIKE '%braedon ross%' + OR lower(emp.name) LIKE '%clark mejia%' + OR lower(emp.name) LIKE '%ryan day%' + OR lower(emp.name) LIKE '%marilyn luna%' + OR lower(emp.name) LIKE '%avah sanchez%' + OR lower(emp.name) LIKE '%amelie wu%' + OR lower(emp.name) LIKE '%paola duke%' + OR lower(emp.name) LIKE '%jesse trevino%' + OR lower(emp.name) LIKE '%bianca cisneros%' + "; + +/// Return a sequence of 10201 `Value`s where each is a `Tuple` of the form +/// `{id: <num>, name: "<random prefix> <name1> <name2> <random 
suffix>"}` +pub fn employees() -> Vec<Value> { + let name1 = vec![ + "Bob", + "Madden", + "Brycen", + "Bryanna", + "Zayne", + "Jocelynn", + "Breanna", + "Margaret", + "Jasmine", + "Kenyon", + "Aryanna", + "Zackery", + "Jorden", + "Malia", + "Raven", + "Neveah", + "Finley", + "Austin", + "Jaxson", + "Tobias", + "Dominique", + "Devan", + "Colby", + "Tanner", + "Mckenna", + "Kristina", + "Cristal", + "River", + "Taliyah", + "Abagail", + "Spencer", + "Gage", + "Ronnie", + "Amari", + "Jabari", + "Alanna", + "Anderson", + "Saniya", + "Baylee", + "Elisa", + "Savannah", + "Jakobe", + "Sandra", + "Simone", + "Frank", + "Braedon", + "Clark", + "Francisco", + "Roman", + "Matias", + "Messi", + "Elisha", + "Alexander", + "Kadence", + "Karsyn", + "Adonis", + "Ishaan", + "Trevon", + "Ryan", + "Jaelynn", + "Marilyn", + "Emma", + "Avah", + "Jordan", + "Riley", + "Amelie", + "Denisse", + "Darion", + "Lydia", + "Marley", + "Brogan", + "Trace", + "Maeve", + "Elijah", + "Kareem", + "Erick", + "Hope", + "Elisabeth", + "Antwan", + "Francesca", + "Layla", + "Jase", + "Angel", + "Addyson", + "Mckinley", + "Julianna", + "Winston", + "Royce", + "Paola", + "Issac", + "Zachary", + "Niko", + "Shania", + "Colin", + "Jesse", + "Pedro", + "Cheyenne", + "Ashley", + "Karli", + "Bianca", + "Mario", + ]; + let name2 = vec![ + "Smith", + "Oconnell", + "Whitehead", + "Carrillo", + "Parrish", + "Monroe", + "Summers", + "Hurst", + "Durham", + "Hardin", + "Hunt", + "Mitchell", + "Pennington", + "Woodward", + "Franklin", + "Martinez", + "Shepard", + "Khan", + "Mcfarland", + "Frey", + "Mckenzie", + "Blair", + "Mercer", + "Callahan", + "Cameron", + "Gilmore", + "Bowers", + "Donovan", + "Meyers", + "Horne", + "Rice", + "Castillo", + "Cain", + "Dickson", + "Valenzuela", + "Silva", + "Prince", + "Vance", + "Berry", + "Coffey", + "Young", + "Walker", + "Burch", + "Ross", + "Mejia", + "Zuniga", + "Haney", + "Jordan", + "Love", + "Larsen", + "Bowman", + "Werner", + "Greer", + "Krause", + "Bishop", + "Day", + 
"Luna", + "Patrick", + "Adkins", + "Benson", + "Mcconnell", + "Sanchez", + "Villa", + "Wu", + "Duke", + "Fisher", + "Hess", + "Lawrence", + "Perry", + "Hardy", + "Wyatt", + "Mcknight", + "Thomas", + "Trevino", + "Flowers", + "Cisneros", + "Coleman", + "Sanders", + "Good", + "Newton", + "Carpenter", + "Garza", + "Barber", + "Swanson", + "Owen", + "Anderson", + "Bright", + "Beck", + "Lawson", + "Jones", + "Davila", + "Porter", + "Dougherty", + "Stevenson", + "Malone", + "Garrison", + "Bates", + "Wheeler", + "Petty", + "Rojas", + "Townsend", + ]; + + // cartesian product of name1 x name2 (e.g., "Bob Smith", ... "Mario Townsend") + let combined = name1 + .iter() + .cartesian_product(name2.iter()) + .map(|(n1, n2)| format!("{n1} {n2}")); + + // seed the rng with a known value to assure same data across runs + let mut rng = rand::rngs::StdRng::from_seed([42; 32]); + use rand::distributions::Distribution; + let chars = rand::distributions::Alphanumeric; + let random_size = rand::distributions::uniform::Uniform::from(5..=100); + + // add random string prefix and suffix to each combined name + let employee_data: Vec<Value> = combined + .enumerate() + .map(|(id, person)| { + let prefix_size = random_size.sample(&mut rng); + let suffix_size = random_size.sample(&mut rng); + let prefix: String = (0..prefix_size) + .map(|_| rng.sample(chars) as char) + .collect(); + let suffix: String = (0..suffix_size) + .map(|_| rng.sample(chars) as char) + .collect(); + let full_name = format!("{prefix} {person} {suffix}"); + partiql_tuple![("id", id), ("name", full_name)].into() + }) + .collect_vec(); + + employee_data +} + +pub fn employee_data() -> MapBindings<Value> { + let data = partiql_tuple![("hr", partiql_tuple![("employees", Bag::from(employees()))])]; + + data.into() +}