From ff3659adbad53a7a7175d1aef1a340c9c8c1bb80 Mon Sep 17 00:00:00 2001 From: Max Justus Spransy Date: Mon, 11 Nov 2024 13:38:37 -0800 Subject: [PATCH] support ClickHouse/DuckDB join variants https://clickhouse.com/docs/en/sql-reference/statements/select/join https://duckdb.org/docs/sql/query_syntax/from.html --- src/lib.rs | 28 +++++++++++++++++++++ src/tokenizer.rs | 65 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 80 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5d7d2da..76451a2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -320,6 +320,34 @@ mod tests { assert_eq!(format(input, &QueryParams::None, &options), expected); } + #[test] + fn it_formats_select_query_with_non_standard_join() { + let input = indoc!( + " + SELECT customer_id.from, COUNT(order_id) AS total FROM customers + INNER ANY JOIN orders ON customers.customer_id = orders.customer_id + LEFT + SEMI JOIN foo ON foo.id = customers.id + PASTE + JOIN bar + ;" + ); + let options = FormatOptions::default(); + let expected = indoc!( + " + SELECT + customer_id.from, + COUNT(order_id) AS total + FROM + customers + INNER ANY JOIN orders ON customers.customer_id = orders.customer_id + LEFT SEMI JOIN foo ON foo.id = customers.id + PASTE JOIN bar;" + ); + + assert_eq!(format(input, &QueryParams::None, &options), expected); + } + #[test] fn it_formats_select_query_with_different_comments() { let input = indoc!( diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 0d2ef78..25d4a33 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -580,24 +580,63 @@ fn get_newline_reserved_token<'a>( last_reserved_token: Option>, ) -> impl FnMut(&'a str) -> IResult<&'a str, Token<'a>> { move |input: &'a str| { - let uc_input = get_uc_words(input, 3); - let result: IResult<&str, &str> = alt(( - terminated(tag("AND"), end_of_word), - terminated(tag("CROSS APPLY"), end_of_word), - terminated(tag("CROSS JOIN"), end_of_word), - terminated(tag("ELSE"), end_of_word), - terminated(tag("INNER JOIN"), end_of_word), + let uc_input: String = get_uc_words(input, 3); + + // We have to break up the alternatives into multiple subsets + // to avoid exceeding the alt() 21 element limit. + + // Standard SQL joins + let standard_joins = alt(( terminated(tag("JOIN"), end_of_word), + terminated(tag("INNER JOIN"), end_of_word), terminated(tag("LEFT JOIN"), end_of_word), - terminated(tag("LEFT OUTER JOIN"), end_of_word), - terminated(tag("OR"), end_of_word), - terminated(tag("OUTER APPLY"), end_of_word), - terminated(tag("OUTER JOIN"), end_of_word), terminated(tag("RIGHT JOIN"), end_of_word), + terminated(tag("FULL JOIN"), end_of_word), + terminated(tag("CROSS JOIN"), end_of_word), + terminated(tag("LEFT OUTER JOIN"), end_of_word), terminated(tag("RIGHT OUTER JOIN"), end_of_word), - terminated(tag("WHEN"), end_of_word), + terminated(tag("FULL OUTER JOIN"), end_of_word), + )); + + // Warehouse-specific ANY/SEMI/ANTI joins + let specific_joins = alt(( + terminated(tag("INNER ANY JOIN"), end_of_word), + terminated(tag("LEFT ANY JOIN"), end_of_word), + terminated(tag("RIGHT ANY JOIN"), end_of_word), + terminated(tag("ANY JOIN"), end_of_word), + terminated(tag("SEMI JOIN"), end_of_word), + terminated(tag("LEFT SEMI JOIN"), end_of_word), + terminated(tag("RIGHT SEMI JOIN"), end_of_word), + terminated(tag("LEFT ANTI JOIN"), end_of_word), + terminated(tag("RIGHT ANTI JOIN"), end_of_word), + )); + + // Special joins and GLOBAL variants + let special_joins = alt(( + terminated(tag("ASOF JOIN"), end_of_word), + terminated(tag("LEFT ASOF JOIN"), end_of_word), + terminated(tag("PASTE JOIN"), end_of_word), + terminated(tag("GLOBAL INNER JOIN"), end_of_word), + terminated(tag("GLOBAL LEFT JOIN"), end_of_word), + terminated(tag("GLOBAL RIGHT JOIN"), end_of_word), + terminated(tag("GLOBAL FULL JOIN"), end_of_word), + )); + + // Legacy and logical operators + let operators = alt(( + terminated(tag("CROSS APPLY"), end_of_word), + terminated(tag("OUTER APPLY"), end_of_word), + terminated(tag("AND"), end_of_word), + terminated(tag("OR"), end_of_word), terminated(tag("XOR"), end_of_word), - ))(&uc_input); + terminated(tag("WHEN"), end_of_word), + terminated(tag("ELSE"), end_of_word), + )); + + // Combine all parsers + let result: IResult<&str, &str> = + alt((standard_joins, specific_joins, special_joins, operators))(&uc_input); + if let Ok((_, token)) = result { let final_word = token.split(' ').last().unwrap(); let input_end_pos =