diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index 71a6aa1842dbd..e68fb13e88552 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -170,7 +170,7 @@ type ColumnsDefTuple = ( /// Reference: /// <https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-PRECEDENCE> -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum Precedence { Zero = 0, LogicalOr, // 5 in upstream @@ -1009,28 +1009,12 @@ impl Parser<'_> { /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` pub fn parse_cast_expr(&mut self) -> PResult<Expr> { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_keyword(Keyword::AS)?; - let data_type = self.parse_data_type()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Cast { - expr: Box::new(expr), - data_type, - }) + parser_v2::expr_cast(self) } /// Parse a SQL TRY_CAST function e.g. `TRY_CAST(expr AS FLOAT)` pub fn parse_try_cast_expr(&mut self) -> PResult<Expr> { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_keyword(Keyword::AS)?; - let data_type = self.parse_data_type()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::TryCast { - expr: Box::new(expr), - data_type, - }) + parser_v2::expr_try_cast(self) } /// Parse a SQL EXISTS expression e.g. `WHERE EXISTS(SELECT ...)`. 
@@ -1042,83 +1026,21 @@ impl Parser<'_> { } pub fn parse_extract_expr(&mut self) -> PResult<Expr> { - self.expect_token(&Token::LParen)?; - let field = self.parse_date_time_field_in_extract()?; - self.expect_keyword(Keyword::FROM)?; - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Extract { - field, - expr: Box::new(expr), - }) + parser_v2::expr_extract(self) } pub fn parse_substring_expr(&mut self) -> PResult<Expr> { - // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3]) - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - let mut from_expr = None; - if self.parse_keyword(Keyword::FROM) || self.consume_token(&Token::Comma) { - from_expr = Some(self.parse_expr()?); - } - - let mut to_expr = None; - if self.parse_keyword(Keyword::FOR) || self.consume_token(&Token::Comma) { - to_expr = Some(self.parse_expr()?); - } - self.expect_token(&Token::RParen)?; - - Ok(Expr::Substring { - expr: Box::new(expr), - substring_from: from_expr.map(Box::new), - substring_for: to_expr.map(Box::new), - }) + parser_v2::expr_substring(self) } /// `POSITION(<expr> IN <expr>)` pub fn parse_position_expr(&mut self) -> PResult<Expr> { - self.expect_token(&Token::LParen)?; - - // Logically `parse_expr`, but limited to those with precedence higher than `BETWEEN`/`IN`, - // to avoid conflict with general IN operator, for example `position(a IN (b) IN (c))`. 
- // https://github.com/postgres/postgres/blob/REL_15_2/src/backend/parser/gram.y#L16012 - let substring = self.parse_subexpr(Precedence::Between)?; - self.expect_keyword(Keyword::IN)?; - let string = self.parse_subexpr(Precedence::Between)?; - - self.expect_token(&Token::RParen)?; - - Ok(Expr::Position { - substring: Box::new(substring), - string: Box::new(string), - }) + parser_v2::expr_position(self) } /// `OVERLAY(<expr> PLACING <expr> FROM <expr> [ FOR <expr> ])` pub fn parse_overlay_expr(&mut self) -> PResult<Expr> { - self.expect_token(&Token::LParen)?; - - let expr = self.parse_expr()?; - - self.expect_keyword(Keyword::PLACING)?; - let new_substring = self.parse_expr()?; - - self.expect_keyword(Keyword::FROM)?; - let start = self.parse_expr()?; - - let mut count = None; - if self.parse_keyword(Keyword::FOR) { - count = Some(self.parse_expr()?); - } - - self.expect_token(&Token::RParen)?; - - Ok(Expr::Overlay { - expr: Box::new(expr), - new_substring: Box::new(new_substring), - start: Box::new(start), - count: count.map(Box::new), - }) + parser_v2::expr_overlay(self) } /// `TRIM ([WHERE] ['text'] FROM 'text')`\ diff --git a/src/sqlparser/src/parser_v2/expr.rs b/src/sqlparser/src/parser_v2/expr.rs index 7447984d7caf0..2923919ac83b9 100644 --- a/src/sqlparser/src/parser_v2/expr.rs +++ b/src/sqlparser/src/parser_v2/expr.rs @@ -9,14 +9,17 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-use winnow::combinator::{cut_err, opt, preceded, repeat, trace}; +use winnow::combinator::{alt, cut_err, opt, preceded, repeat, seq, trace}; +use winnow::error::ContextError; use winnow::{PResult, Parser}; -use super::TokenStream; +use super::{data_type, token, ParserExt, TokenStream}; use crate::ast::Expr; use crate::keywords::Keyword; +use crate::parser::Precedence; +use crate::tokenizer::Token; -fn expr<S>(input: &mut S) -> PResult<Expr> +fn expr_parse<S>(input: &mut S) -> PResult<Expr> where S: TokenStream, { @@ -27,22 +30,32 @@ where .parse_next(input) } +fn subexpr<S>(precedence: Precedence) -> impl Parser<S, Expr, ContextError> +where + S: TokenStream, +{ + // TODO: implement this function using combinator style; for now it delegates to the v1 `parse_subexpr` through `parse_v1`. + trace("subexpr", move |input: &mut S| { + input.parse_v1(|parser| parser.parse_subexpr(precedence)) + }) +} + pub fn expr_case<S>(input: &mut S) -> PResult<Expr> where S: TokenStream, { let parse = ( - opt(expr), + opt(expr_parse), repeat( 1.., ( Keyword::WHEN, - cut_err(expr), + cut_err(expr_parse), cut_err(Keyword::THEN), - cut_err(expr), + cut_err(expr_parse), ), ), - opt(preceded(Keyword::ELSE, cut_err(expr))), + opt(preceded(Keyword::ELSE, cut_err(expr_parse))), cut_err(Keyword::END), ) .map(|(operand, branches, else_result, _)| { @@ -58,3 +71,126 @@ where trace("expr_case", parse).parse_next(input) } + +/// Consume the parenthesized part of a SQL CAST call, e.g. the `(expr AS FLOAT)` in `CAST(expr AS FLOAT)`; the `CAST` keyword itself is not matched here. +pub fn expr_cast<S>(input: &mut S) -> PResult<Expr> +where + S: TokenStream, +{ + let parse = cut_err(seq! {Expr::Cast { + _: Token::LParen, + expr: expr_parse.map(Box::new), + _: Keyword::AS, + data_type: data_type, + _: Token::RParen, + }}); + + trace("expr_cast", parse).parse_next(input) +} + +/// Consume the parenthesized part of a SQL TRY_CAST call, e.g. the `(expr AS FLOAT)` in `TRY_CAST(expr AS FLOAT)`; the `TRY_CAST` keyword itself is not matched here. +pub fn expr_try_cast<S>(input: &mut S) -> PResult<Expr> +where + S: TokenStream, +{ + let parse = cut_err(seq! 
{Expr::TryCast { + _: Token::LParen, + expr: expr_parse.map(Box::new), + _: Keyword::AS, + data_type: data_type, + _: Token::RParen, + }}); + + trace("expr_try_cast", parse).parse_next(input) +} + +/// Consume the parenthesized part of a SQL EXTRACT call, e.g. the `(YEAR FROM expr)` in `EXTRACT(YEAR FROM expr)`; the field (a word or single-quoted string) is upper-cased. +pub fn expr_extract<S>(input: &mut S) -> PResult<Expr> +where + S: TokenStream, +{ + let mut date_time_field = token + .verify_map(|token| match token.token { + Token::Word(w) => Some(w.value.to_uppercase()), + Token::SingleQuotedString(s) => Some(s.to_uppercase()), + _ => None, + }) + .expect("date/time field"); + + let parse = cut_err(seq! {Expr::Extract { + _: Token::LParen, + field: date_time_field, + _: Keyword::FROM, + expr: expr_parse.map(Box::new), + _: Token::RParen, + }}); + + trace("expr_extract", parse).parse_next(input) +} + +/// Consume the parenthesized part of `SUBSTRING (EXPR [FROM 1] [FOR 3])`; a `,` is accepted in place of either `FROM` or `FOR`. +pub fn expr_substring<S>(input: &mut S) -> PResult<Expr> +where + S: TokenStream, +{ + let mut substring_from = opt(preceded( + alt((Token::Comma.void(), Keyword::FROM.void())), + cut_err(expr_parse).map(Box::new), + )); + let mut substring_for = opt(preceded( + alt((Token::Comma.void(), Keyword::FOR.void())), + cut_err(expr_parse).map(Box::new), + )); + let parse = cut_err(seq! {Expr::Substring { + _: Token::LParen, + expr: expr_parse.map(Box::new), + substring_from: substring_from, + substring_for: substring_for, + _: Token::RParen, + }}); + + trace("expr_substring", parse).parse_next(input) +} + +/// Consume the parenthesized part of `POSITION(<expr> IN <expr>)`; the `POSITION` keyword itself is not matched here. +pub fn expr_position<S>(input: &mut S) -> PResult<Expr> +where + S: TokenStream, +{ + let parse = cut_err(seq! {Expr::Position { + _: Token::LParen, + // Logically `parse_expr`, but limited to those with precedence higher than `BETWEEN`/`IN`, + // to avoid conflict with general IN operator, for example `position(a IN (b) IN (c))`. 
+ // https://github.com/postgres/postgres/blob/REL_15_2/src/backend/parser/gram.y#L16012 + substring: subexpr(Precedence::Between).map(Box::new), + _: Keyword::IN, + string: subexpr(Precedence::Between).map(Box::new), + _: Token::RParen, + }}); + + trace("expr_position", parse).parse_next(input) +} + +/// Consume the parenthesized part of `OVERLAY(<expr> PLACING <expr> FROM <expr> [ FOR <expr> ])`. NOTE(review): a missing `PLACING`/`FROM` currently yields an empty error message (see `overlay.yaml`) - consider attaching `.expect(..)` context. +pub fn expr_overlay<S>(input: &mut S) -> PResult<Expr> +where + S: TokenStream, +{ + let mut count_parse = opt(preceded( + Keyword::FOR.void(), + cut_err(expr_parse).map(Box::new), + )); + + let parse = cut_err(seq! {Expr::Overlay { + _: Token::LParen, + expr: expr_parse.map(Box::new), + _: Keyword::PLACING, + new_substring: expr_parse.map(Box::new), + _: Keyword::FROM, + start: expr_parse.map(Box::new), + count: count_parse, + _: Token::RParen, + }}); + + trace("expr_overlay", parse).parse_next(input) +} diff --git a/src/sqlparser/tests/sqlparser_common.rs b/src/sqlparser/tests/sqlparser_common.rs index c694aba3d1308..a3159400b88fe 100644 --- a/src/sqlparser/tests/sqlparser_common.rs +++ b/src/sqlparser/tests/sqlparser_common.rs @@ -1268,9 +1268,6 @@ fn parse_extract() { verified_stmt("SELECT EXTRACT(HOUR FROM d)"); verified_stmt("SELECT EXTRACT(MINUTE FROM d)"); verified_stmt("SELECT EXTRACT(SECOND FROM d)"); - - let res = parse_sql_statements("SELECT EXTRACT(0 FROM d)"); - assert!(format!("{}", res.unwrap_err()).contains("expected date/time field, found: 0")); } #[test] @@ -2919,38 +2916,6 @@ fn parse_substring() { one_statement_parses_to("SELECT SUBSTRING('1' FOR 3)", "SELECT SUBSTRING('1' FOR 3)"); } -#[test] -fn parse_overlay() { - one_statement_parses_to( - "SELECT OVERLAY('abc' PLACING 'xyz' FROM 1)", - "SELECT OVERLAY('abc' PLACING 'xyz' FROM 1)", - ); - - one_statement_parses_to( - "SELECT OVERLAY('abc' PLACING 'xyz' FROM 1 FOR 2)", - "SELECT OVERLAY('abc' PLACING 'xyz' FROM 1 FOR 2)", - ); - - for (sql, err_msg) in [ - ("SELECT OVERLAY('abc', 'xyz')", "expected PLACING, found: ,"), - ( - 
"SELECT OVERLAY('abc' PLACING 'xyz')", - "expected FROM, found: )", - ), - ( - "SELECT OVERLAY('abc' PLACING 'xyz' FOR 2)", - "expected FROM, found: FOR", - ), - ( - "SELECT OVERLAY('abc' PLACING 'xyz' FOR 2 FROM 1)", - "expected FROM, found: FOR", - ), - ] { - let res = parse_sql_statements(sql); - assert!(format!("{}", res.unwrap_err()).contains(err_msg)); - } -} - #[test] fn parse_trim() { one_statement_parses_to( diff --git a/src/sqlparser/tests/testdata/extract.yaml b/src/sqlparser/tests/testdata/extract.yaml new file mode 100644 index 0000000000000..ba38c3e25f261 --- /dev/null +++ b/src/sqlparser/tests/testdata/extract.yaml @@ -0,0 +1,6 @@ +# This file is automatically generated by `src/sqlparser/tests/parser_test.rs`. +- input: SELECT EXTRACT(0 FROM d) + error_msg: |- + sql parser error: expected date/time field + LINE 1: SELECT EXTRACT(0 FROM d) + ^ diff --git a/src/sqlparser/tests/testdata/overlay.yaml b/src/sqlparser/tests/testdata/overlay.yaml new file mode 100644 index 0000000000000..07e51c46abb60 --- /dev/null +++ b/src/sqlparser/tests/testdata/overlay.yaml @@ -0,0 +1,13 @@ +# This file is automatically generated by `src/sqlparser/tests/parser_test.rs`. +- input: SELECT OVERLAY('abc' PLACING 'xyz' FROM 1) + formatted_sql: SELECT OVERLAY('abc' PLACING 'xyz' FROM 1) +- input: SELECT OVERLAY('abc' PLACING 'xyz' FROM 1 FOR 2) + formatted_sql: SELECT OVERLAY('abc' PLACING 'xyz' FROM 1 FOR 2) +- input: SELECT OVERLAY('abc', 'xyz') + error_msg: "sql parser error: \nLINE 1: SELECT OVERLAY('abc', 'xyz')\n ^" +- input: SELECT OVERLAY('abc' PLACING 'xyz') + error_msg: "sql parser error: \nLINE 1: SELECT OVERLAY('abc' PLACING 'xyz')\n ^" +- input: SELECT OVERLAY('abc' PLACING 'xyz' FOR 2) + error_msg: "sql parser error: \nLINE 1: SELECT OVERLAY('abc' PLACING 'xyz' FOR 2)\n ^" +- input: SELECT OVERLAY('abc' PLACING 'xyz' FOR 2 FROM 1) + error_msg: "sql parser error: \nLINE 1: SELECT OVERLAY('abc' PLACING 'xyz' FOR 2 FROM 1)\n ^"