Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(parser): use v2 parser for cast/extract/substring/position/overlay #17053

Merged
merged 7 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 7 additions & 85 deletions src/sqlparser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ type ColumnsDefTuple = (

/// Reference:
/// <https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-PRECEDENCE>
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Precedence {
Zero = 0,
LogicalOr, // 5 in upstream
Expand Down Expand Up @@ -1009,28 +1009,12 @@ impl Parser<'_> {

/// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)`
pub fn parse_cast_expr(&mut self) -> PResult<Expr> {
self.expect_token(&Token::LParen)?;
let expr = self.parse_expr()?;
self.expect_keyword(Keyword::AS)?;
let data_type = self.parse_data_type()?;
self.expect_token(&Token::RParen)?;
Ok(Expr::Cast {
expr: Box::new(expr),
data_type,
})
parser_v2::expr_cast(self)
wangrunji0408 marked this conversation as resolved.
Show resolved Hide resolved
}

/// Parse a SQL TRY_CAST function e.g. `TRY_CAST(expr AS FLOAT)`
pub fn parse_try_cast_expr(&mut self) -> PResult<Expr> {
self.expect_token(&Token::LParen)?;
let expr = self.parse_expr()?;
self.expect_keyword(Keyword::AS)?;
let data_type = self.parse_data_type()?;
self.expect_token(&Token::RParen)?;
Ok(Expr::TryCast {
expr: Box::new(expr),
data_type,
})
parser_v2::expr_try_cast(self)
}

/// Parse a SQL EXISTS expression e.g. `WHERE EXISTS(SELECT ...)`.
Expand All @@ -1042,83 +1026,21 @@ impl Parser<'_> {
}

pub fn parse_extract_expr(&mut self) -> PResult<Expr> {
self.expect_token(&Token::LParen)?;
let field = self.parse_date_time_field_in_extract()?;
self.expect_keyword(Keyword::FROM)?;
let expr = self.parse_expr()?;
self.expect_token(&Token::RParen)?;
Ok(Expr::Extract {
field,
expr: Box::new(expr),
})
parser_v2::expr_extract(self)
}

pub fn parse_substring_expr(&mut self) -> PResult<Expr> {
// PARSE SUBSTRING (EXPR [FROM 1] [FOR 3])
self.expect_token(&Token::LParen)?;
let expr = self.parse_expr()?;
let mut from_expr = None;
if self.parse_keyword(Keyword::FROM) || self.consume_token(&Token::Comma) {
from_expr = Some(self.parse_expr()?);
}

let mut to_expr = None;
if self.parse_keyword(Keyword::FOR) || self.consume_token(&Token::Comma) {
to_expr = Some(self.parse_expr()?);
}
self.expect_token(&Token::RParen)?;

Ok(Expr::Substring {
expr: Box::new(expr),
substring_from: from_expr.map(Box::new),
substring_for: to_expr.map(Box::new),
})
parser_v2::expr_substring(self)
}

/// `POSITION(<expr> IN <expr>)`
pub fn parse_position_expr(&mut self) -> PResult<Expr> {
self.expect_token(&Token::LParen)?;

// Logically `parse_expr`, but limited to those with precedence higher than `BETWEEN`/`IN`,
// to avoid conflict with general IN operator, for example `position(a IN (b) IN (c))`.
// https://github.com/postgres/postgres/blob/REL_15_2/src/backend/parser/gram.y#L16012
let substring = self.parse_subexpr(Precedence::Between)?;
self.expect_keyword(Keyword::IN)?;
let string = self.parse_subexpr(Precedence::Between)?;

self.expect_token(&Token::RParen)?;

Ok(Expr::Position {
substring: Box::new(substring),
string: Box::new(string),
})
parser_v2::expr_position(self)
}

/// `OVERLAY(<expr> PLACING <expr> FROM <expr> [ FOR <expr> ])`
pub fn parse_overlay_expr(&mut self) -> PResult<Expr> {
self.expect_token(&Token::LParen)?;

let expr = self.parse_expr()?;

self.expect_keyword(Keyword::PLACING)?;
let new_substring = self.parse_expr()?;

self.expect_keyword(Keyword::FROM)?;
let start = self.parse_expr()?;

let mut count = None;
if self.parse_keyword(Keyword::FOR) {
count = Some(self.parse_expr()?);
}

self.expect_token(&Token::RParen)?;

Ok(Expr::Overlay {
expr: Box::new(expr),
new_substring: Box::new(new_substring),
start: Box::new(start),
count: count.map(Box::new),
})
parser_v2::expr_overlay(self)
}

/// `TRIM ([WHERE] ['text'] FROM 'text')`\
Expand Down
150 changes: 143 additions & 7 deletions src/sqlparser/src/parser_v2/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,17 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use winnow::combinator::{cut_err, opt, preceded, repeat, trace};
use winnow::combinator::{alt, cut_err, opt, preceded, repeat, seq, trace};
use winnow::error::ContextError;
use winnow::{PResult, Parser};

use super::TokenStream;
use super::{data_type, token, ParserExt, TokenStream};
use crate::ast::Expr;
use crate::keywords::Keyword;
use crate::parser::Precedence;
use crate::tokenizer::Token;

fn expr<S>(input: &mut S) -> PResult<Expr>
fn expr_parse<S>(input: &mut S) -> PResult<Expr>
where
S: TokenStream,
{
Expand All @@ -27,22 +30,32 @@ where
.parse_next(input)
}

/// Build a parser that produces the expression returned by the v1 parser's
/// `parse_subexpr` for the given `precedence`.
///
/// The returned parser is wrapped in a `trace` labelled `"subexpr"`.
fn subexpr<S>(precedence: Precedence) -> impl Parser<S, Expr, ContextError>
where
    S: TokenStream,
{
    // TODO: implement this function using combinator style.
    // Until then, hand the token stream over to the v1 parser through `parse_v1`.
    let delegate_to_v1 =
        move |stream: &mut S| stream.parse_v1(|v1| v1.parse_subexpr(precedence));

    trace("subexpr", delegate_to_v1)
}

pub fn expr_case<S>(input: &mut S) -> PResult<Expr>
where
S: TokenStream,
{
let parse = (
opt(expr),
opt(expr_parse),
repeat(
1..,
(
Keyword::WHEN,
cut_err(expr),
cut_err(expr_parse),
cut_err(Keyword::THEN),
cut_err(expr),
cut_err(expr_parse),
),
),
opt(preceded(Keyword::ELSE, cut_err(expr))),
opt(preceded(Keyword::ELSE, cut_err(expr_parse))),
cut_err(Keyword::END),
)
.map(|(operand, branches, else_result, _)| {
Expand All @@ -58,3 +71,126 @@ where

trace("expr_case", parse).parse_next(input)
}

/// Parse the parenthesised argument list of a SQL `CAST`, i.e. the
/// `(expr AS data_type)` part of `CAST(expr AS FLOAT)`, into [`Expr::Cast`].
///
/// The sequence starts at the opening parenthesis and is wrapped as a whole
/// in `cut_err`.
pub fn expr_cast<S>(input: &mut S) -> PResult<Expr>
where
    S: TokenStream,
{
    // `(` <expr> AS <data type> `)`
    let arguments = seq! {Expr::Cast {
        _: Token::LParen,
        expr: expr_parse.map(Box::new),
        _: Keyword::AS,
        data_type: data_type,
        _: Token::RParen,
    }};

    trace("expr_cast", cut_err(arguments)).parse_next(input)
}

/// Parse the parenthesised argument list of a SQL `TRY_CAST`, i.e. the
/// `(expr AS data_type)` part of `TRY_CAST(expr AS FLOAT)`, into
/// [`Expr::TryCast`].
///
/// Mirrors [`expr_cast`] except for the AST variant that is built.
pub fn expr_try_cast<S>(input: &mut S) -> PResult<Expr>
where
    S: TokenStream,
{
    // `(` <expr> AS <data type> `)`
    let arguments = seq! {Expr::TryCast {
        _: Token::LParen,
        expr: expr_parse.map(Box::new),
        _: Keyword::AS,
        data_type: data_type,
        _: Token::RParen,
    }};

    trace("expr_try_cast", cut_err(arguments)).parse_next(input)
}

/// Parse the parenthesised argument list of a SQL `EXTRACT`, i.e. the
/// `(field FROM expr)` part of `EXTRACT(YEAR FROM expr)`, into
/// [`Expr::Extract`].
///
/// The field may be written as a bare word or as a single-quoted string; in
/// both cases it is stored upper-cased. Any other token is rejected with the
/// expectation "date/time field".
pub fn expr_extract<S>(input: &mut S) -> PResult<Expr>
where
    S: TokenStream,
{
    let mut field_name = token
        .verify_map(|tok| {
            let upper = match tok.token {
                Token::Word(word) => word.value.to_uppercase(),
                Token::SingleQuotedString(text) => text.to_uppercase(),
                _ => return None,
            };
            Some(upper)
        })
        .expect("date/time field");

    // `(` <field> FROM <expr> `)`
    let arguments = seq! {Expr::Extract {
        _: Token::LParen,
        field: field_name,
        _: Keyword::FROM,
        expr: expr_parse.map(Box::new),
        _: Token::RParen,
    }};

    trace("expr_extract", cut_err(arguments)).parse_next(input)
}

/// Parse the parenthesised argument list of `SUBSTRING (EXPR [FROM 1] [FOR 3])`
/// into [`Expr::Substring`].
///
/// Each optional clause may be introduced either by a comma or by its keyword
/// (`FROM` for the first clause, `FOR` for the second). Once the introducer
/// has matched, the expression that follows it is wrapped in `cut_err`.
pub fn expr_substring<S>(input: &mut S) -> PResult<Expr>
where
    S: TokenStream,
{
    // Optional start position: `, <expr>` or `FROM <expr>`.
    let mut from_clause = opt(preceded(
        alt((Token::Comma.void(), Keyword::FROM.void())),
        cut_err(expr_parse).map(Box::new),
    ));
    // Optional length: `, <expr>` or `FOR <expr>`.
    let mut for_clause = opt(preceded(
        alt((Token::Comma.void(), Keyword::FOR.void())),
        cut_err(expr_parse).map(Box::new),
    ));

    let arguments = seq! {Expr::Substring {
        _: Token::LParen,
        expr: expr_parse.map(Box::new),
        substring_from: from_clause,
        substring_for: for_clause,
        _: Token::RParen,
    }};

    trace("expr_substring", cut_err(arguments)).parse_next(input)
}

/// `POSITION(<expr> IN <expr>)`
pub fn expr_position<S>(input: &mut S) -> PResult<Expr>
where
S: TokenStream,
{
let parse = cut_err(seq! {Expr::Position {
_: Token::LParen,
// Logically `parse_expr`, but limited to those with precedence higher than `BETWEEN`/`IN`,
// to avoid conflict with general IN operator, for example `position(a IN (b) IN (c))`.
// https://github.com/postgres/postgres/blob/REL_15_2/src/backend/parser/gram.y#L16012
substring: subexpr(Precedence::Between).map(Box::new),
_: Keyword::IN,
string: subexpr(Precedence::Between).map(Box::new),
_: Token::RParen,
}});

trace("expr_position", parse).parse_next(input)
}

/// Parse the parenthesised argument list of
/// `OVERLAY(<expr> PLACING <expr> FROM <expr> [ FOR <expr> ])` into
/// [`Expr::Overlay`].
///
/// The trailing `FOR <expr>` clause is optional; when the `FOR` keyword is
/// present, the expression after it is wrapped in `cut_err`.
pub fn expr_overlay<S>(input: &mut S) -> PResult<Expr>
where
    S: TokenStream,
{
    // Optional replacement length: `FOR <expr>`.
    let mut for_clause = opt(preceded(
        Keyword::FOR.void(),
        cut_err(expr_parse).map(Box::new),
    ));

    // `(` <expr> PLACING <expr> FROM <expr> [FOR <expr>] `)`
    let arguments = seq! {Expr::Overlay {
        _: Token::LParen,
        expr: expr_parse.map(Box::new),
        _: Keyword::PLACING,
        new_substring: expr_parse.map(Box::new),
        _: Keyword::FROM,
        start: expr_parse.map(Box::new),
        count: for_clause,
        _: Token::RParen,
    }};

    trace("expr_overlay", cut_err(arguments)).parse_next(input)
}
35 changes: 0 additions & 35 deletions src/sqlparser/tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1268,9 +1268,6 @@ fn parse_extract() {
verified_stmt("SELECT EXTRACT(HOUR FROM d)");
verified_stmt("SELECT EXTRACT(MINUTE FROM d)");
verified_stmt("SELECT EXTRACT(SECOND FROM d)");

let res = parse_sql_statements("SELECT EXTRACT(0 FROM d)");
assert!(format!("{}", res.unwrap_err()).contains("expected date/time field, found: 0"));
}

#[test]
Expand Down Expand Up @@ -2919,38 +2916,6 @@ fn parse_substring() {
one_statement_parses_to("SELECT SUBSTRING('1' FOR 3)", "SELECT SUBSTRING('1' FOR 3)");
}

#[test]
fn parse_overlay() {
    // Well-formed calls, with and without the optional `FOR` clause, must
    // format back to exactly the text that was parsed.
    for sql in [
        "SELECT OVERLAY('abc' PLACING 'xyz' FROM 1)",
        "SELECT OVERLAY('abc' PLACING 'xyz' FROM 1 FOR 2)",
    ] {
        one_statement_parses_to(sql, sql);
    }

    // Malformed calls paired with the fragment the parser error must contain.
    let rejected = [
        ("SELECT OVERLAY('abc', 'xyz')", "expected PLACING, found: ,"),
        (
            "SELECT OVERLAY('abc' PLACING 'xyz')",
            "expected FROM, found: )",
        ),
        (
            "SELECT OVERLAY('abc' PLACING 'xyz' FOR 2)",
            "expected FROM, found: FOR",
        ),
        (
            "SELECT OVERLAY('abc' PLACING 'xyz' FOR 2 FROM 1)",
            "expected FROM, found: FOR",
        ),
    ];
    for (sql, err_msg) in rejected {
        let error = parse_sql_statements(sql).unwrap_err();
        assert!(error.to_string().contains(err_msg));
    }
}

#[test]
fn parse_trim() {
one_statement_parses_to(
Expand Down
6 changes: 6 additions & 0 deletions src/sqlparser/tests/testdata/extract.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# This file is automatically generated by `src/sqlparser/tests/parser_test.rs`.
- input: SELECT EXTRACT(0 FROM d)
error_msg: |-
sql parser error: expected date/time field
LINE 1: SELECT EXTRACT(0 FROM d)
^
13 changes: 13 additions & 0 deletions src/sqlparser/tests/testdata/overlay.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# This file is automatically generated by `src/sqlparser/tests/parser_test.rs`.
- input: SELECT OVERLAY('abc' PLACING 'xyz' FROM 1)
formatted_sql: SELECT OVERLAY('abc' PLACING 'xyz' FROM 1)
- input: SELECT OVERLAY('abc' PLACING 'xyz' FROM 1 FOR 2)
formatted_sql: SELECT OVERLAY('abc' PLACING 'xyz' FROM 1 FOR 2)
- input: SELECT OVERLAY('abc', 'xyz')
error_msg: "sql parser error: \nLINE 1: SELECT OVERLAY('abc', 'xyz')\n ^"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should print expected FROM

- input: SELECT OVERLAY('abc' PLACING 'xyz')
error_msg: "sql parser error: \nLINE 1: SELECT OVERLAY('abc' PLACING 'xyz')\n ^"
- input: SELECT OVERLAY('abc' PLACING 'xyz' FOR 2)
error_msg: "sql parser error: \nLINE 1: SELECT OVERLAY('abc' PLACING 'xyz' FOR 2)\n ^"
- input: SELECT OVERLAY('abc' PLACING 'xyz' FOR 2 FROM 1)
error_msg: "sql parser error: \nLINE 1: SELECT OVERLAY('abc' PLACING 'xyz' FOR 2 FROM 1)\n ^"
Loading