Skip to content

Commit

Permalink
refactor(parser): use v2 parser for cast/extract/substring/position/o…
Browse files Browse the repository at this point in the history
…verlay (risingwavelabs#17053)

Signed-off-by: TennyZhuang <[email protected]>
  • Loading branch information
TennyZhuang authored Jun 4, 2024
1 parent db27ab9 commit ab04031
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 127 deletions.
92 changes: 7 additions & 85 deletions src/sqlparser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ type ColumnsDefTuple = (

/// Reference:
/// <https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-PRECEDENCE>
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Precedence {
Zero = 0,
LogicalOr, // 5 in upstream
Expand Down Expand Up @@ -1009,28 +1009,12 @@ impl Parser<'_> {

/// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)`
///
/// Parsing starts at the opening parenthesis, so the `CAST` keyword itself is
/// presumably consumed by the caller (TODO confirm against the call site).
pub fn parse_cast_expr(&mut self) -> PResult<Expr> {
// Hand-written form: `(` <expr> `AS` <data type> `)`, built into `Expr::Cast`.
self.expect_token(&Token::LParen)?;
let expr = self.parse_expr()?;
self.expect_keyword(Keyword::AS)?;
let data_type = self.parse_data_type()?;
self.expect_token(&Token::RParen)?;
Ok(Expr::Cast {
expr: Box::new(expr),
data_type,
})
// Combinator-based form: hand the same work to `parser_v2::expr_cast`.
parser_v2::expr_cast(self)
}

/// Parse a SQL TRY_CAST function e.g. `TRY_CAST(expr AS FLOAT)`
///
/// Parsing starts at the opening parenthesis, so the `TRY_CAST` keyword itself
/// is presumably consumed by the caller (TODO confirm against the call site).
pub fn parse_try_cast_expr(&mut self) -> PResult<Expr> {
// Hand-written form: `(` <expr> `AS` <data type> `)`, built into `Expr::TryCast`.
self.expect_token(&Token::LParen)?;
let expr = self.parse_expr()?;
self.expect_keyword(Keyword::AS)?;
let data_type = self.parse_data_type()?;
self.expect_token(&Token::RParen)?;
Ok(Expr::TryCast {
expr: Box::new(expr),
data_type,
})
// Combinator-based form: hand the same work to `parser_v2::expr_try_cast`.
parser_v2::expr_try_cast(self)
}

/// Parse a SQL EXISTS expression e.g. `WHERE EXISTS(SELECT ...)`.
Expand All @@ -1042,83 +1026,21 @@ impl Parser<'_> {
}

/// Parse a SQL EXTRACT function e.g. `EXTRACT(YEAR FROM expr)`
///
/// Parsing starts at the opening parenthesis, so the `EXTRACT` keyword itself
/// is presumably consumed by the caller (TODO confirm against the call site).
pub fn parse_extract_expr(&mut self) -> PResult<Expr> {
// Hand-written form: `(` <date/time field> `FROM` <expr> `)`, built into `Expr::Extract`.
self.expect_token(&Token::LParen)?;
let field = self.parse_date_time_field_in_extract()?;
self.expect_keyword(Keyword::FROM)?;
let expr = self.parse_expr()?;
self.expect_token(&Token::RParen)?;
Ok(Expr::Extract {
field,
expr: Box::new(expr),
})
// Combinator-based form: hand the same work to `parser_v2::expr_extract`.
parser_v2::expr_extract(self)
}

/// Parse a SQL SUBSTRING function: `SUBSTRING (EXPR [FROM 1] [FOR 3])`
///
/// Parsing starts at the opening parenthesis, so the `SUBSTRING` keyword itself
/// is presumably consumed by the caller (TODO confirm against the call site).
pub fn parse_substring_expr(&mut self) -> PResult<Expr> {
// PARSE SUBSTRING (EXPR [FROM 1] [FOR 3])
self.expect_token(&Token::LParen)?;
let expr = self.parse_expr()?;
// The optional start position is introduced by either `FROM` or a comma.
let mut from_expr = None;
if self.parse_keyword(Keyword::FROM) || self.consume_token(&Token::Comma) {
from_expr = Some(self.parse_expr()?);
}

// The optional length is introduced by either `FOR` or a comma.
let mut to_expr = None;
if self.parse_keyword(Keyword::FOR) || self.consume_token(&Token::Comma) {
to_expr = Some(self.parse_expr()?);
}
self.expect_token(&Token::RParen)?;

Ok(Expr::Substring {
expr: Box::new(expr),
substring_from: from_expr.map(Box::new),
substring_for: to_expr.map(Box::new),
})
// Combinator-based form: hand the same work to `parser_v2::expr_substring`.
parser_v2::expr_substring(self)
}

/// `POSITION(<expr> IN <expr>)`
///
/// Parsing starts at the opening parenthesis, so the `POSITION` keyword itself
/// is presumably consumed by the caller (TODO confirm against the call site).
pub fn parse_position_expr(&mut self) -> PResult<Expr> {
self.expect_token(&Token::LParen)?;

// Logically `parse_expr`, but limited to those with precedence higher than `BETWEEN`/`IN`,
// to avoid conflict with general IN operator, for example `position(a IN (b) IN (c))`.
// https://github.com/postgres/postgres/blob/REL_15_2/src/backend/parser/gram.y#L16012
let substring = self.parse_subexpr(Precedence::Between)?;
self.expect_keyword(Keyword::IN)?;
let string = self.parse_subexpr(Precedence::Between)?;

self.expect_token(&Token::RParen)?;

Ok(Expr::Position {
substring: Box::new(substring),
string: Box::new(string),
})
// Combinator-based form: hand the same work to `parser_v2::expr_position`.
parser_v2::expr_position(self)
}

/// `OVERLAY(<expr> PLACING <expr> FROM <expr> [ FOR <expr> ])`
///
/// Parsing starts at the opening parenthesis, so the `OVERLAY` keyword itself
/// is presumably consumed by the caller (TODO confirm against the call site).
pub fn parse_overlay_expr(&mut self) -> PResult<Expr> {
self.expect_token(&Token::LParen)?;

// The string being modified.
let expr = self.parse_expr()?;

// `PLACING` and `FROM` are mandatory and must appear in this order.
self.expect_keyword(Keyword::PLACING)?;
let new_substring = self.parse_expr()?;

self.expect_keyword(Keyword::FROM)?;
let start = self.parse_expr()?;

// The trailing `FOR <expr>` clause is optional.
let mut count = None;
if self.parse_keyword(Keyword::FOR) {
count = Some(self.parse_expr()?);
}

self.expect_token(&Token::RParen)?;

Ok(Expr::Overlay {
expr: Box::new(expr),
new_substring: Box::new(new_substring),
start: Box::new(start),
count: count.map(Box::new),
})
// Combinator-based form: hand the same work to `parser_v2::expr_overlay`.
parser_v2::expr_overlay(self)
}

/// `TRIM ([WHERE] ['text'] FROM 'text')`\
Expand Down
150 changes: 143 additions & 7 deletions src/sqlparser/src/parser_v2/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,17 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use winnow::combinator::{cut_err, opt, preceded, repeat, trace};
use winnow::combinator::{alt, cut_err, opt, preceded, repeat, seq, trace};
use winnow::error::ContextError;
use winnow::{PResult, Parser};

use super::TokenStream;
use super::{data_type, token, ParserExt, TokenStream};
use crate::ast::Expr;
use crate::keywords::Keyword;
use crate::parser::Precedence;
use crate::tokenizer::Token;

fn expr<S>(input: &mut S) -> PResult<Expr>
fn expr_parse<S>(input: &mut S) -> PResult<Expr>
where
S: TokenStream,
{
Expand All @@ -27,22 +30,32 @@ where
.parse_next(input)
}

/// Builds a parser that produces an [`Expr`] by calling the v1 parser's
/// `parse_subexpr` with the given `precedence`.
///
/// The returned parser is wrapped in a `"subexpr"` trace.
fn subexpr<S>(precedence: Precedence) -> impl Parser<S, Expr, ContextError>
where
    S: TokenStream,
{
    // TODO: implement this function using combinator style.
    let via_v1 = move |stream: &mut S| stream.parse_v1(|v1| v1.parse_subexpr(precedence));
    trace("subexpr", via_v1)
}

pub fn expr_case<S>(input: &mut S) -> PResult<Expr>
where
S: TokenStream,
{
let parse = (
opt(expr),
opt(expr_parse),
repeat(
1..,
(
Keyword::WHEN,
cut_err(expr),
cut_err(expr_parse),
cut_err(Keyword::THEN),
cut_err(expr),
cut_err(expr_parse),
),
),
opt(preceded(Keyword::ELSE, cut_err(expr))),
opt(preceded(Keyword::ELSE, cut_err(expr_parse))),
cut_err(Keyword::END),
)
.map(|(operand, branches, else_result, _)| {
Expand All @@ -58,3 +71,126 @@ where

trace("expr_case", parse).parse_next(input)
}

/// Consume a SQL CAST function e.g. `CAST(expr AS FLOAT)`
pub fn expr_cast<S>(input: &mut S) -> PResult<Expr>
where
S: TokenStream,
{
let parse = cut_err(seq! {Expr::Cast {
_: Token::LParen,
expr: expr_parse.map(Box::new),
_: Keyword::AS,
data_type: data_type,
_: Token::RParen,
}});

trace("expr_cast", parse).parse_next(input)
}

/// Consume a SQL TRY_CAST function e.g. `TRY_CAST(expr AS FLOAT)`
pub fn expr_try_cast<S>(input: &mut S) -> PResult<Expr>
where
S: TokenStream,
{
let parse = cut_err(seq! {Expr::TryCast {
_: Token::LParen,
expr: expr_parse.map(Box::new),
_: Keyword::AS,
data_type: data_type,
_: Token::RParen,
}});

trace("expr_try_cast", parse).parse_next(input)
}

/// Consume a SQL EXTRACT function e.g. `EXTRACT(YEAR FROM expr)`
pub fn expr_extract<S>(input: &mut S) -> PResult<Expr>
where
S: TokenStream,
{
let mut date_time_field = token
.verify_map(|token| match token.token {
Token::Word(w) => Some(w.value.to_uppercase()),
Token::SingleQuotedString(s) => Some(s.to_uppercase()),
_ => None,
})
.expect("date/time field");

let parse = cut_err(seq! {Expr::Extract {
_: Token::LParen,
field: date_time_field,
_: Keyword::FROM,
expr: expr_parse.map(Box::new),
_: Token::RParen,
}});

trace("expr_extract", parse).parse_next(input)
}

/// Consume `SUBSTRING (EXPR [FROM 1] [FOR 3])`
pub fn expr_substring<S>(input: &mut S) -> PResult<Expr>
where
S: TokenStream,
{
let mut substring_from = opt(preceded(
alt((Token::Comma.void(), Keyword::FROM.void())),
cut_err(expr_parse).map(Box::new),
));
let mut substring_for = opt(preceded(
alt((Token::Comma.void(), Keyword::FOR.void())),
cut_err(expr_parse).map(Box::new),
));
let parse = cut_err(seq! {Expr::Substring {
_: Token::LParen,
expr: expr_parse.map(Box::new),
substring_from: substring_from,
substring_for: substring_for,
_: Token::RParen,
}});

trace("expr_substring", parse).parse_next(input)
}

/// `POSITION(<expr> IN <expr>)`
pub fn expr_position<S>(input: &mut S) -> PResult<Expr>
where
S: TokenStream,
{
let parse = cut_err(seq! {Expr::Position {
_: Token::LParen,
// Logically `parse_expr`, but limited to those with precedence higher than `BETWEEN`/`IN`,
// to avoid conflict with general IN operator, for example `position(a IN (b) IN (c))`.
// https://github.com/postgres/postgres/blob/REL_15_2/src/backend/parser/gram.y#L16012
substring: subexpr(Precedence::Between).map(Box::new),
_: Keyword::IN,
string: subexpr(Precedence::Between).map(Box::new),
_: Token::RParen,
}});

trace("expr_position", parse).parse_next(input)
}

/// `OVERLAY(<expr> PLACING <expr> FROM <expr> [ FOR <expr> ])`
pub fn expr_overlay<S>(input: &mut S) -> PResult<Expr>
where
S: TokenStream,
{
let mut count_parse = opt(preceded(
Keyword::FOR.void(),
cut_err(expr_parse).map(Box::new),
));

let parse = cut_err(seq! {Expr::Overlay {
_: Token::LParen,
expr: expr_parse.map(Box::new),
_: Keyword::PLACING,
new_substring: expr_parse.map(Box::new),
_: Keyword::FROM,
start: expr_parse.map(Box::new),
count: count_parse,
_: Token::RParen,
}});

trace("expr_overlay", parse).parse_next(input)
}
35 changes: 0 additions & 35 deletions src/sqlparser/tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1268,9 +1268,6 @@ fn parse_extract() {
verified_stmt("SELECT EXTRACT(HOUR FROM d)");
verified_stmt("SELECT EXTRACT(MINUTE FROM d)");
verified_stmt("SELECT EXTRACT(SECOND FROM d)");

let res = parse_sql_statements("SELECT EXTRACT(0 FROM d)");
assert!(format!("{}", res.unwrap_err()).contains("expected date/time field, found: 0"));
}

#[test]
Expand Down Expand Up @@ -2919,38 +2916,6 @@ fn parse_substring() {
one_statement_parses_to("SELECT SUBSTRING('1' FOR 3)", "SELECT SUBSTRING('1' FOR 3)");
}

#[test]
fn parse_overlay() {
    // Well-formed OVERLAY calls must format back to exactly the input text.
    for sql in [
        "SELECT OVERLAY('abc' PLACING 'xyz' FROM 1)",
        "SELECT OVERLAY('abc' PLACING 'xyz' FROM 1 FOR 2)",
    ] {
        one_statement_parses_to(sql, sql);
    }

    // Malformed calls, each paired with a fragment its error message must contain.
    let rejected = [
        ("SELECT OVERLAY('abc', 'xyz')", "expected PLACING, found: ,"),
        (
            "SELECT OVERLAY('abc' PLACING 'xyz')",
            "expected FROM, found: )",
        ),
        (
            "SELECT OVERLAY('abc' PLACING 'xyz' FOR 2)",
            "expected FROM, found: FOR",
        ),
        (
            "SELECT OVERLAY('abc' PLACING 'xyz' FOR 2 FROM 1)",
            "expected FROM, found: FOR",
        ),
    ];
    for (sql, err_msg) in rejected {
        let failure = parse_sql_statements(sql).unwrap_err();
        assert!(failure.to_string().contains(err_msg));
    }
}

#[test]
fn parse_trim() {
one_statement_parses_to(
Expand Down
6 changes: 6 additions & 0 deletions src/sqlparser/tests/testdata/extract.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# This file is automatically generated by `src/sqlparser/tests/parser_test.rs`.
- input: SELECT EXTRACT(0 FROM d)
error_msg: |-
sql parser error: expected date/time field
LINE 1: SELECT EXTRACT(0 FROM d)
^
13 changes: 13 additions & 0 deletions src/sqlparser/tests/testdata/overlay.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# This file is automatically generated by `src/sqlparser/tests/parser_test.rs`.
- input: SELECT OVERLAY('abc' PLACING 'xyz' FROM 1)
formatted_sql: SELECT OVERLAY('abc' PLACING 'xyz' FROM 1)
- input: SELECT OVERLAY('abc' PLACING 'xyz' FROM 1 FOR 2)
formatted_sql: SELECT OVERLAY('abc' PLACING 'xyz' FROM 1 FOR 2)
- input: SELECT OVERLAY('abc', 'xyz')
error_msg: "sql parser error: \nLINE 1: SELECT OVERLAY('abc', 'xyz')\n ^"
- input: SELECT OVERLAY('abc' PLACING 'xyz')
error_msg: "sql parser error: \nLINE 1: SELECT OVERLAY('abc' PLACING 'xyz')\n ^"
- input: SELECT OVERLAY('abc' PLACING 'xyz' FOR 2)
error_msg: "sql parser error: \nLINE 1: SELECT OVERLAY('abc' PLACING 'xyz' FOR 2)\n ^"
- input: SELECT OVERLAY('abc' PLACING 'xyz' FOR 2 FROM 1)
error_msg: "sql parser error: \nLINE 1: SELECT OVERLAY('abc' PLACING 'xyz' FOR 2 FROM 1)\n ^"

0 comments on commit ab04031

Please sign in to comment.