Skip to content

Commit

Permalink
refactor(parser): eliminate the gap between parser v1 and v2 (#17019)
Browse files Browse the repository at this point in the history
Signed-off-by: Runji Wang <[email protected]>
Signed-off-by: TennyZhuang <[email protected]>
Co-authored-by: TennyZhuang <[email protected]>
  • Loading branch information
wangrunji0408 and TennyZhuang authored May 31, 2024
1 parent 669358e commit cab8403
Show file tree
Hide file tree
Showing 24 changed files with 885 additions and 1,134 deletions.
3 changes: 2 additions & 1 deletion .typos.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ steam = "stream" # You played with Steam games too much.
ser = "ser" # Serialization
# Some weird short variable names
ot = "ot"
bui = "bui" # BackwardUserIterator
bui = "bui" # BackwardUserIterator
mosquitto = "mosquitto" # This is a MQTT broker.
abd = "abd"
iy = "iy"
Expand All @@ -22,6 +22,7 @@ extend-exclude = [
"e2e_test",
"**/*.svg",
"scripts",
"src/sqlparser/tests/testdata/",
"src/frontend/planner_test/tests/testdata",
"src/tests/sqlsmith/tests/freeze",
"Cargo.lock",
Expand Down
4 changes: 2 additions & 2 deletions e2e_test/error_ui/extended/main.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ selet 1;
db error: ERROR: Failed to prepare the statement

Caused by:
sql parser error: expected an SQL statement, found: selet at line 1, column 1
sql parser error: expected statement, found: selet
LINE 1: selet 1;
^
^


query error
Expand Down
4 changes: 2 additions & 2 deletions e2e_test/error_ui/simple/main.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ selet 1;
db error: ERROR: Failed to run the query

Caused by:
sql parser error: expected an SQL statement, found: selet at line 1, column 1
sql parser error: expected statement, found: selet
LINE 1: selet 1;
^
^


statement error
Expand Down
2 changes: 1 addition & 1 deletion e2e_test/source/basic/ddl.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ create source s;
db error: ERROR: Failed to run the query

Caused by:
sql parser error: expected description of the format, found: ; at line 1, column 16
sql parser error: expected description of the format, found: ;
LINE 1: create source s;
^

Expand Down
22 changes: 5 additions & 17 deletions src/frontend/src/expr/function_impl/cast_regclass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
use risingwave_common::session_config::SearchPath;
use risingwave_expr::{capture_context, function, ExprError};
use risingwave_sqlparser::parser::{Parser, ParserError};
use risingwave_sqlparser::tokenizer::{Token, Tokenizer};
use thiserror::Error;
use thiserror_ext::AsReport;

Expand Down Expand Up @@ -63,7 +62,11 @@ fn resolve_regclass_inner(
db_name: &str,
class_name: &str,
) -> Result<u32, ResolveRegclassError> {
let obj = parse_object_name(class_name)?;
// We use the full parser here because this function needs to accept every legal way
// of identifying an object in PG SQL as a valid value for the varchar
// literal. For example: 'foo', 'public.foo', '"my table"', and
// '"my schema".foo' must all work as values passed to pg_table_size.
let obj = Parser::parse_object_name_str(class_name)?;

if obj.0.len() == 1 {
let class_name = obj.0[0].real_value();
Expand All @@ -81,21 +84,6 @@ fn resolve_regclass_inner(
}
}

/// Parses `name` as a SQL object name and returns the resulting `ObjectName`.
///
/// The string is tokenized, handed to a fresh `Parser`, and parsed with
/// `Parser::parse_object_name`. After the object name, the next token is
/// required to be `Token::EOF`.
///
/// # Errors
///
/// Returns a `ParserError` if tokenization fails (the tokenizer error is
/// converted via `ParserError::from`), if `parse_object_name` fails, or if
/// the `Token::EOF` expectation is not met.
fn parse_object_name(name: &str) -> Result<risingwave_sqlparser::ast::ObjectName, ParserError> {
// We use the full parser here because this function needs to accept every legal way
// of identifying an object in PG SQL as a valid value for the varchar
// literal. For example: 'foo', 'public.foo', '"my table"', and
// '"my schema".foo' must all work as values passed to pg_table_size.
let mut tokenizer = Tokenizer::new(name);
let tokens = tokenizer
.tokenize_with_location()
.map_err(ParserError::from)?;
let mut parser = Parser::new(tokens);
let object = parser.parse_object_name()?;
// Require that nothing follows the object name in the input string.
parser.expect_token(&Token::EOF)?;
Ok(object)
}

#[function("cast_regclass(varchar) -> int4")]
fn cast_regclass(class_name: &str) -> Result<i32, ExprError> {
let oid = resolve_regclass_impl_captured(class_name)?;
Expand Down
33 changes: 15 additions & 18 deletions src/sqlparser/src/ast/legacy_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,15 @@ use std::fmt;
use itertools::Itertools as _;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use winnow::PResult;

use crate::ast::{
AstString, AstVec, ConnectorSchema, Encode, Format, Ident, ObjectName, ParseTo, SqlOption,
Value,
};
use crate::keywords::Keyword;
use crate::parser::{Parser, ParserError};
use crate::{impl_fmt_display, impl_parse_to};
use crate::parser::{Parser, StrError};
use crate::{impl_fmt_display, impl_parse_to, parser_err};

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
Expand Down Expand Up @@ -64,12 +65,10 @@ impl From<ConnectorSchema> for CompatibleSourceSchema {
}
}

pub fn parse_source_schema(p: &mut Parser) -> Result<CompatibleSourceSchema, ParserError> {
pub fn parse_source_schema(p: &mut Parser<'_>) -> PResult<CompatibleSourceSchema> {
if let Some(schema_v2) = p.parse_schema()? {
if schema_v2.key_encode.is_some() {
return Err(ParserError::ParserError(
"key encode clause is not supported in source schema".to_string(),
));
parser_err!("key encode clause is not supported in source schema");
}
Ok(CompatibleSourceSchema::V2(schema_v2))
} else if p.peek_nth_any_of_keywords(0, &[Keyword::ROW])
Expand Down Expand Up @@ -109,16 +108,15 @@ pub fn parse_source_schema(p: &mut Parser) -> Result<CompatibleSourceSchema, Par
}
"BYTES" => SourceSchema::Bytes,
_ => {
return Err(ParserError::ParserError(
parser_err!(
"expected JSON | UPSERT_JSON | PROTOBUF | DEBEZIUM_JSON | DEBEZIUM_AVRO \
| AVRO | UPSERT_AVRO | MAXWELL | CANAL_JSON | BYTES | NATIVE after ROW FORMAT"
.to_string(),
));
);
}
};
Ok(CompatibleSourceSchema::RowFormat(schema))
} else {
p.expected("description of the format", p.peek_token())
p.expected("description of the format")
}
}

Expand Down Expand Up @@ -286,7 +284,7 @@ pub struct ProtobufSchema {
}

impl ParseTo for ProtobufSchema {
fn parse_to(p: &mut Parser) -> Result<Self, ParserError> {
fn parse_to(p: &mut Parser<'_>) -> PResult<Self> {
impl_parse_to!([Keyword::MESSAGE], p);
impl_parse_to!(message_name: AstString, p);
impl_parse_to!([Keyword::ROW, Keyword::SCHEMA, Keyword::LOCATION], p);
Expand Down Expand Up @@ -324,7 +322,7 @@ pub struct AvroSchema {
}

impl ParseTo for AvroSchema {
fn parse_to(p: &mut Parser) -> Result<Self, ParserError> {
fn parse_to(p: &mut Parser<'_>) -> PResult<Self> {
impl_parse_to!([Keyword::ROW, Keyword::SCHEMA, Keyword::LOCATION], p);
impl_parse_to!(use_schema_registry => [Keyword::CONFLUENT, Keyword::SCHEMA, Keyword::REGISTRY], p);
impl_parse_to!(row_schema_location: AstString, p);
Expand Down Expand Up @@ -371,7 +369,7 @@ impl fmt::Display for DebeziumAvroSchema {
}

impl ParseTo for DebeziumAvroSchema {
fn parse_to(p: &mut Parser) -> Result<Self, ParserError> {
fn parse_to(p: &mut Parser<'_>) -> PResult<Self> {
impl_parse_to!(
[
Keyword::ROW,
Expand All @@ -397,19 +395,18 @@ pub struct CsvInfo {
pub has_header: bool,
}

pub fn get_delimiter(chars: &str) -> Result<u8, ParserError> {
pub fn get_delimiter(chars: &str) -> Result<u8, StrError> {
match chars {
"," => Ok(b','), // comma
"\t" => Ok(b'\t'), // tab
other => Err(ParserError::ParserError(format!(
"The delimiter should be one of ',', E'\\t', but got {:?}",
other
other => Err(StrError(format!(
"The delimiter should be one of ',', E'\\t', but got {other:?}",
))),
}
}

impl ParseTo for CsvInfo {
fn parse_to(p: &mut Parser) -> Result<Self, ParserError> {
fn parse_to(p: &mut Parser<'_>) -> PResult<Self> {
impl_parse_to!(without_header => [Keyword::WITHOUT, Keyword::HEADER], p);
impl_parse_to!([Keyword::DELIMITED, Keyword::BY], p);
impl_parse_to!(delimiter: AstString, p);
Expand Down
17 changes: 7 additions & 10 deletions src/sqlparser/src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ use std::sync::Arc;
use itertools::Itertools;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use winnow::PResult;

pub use self::data_type::{DataType, StructField};
pub use self::ddl::{
Expand All @@ -59,7 +60,7 @@ pub use crate::ast::ddl::{
AlterViewOperation,
};
use crate::keywords::Keyword;
use crate::parser::{IncludeOption, IncludeOptionItem, Parser, ParserError};
use crate::parser::{IncludeOption, IncludeOptionItem, Parser, ParserError, StrError};

pub type RedactSqlOptionKeywordsRef = Arc<HashSet<String>>;

Expand Down Expand Up @@ -191,7 +192,7 @@ impl From<&str> for Ident {
}

impl ParseTo for Ident {
fn parse_to(parser: &mut Parser) -> Result<Self, ParserError> {
fn parse_to(parser: &mut Parser<'_>) -> PResult<Self> {
parser.parse_identifier()
}
}
Expand Down Expand Up @@ -235,7 +236,7 @@ impl fmt::Display for ObjectName {
}

impl ParseTo for ObjectName {
fn parse_to(p: &mut Parser) -> Result<Self, ParserError> {
fn parse_to(p: &mut Parser<'_>) -> PResult<Self> {
p.parse_object_name()
}
}
Expand Down Expand Up @@ -2560,7 +2561,7 @@ impl fmt::Display for ObjectType {
}

impl ParseTo for ObjectType {
fn parse_to(parser: &mut Parser) -> Result<Self, ParserError> {
fn parse_to(parser: &mut Parser<'_>) -> PResult<Self> {
let object_type = if parser.parse_keyword(Keyword::TABLE) {
ObjectType::Table
} else if parser.parse_keyword(Keyword::VIEW) {
Expand Down Expand Up @@ -2588,7 +2589,6 @@ impl ParseTo for ObjectType {
} else {
return parser.expected(
"TABLE, VIEW, INDEX, MATERIALIZED VIEW, SOURCE, SINK, SUBSCRIPTION, SCHEMA, DATABASE, USER, SECRET or CONNECTION after DROP",
parser.peek_token(),
);
};
Ok(object_type)
Expand Down Expand Up @@ -3007,18 +3007,15 @@ impl CreateFunctionWithOptions {

/// TODO(kwannoel): Generate from the struct definition instead.
impl TryFrom<Vec<SqlOption>> for CreateFunctionWithOptions {
type Error = ParserError;
type Error = StrError;

fn try_from(with_options: Vec<SqlOption>) -> Result<Self, Self::Error> {
let mut always_retry_on_network_error = None;
for option in with_options {
if option.name.to_string().to_lowercase() == "always_retry_on_network_error" {
always_retry_on_network_error = Some(option.value == Value::Boolean(true));
} else {
return Err(ParserError::ParserError(format!(
"Unsupported option: {}",
option.name
)));
return Err(StrError(format!("Unsupported option: {}", option.name)));
}
}
Ok(Self {
Expand Down
Loading

0 comments on commit cab8403

Please sign in to comment.