Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(parser): eliminate the gap between parser v1 and v2 #17019

Merged
merged 13 commits into from
May 31, 2024
3 changes: 2 additions & 1 deletion .typos.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ steam = "stream" # You played with Steam games too much.
ser = "ser" # Serialization
# Some weird short variable names
ot = "ot"
bui = "bui" # BackwardUserIterator
bui = "bui" # BackwardUserIterator
mosquitto = "mosquitto" # This is a MQTT broker.
abd = "abd"
iy = "iy"
Expand All @@ -22,6 +22,7 @@ extend-exclude = [
"e2e_test",
"**/*.svg",
"scripts",
"src/sqlparser/tests/testdata/",
"src/frontend/planner_test/tests/testdata",
"src/tests/sqlsmith/tests/freeze",
"Cargo.lock",
Expand Down
4 changes: 2 additions & 2 deletions e2e_test/error_ui/extended/main.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ selet 1;
db error: ERROR: Failed to prepare the statement

Caused by:
sql parser error: expected an SQL statement, found: selet at line 1, column 1
sql parser error: expected statement, found: selet
LINE 1: selet 1;
^
^


query error
Expand Down
4 changes: 2 additions & 2 deletions e2e_test/error_ui/simple/main.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ selet 1;
db error: ERROR: Failed to run the query

Caused by:
sql parser error: expected an SQL statement, found: selet at line 1, column 1
sql parser error: expected statement, found: selet
LINE 1: selet 1;
^
^


statement error
Expand Down
2 changes: 1 addition & 1 deletion e2e_test/source/basic/ddl.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ create source s;
db error: ERROR: Failed to run the query

Caused by:
sql parser error: expected description of the format, found: ; at line 1, column 16
sql parser error: expected description of the format, found: ;
LINE 1: create source s;
^

Expand Down
22 changes: 5 additions & 17 deletions src/frontend/src/expr/function_impl/cast_regclass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
use risingwave_common::session_config::SearchPath;
use risingwave_expr::{capture_context, function, ExprError};
use risingwave_sqlparser::parser::{Parser, ParserError};
use risingwave_sqlparser::tokenizer::{Token, Tokenizer};
use thiserror::Error;
use thiserror_ext::AsReport;

Expand Down Expand Up @@ -63,7 +62,11 @@ fn resolve_regclass_inner(
db_name: &str,
class_name: &str,
) -> Result<u32, ResolveRegclassError> {
let obj = parse_object_name(class_name)?;
// We use the full parser here because this function needs to accept every legal way
// of identifying an object in PG SQL as a valid value for the varchar
// literal. For example: 'foo', 'public.foo', '"my table"', and
// '"my schema".foo' must all work as values passed to pg_table_size.
let obj = Parser::parse_object_name_str(class_name)?;

if obj.0.len() == 1 {
let class_name = obj.0[0].real_value();
Expand All @@ -81,21 +84,6 @@ fn resolve_regclass_inner(
}
}

fn parse_object_name(name: &str) -> Result<risingwave_sqlparser::ast::ObjectName, ParserError> {
// We use the full parser here because this function needs to accept every legal way
// of identifying an object in PG SQL as a valid value for the varchar
// literal. For example: 'foo', 'public.foo', '"my table"', and
// '"my schema".foo' must all work as values passed pg_table_size.
let mut tokenizer = Tokenizer::new(name);
let tokens = tokenizer
.tokenize_with_location()
.map_err(ParserError::from)?;
let mut parser = Parser::new(tokens);
let object = parser.parse_object_name()?;
parser.expect_token(&Token::EOF)?;
Ok(object)
}

#[function("cast_regclass(varchar) -> int4")]
fn cast_regclass(class_name: &str) -> Result<i32, ExprError> {
let oid = resolve_regclass_impl_captured(class_name)?;
Expand Down
33 changes: 15 additions & 18 deletions src/sqlparser/src/ast/legacy_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,15 @@ use std::fmt;
use itertools::Itertools as _;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use winnow::PResult;

use crate::ast::{
AstString, AstVec, ConnectorSchema, Encode, Format, Ident, ObjectName, ParseTo, SqlOption,
Value,
};
use crate::keywords::Keyword;
use crate::parser::{Parser, ParserError};
use crate::{impl_fmt_display, impl_parse_to};
use crate::parser::{Parser, StrError};
use crate::{impl_fmt_display, impl_parse_to, parser_err};

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
Expand Down Expand Up @@ -64,12 +65,10 @@ impl From<ConnectorSchema> for CompatibleSourceSchema {
}
}

pub fn parse_source_schema(p: &mut Parser) -> Result<CompatibleSourceSchema, ParserError> {
pub fn parse_source_schema(p: &mut Parser<'_>) -> PResult<CompatibleSourceSchema> {
if let Some(schema_v2) = p.parse_schema()? {
if schema_v2.key_encode.is_some() {
return Err(ParserError::ParserError(
"key encode clause is not supported in source schema".to_string(),
));
parser_err!("key encode clause is not supported in source schema");
}
Ok(CompatibleSourceSchema::V2(schema_v2))
} else if p.peek_nth_any_of_keywords(0, &[Keyword::ROW])
Expand Down Expand Up @@ -109,16 +108,15 @@ pub fn parse_source_schema(p: &mut Parser) -> Result<CompatibleSourceSchema, Par
}
"BYTES" => SourceSchema::Bytes,
_ => {
return Err(ParserError::ParserError(
parser_err!(
"expected JSON | UPSERT_JSON | PROTOBUF | DEBEZIUM_JSON | DEBEZIUM_AVRO \
| AVRO | UPSERT_AVRO | MAXWELL | CANAL_JSON | BYTES | NATIVE after ROW FORMAT"
.to_string(),
));
);
}
};
Ok(CompatibleSourceSchema::RowFormat(schema))
} else {
p.expected("description of the format", p.peek_token())
p.expected("description of the format")
}
}

Expand Down Expand Up @@ -286,7 +284,7 @@ pub struct ProtobufSchema {
}

impl ParseTo for ProtobufSchema {
fn parse_to(p: &mut Parser) -> Result<Self, ParserError> {
fn parse_to(p: &mut Parser<'_>) -> PResult<Self> {
impl_parse_to!([Keyword::MESSAGE], p);
impl_parse_to!(message_name: AstString, p);
impl_parse_to!([Keyword::ROW, Keyword::SCHEMA, Keyword::LOCATION], p);
Expand Down Expand Up @@ -324,7 +322,7 @@ pub struct AvroSchema {
}

impl ParseTo for AvroSchema {
fn parse_to(p: &mut Parser) -> Result<Self, ParserError> {
fn parse_to(p: &mut Parser<'_>) -> PResult<Self> {
impl_parse_to!([Keyword::ROW, Keyword::SCHEMA, Keyword::LOCATION], p);
impl_parse_to!(use_schema_registry => [Keyword::CONFLUENT, Keyword::SCHEMA, Keyword::REGISTRY], p);
impl_parse_to!(row_schema_location: AstString, p);
Expand Down Expand Up @@ -371,7 +369,7 @@ impl fmt::Display for DebeziumAvroSchema {
}

impl ParseTo for DebeziumAvroSchema {
fn parse_to(p: &mut Parser) -> Result<Self, ParserError> {
fn parse_to(p: &mut Parser<'_>) -> PResult<Self> {
impl_parse_to!(
[
Keyword::ROW,
Expand All @@ -397,19 +395,18 @@ pub struct CsvInfo {
pub has_header: bool,
}

pub fn get_delimiter(chars: &str) -> Result<u8, ParserError> {
pub fn get_delimiter(chars: &str) -> Result<u8, StrError> {
match chars {
"," => Ok(b','), // comma
"\t" => Ok(b'\t'), // tab
other => Err(ParserError::ParserError(format!(
"The delimiter should be one of ',', E'\\t', but got {:?}",
other
other => Err(StrError(format!(
"The delimiter should be one of ',', E'\\t', but got {other:?}",
))),
}
}

impl ParseTo for CsvInfo {
fn parse_to(p: &mut Parser) -> Result<Self, ParserError> {
fn parse_to(p: &mut Parser<'_>) -> PResult<Self> {
impl_parse_to!(without_header => [Keyword::WITHOUT, Keyword::HEADER], p);
impl_parse_to!([Keyword::DELIMITED, Keyword::BY], p);
impl_parse_to!(delimiter: AstString, p);
Expand Down
17 changes: 7 additions & 10 deletions src/sqlparser/src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ use std::sync::Arc;
use itertools::Itertools;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use winnow::PResult;

pub use self::data_type::{DataType, StructField};
pub use self::ddl::{
Expand All @@ -59,7 +60,7 @@ pub use crate::ast::ddl::{
AlterViewOperation,
};
use crate::keywords::Keyword;
use crate::parser::{IncludeOption, IncludeOptionItem, Parser, ParserError};
use crate::parser::{IncludeOption, IncludeOptionItem, Parser, ParserError, StrError};

pub type RedactSqlOptionKeywordsRef = Arc<HashSet<String>>;

Expand Down Expand Up @@ -191,7 +192,7 @@ impl From<&str> for Ident {
}

impl ParseTo for Ident {
fn parse_to(parser: &mut Parser) -> Result<Self, ParserError> {
fn parse_to(parser: &mut Parser<'_>) -> PResult<Self> {
parser.parse_identifier()
}
}
Expand Down Expand Up @@ -235,7 +236,7 @@ impl fmt::Display for ObjectName {
}

impl ParseTo for ObjectName {
fn parse_to(p: &mut Parser) -> Result<Self, ParserError> {
fn parse_to(p: &mut Parser<'_>) -> PResult<Self> {
p.parse_object_name()
}
}
Expand Down Expand Up @@ -2560,7 +2561,7 @@ impl fmt::Display for ObjectType {
}

impl ParseTo for ObjectType {
fn parse_to(parser: &mut Parser) -> Result<Self, ParserError> {
fn parse_to(parser: &mut Parser<'_>) -> PResult<Self> {
let object_type = if parser.parse_keyword(Keyword::TABLE) {
ObjectType::Table
} else if parser.parse_keyword(Keyword::VIEW) {
Expand Down Expand Up @@ -2588,7 +2589,6 @@ impl ParseTo for ObjectType {
} else {
return parser.expected(
"TABLE, VIEW, INDEX, MATERIALIZED VIEW, SOURCE, SINK, SUBSCRIPTION, SCHEMA, DATABASE, USER, SECRET or CONNECTION after DROP",
parser.peek_token(),
);
};
Ok(object_type)
Expand Down Expand Up @@ -3007,18 +3007,15 @@ impl CreateFunctionWithOptions {

/// TODO(kwannoel): Generate from the struct definition instead.
impl TryFrom<Vec<SqlOption>> for CreateFunctionWithOptions {
type Error = ParserError;
type Error = StrError;

fn try_from(with_options: Vec<SqlOption>) -> Result<Self, Self::Error> {
let mut always_retry_on_network_error = None;
for option in with_options {
if option.name.to_string().to_lowercase() == "always_retry_on_network_error" {
always_retry_on_network_error = Some(option.value == Value::Boolean(true));
} else {
return Err(ParserError::ParserError(format!(
"Unsupported option: {}",
option.name
)));
return Err(StrError(format!("Unsupported option: {}", option.name)));
}
}
Ok(Self {
Expand Down
Loading
Loading