Skip to content

Commit

Permalink
feat(parser): fix token location and print sql with cursor on parse error (#16959)
Browse files Browse the repository at this point in the history

Signed-off-by: Runji Wang <[email protected]>
  • Loading branch information
wangrunji0408 authored May 29, 2024
1 parent c7ad769 commit 2016c32
Show file tree
Hide file tree
Showing 17 changed files with 529 additions and 425 deletions.
5 changes: 3 additions & 2 deletions e2e_test/error_ui/extended/main.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ selet 1;
db error: ERROR: Failed to prepare the statement

Caused by:
sql parser error: Expected an SQL statement, found: selet at line:1, column:6
Near "selet"
sql parser error: expected an SQL statement, found: selet at line 1, column 1
LINE 1: selet 1;
        ^


query error
Expand Down
5 changes: 3 additions & 2 deletions e2e_test/error_ui/simple/main.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ selet 1;
db error: ERROR: Failed to run the query

Caused by:
sql parser error: Expected an SQL statement, found: selet at line:1, column:6
Near "selet"
sql parser error: expected an SQL statement, found: selet at line 1, column 1
LINE 1: selet 1;
        ^


statement error
Expand Down
4 changes: 2 additions & 2 deletions e2e_test/source/basic/datagen.slt
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,9 @@ statement ok
drop table s1;

# Do NOT allow With clause to contain a comma only.
statement error Expected identifier.*
statement error expected identifier.*
create table s1 (v1 int) with (,) FORMAT PLAIN ENCODE JSON;

# Do NOT allow an empty With clause.
statement error Expected identifier.*
statement error expected identifier.*
create table s1 (v1 int) with () FORMAT PLAIN ENCODE JSON;
5 changes: 3 additions & 2 deletions e2e_test/source/basic/ddl.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ create source s;
db error: ERROR: Failed to run the query

Caused by:
sql parser error: Expected description of the format, found: ; at line:1, column:17
Near "create source s"
sql parser error: expected description of the format, found: ; at line 1, column 16
LINE 1: create source s;
                       ^


statement error missing WITH clause
Expand Down
4 changes: 2 additions & 2 deletions e2e_test/source/basic/old_row_format_syntax/datagen.slt
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,9 @@ statement ok
drop table s1;

# Do NOT allow With clause to contain a comma only.
statement error Expected identifier.*
statement error expected identifier.*
create table s1 (v1 int) with (,) ROW FORMAT JSON;

# Do NOT allow an empty With clause.
statement error Expected identifier.*
statement error expected identifier.*
create table s1 (v1 int) with () ROW FORMAT JSON;
4 changes: 2 additions & 2 deletions e2e_test/udf/sql_udf.slt
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ In SQL UDF definition: `select a + b + c + not_be_displayed(c)`
^


statement error Expected end of statement, found: 💩
statement error expected end of statement, found: 💩
create function call_regexp_replace() returns varchar language sql as 'select regexp_replace('💩💩💩💩💩foo🤔️bar亲爱的😭baz这不是爱情❤️‍🔥', 'baz(...)', '这是🥵', 'ic')';

# Recursive definition can NOT be accepted at present due to semantic check
Expand Down Expand Up @@ -401,7 +401,7 @@ statement error return type mismatch detected
create function type_mismatch(INT) returns varchar language sql as 'select $1 + 114514 + $1';

# Invalid function body syntax
statement error Expected an expression:, found: EOF at the end
statement error expected an expression:, found: EOF at the end
create function add_error(INT, INT) returns int language sql as $$select $1 + $2 +$$;

######################################################################
Expand Down
59 changes: 37 additions & 22 deletions src/sqlparser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,22 +226,54 @@ impl Parser {
let mut tokenizer = Tokenizer::new(sql);
let tokens = tokenizer.tokenize_with_location()?;
let mut parser = Parser::new(tokens);
let ast = parser.parse_statements().map_err(|e| {
// append SQL context to the error message, e.g.:
// LINE 1: SELECT 1::int(2);
// ^
// XXX: the cursor location is not accurate
// it may be offset one token forward because the error token has been consumed
let loc = match parser.tokens.get(parser.index) {
Some(token) => token.location.clone(),
None => {
// get location of EOF
Location {
line: sql.lines().count() as u64,
column: sql.lines().last().map_or(0, |l| l.len() as u64) + 1,
}
}
};
let prefix = format!("LINE {}: ", loc.line);
let sql_line = sql.split('\n').nth(loc.line as usize - 1).unwrap();
let cursor = " ".repeat(prefix.len() + loc.column as usize - 1);
ParserError::ParserError(format!(
"{}\n{}{}\n{}^",
e.inner_msg(),
prefix,
sql_line,
cursor
))
})?;
Ok(ast)
}

/// Parse a list of semicolon-separated SQL statements.
pub fn parse_statements(&mut self) -> Result<Vec<Statement>, ParserError> {
let mut stmts = Vec::new();
let mut expecting_statement_delimiter = false;
loop {
// ignore empty statements (between successive statement delimiters)
while parser.consume_token(&Token::SemiColon) {
while self.consume_token(&Token::SemiColon) {
expecting_statement_delimiter = false;
}

if parser.peek_token() == Token::EOF {
if self.peek_token() == Token::EOF {
break;
}
if expecting_statement_delimiter {
return parser.expected("end of statement", parser.peek_token());
return self.expected("end of statement", self.peek_token());
}

let statement = parser.parse_statement()?;
let statement = self.parse_statement()?;
stmts.push(statement);
expecting_statement_delimiter = true;
}
Expand Down Expand Up @@ -1958,24 +1990,7 @@ impl Parser {

/// Report unexpected token
pub fn expected<T>(&self, expected: &str, found: TokenWithLocation) -> Result<T, ParserError> {
let start_off = self.index.saturating_sub(10);
let end_off = self.index.min(self.tokens.len());
let near_tokens = &self.tokens[start_off..end_off];
struct TokensDisplay<'a>(&'a [TokenWithLocation]);
impl<'a> fmt::Display for TokensDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for token in self.0 {
write!(f, "{}", token.token)?;
}
Ok(())
}
}
parser_err!(format!(
"Expected {}, found: {}\nNear \"{}\"",
expected,
found,
TokensDisplay(near_tokens),
))
parser_err!(format!("expected {}, found: {}", expected, found))
}

/// Look for an expected keyword and consume it if it exists
Expand Down
Loading

0 comments on commit 2016c32

Please sign in to comment.