-
Notifications
You must be signed in to change notification settings - Fork 594
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(sql-udf): add hint display for non-existent functions #15188
Changes from 25 commits
f07dd5b
876f810
fbf809c
d9f9360
5a2efab
c31e285
44781a4
a390d3d
bbf48a6
119f528
0e42253
974455a
5858a3c
dccf8fd
461fa4e
79e6bcc
c94e0e5
e9e94b4
f47a474
1005bc0
96a0c3b
a4cf35a
ce30232
bce64a6
32314b4
66cc802
85814d0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
|
||
use std::collections::HashMap; | ||
|
||
use fancy_regex::Regex; | ||
use itertools::Itertools; | ||
use pgwire::pg_response::StatementType; | ||
use risingwave_common::catalog::FunctionId; | ||
|
@@ -24,14 +25,83 @@ use risingwave_sqlparser::ast::{ | |
CreateFunctionBody, FunctionDefinition, ObjectName, OperateFunctionArg, | ||
}; | ||
use risingwave_sqlparser::parser::{Parser, ParserError}; | ||
use thiserror_ext::AsReport; | ||
|
||
use super::*; | ||
use crate::binder::UdfContext; | ||
use crate::catalog::CatalogError; | ||
use crate::expr::{Expr, ExprImpl, Literal}; | ||
use crate::{bind_data_type, Binder}; | ||
|
||
/// The error type for hint display | ||
/// Currently we will try invalid parameter first | ||
/// Then try to find non-existent functions | ||
enum ErrMsgType { | ||
Parameter, | ||
Function, | ||
// Not yet support | ||
None, | ||
} | ||
|
||
const DEFAULT_ERR_MSG: &str = "Failed to conduct semantic check"; | ||
|
||
/// Used for hint display | ||
const PROMPT: &str = "In SQL UDF definition: "; | ||
|
||
/// Used for detecting non-existent function | ||
const FUNCTION_KEYWORD: &str = "function"; | ||
|
||
/// Used for detecting invalid parameters | ||
pub const SQL_UDF_PATTERN: &str = "[sql udf]"; | ||
|
||
/// Validate the error message to see if | ||
/// it's possible to improve the display to users | ||
fn validate_err_msg(invalid_msg: &str) -> ErrMsgType { | ||
// First try invalid parameters | ||
if invalid_msg.contains(SQL_UDF_PATTERN) { | ||
ErrMsgType::Parameter | ||
} else if invalid_msg.contains(FUNCTION_KEYWORD) { | ||
ErrMsgType::Function | ||
} else { | ||
// Nothing could be better display | ||
ErrMsgType::None | ||
} | ||
} | ||
|
||
/// Extract the target name to hint display | ||
/// according to the type of the error message item | ||
fn extract_hint_display_target(err_msg_type: ErrMsgType, invalid_msg: &str) -> Option<&str> { | ||
match err_msg_type { | ||
// e.g., [sql udf] failed to find named parameter <target name> | ||
ErrMsgType::Parameter => invalid_msg.split_whitespace().last(), | ||
// e.g., function <target name> does not exist | ||
ErrMsgType::Function => { | ||
let func = invalid_msg.split_whitespace().nth(1).unwrap_or("null"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it possible to extract targets directly from error types, instead of relying on formatted error messages? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Error type can not indicate [function / invalid parameter] name I'm assuming, so it seems impossible? 🥵 |
||
// Note: we do not want the parenthesis | ||
func.find('(').map(|i| &func[0..i]) | ||
} | ||
// Nothing to hint display, return default error message | ||
ErrMsgType::None => None, | ||
} | ||
} | ||
|
||
/// Find the pattern for better hint display | ||
/// return the exact index where the pattern first appears | ||
fn find_target(input: &str, target: &str) -> Option<usize> { | ||
// Regex pattern to find `target` not preceded or followed by an ASCII letter | ||
// The pattern uses negative lookbehind (?<!...) and lookahead (?!...) to ensure | ||
// the target is not surrounded by ASCII alphabetic characters | ||
let pattern = format!(r"(?<![A-Za-z]){0}(?![A-Za-z])", fancy_regex::escape(target)); | ||
let Ok(re) = Regex::new(&pattern) else { | ||
return None; | ||
}; | ||
|
||
let Ok(Some(ma)) = re.find(input) else { | ||
return None; | ||
}; | ||
|
||
Some(ma.start()) | ||
} | ||
|
||
/// Create a mock `udf_context`, which is used for semantic check | ||
fn create_mock_udf_context( | ||
arg_types: Vec<DataType>, | ||
|
@@ -205,11 +275,47 @@ pub async fn handle_create_sql_function( | |
.into()); | ||
} | ||
} | ||
Err(e) => return Err(ErrorCode::InvalidInputSyntax(format!( | ||
"failed to conduct semantic check, please see if you are calling non-existence functions or parameters\ndetailed error message: {}", | ||
e.as_report() | ||
)) | ||
.into()), | ||
Err(e) => { | ||
if let ErrorCode::BindErrorRoot { expr: _, error } = e.inner() { | ||
let invalid_msg = error.to_string(); | ||
|
||
// First validate the message | ||
let err_msg_type = validate_err_msg(invalid_msg.as_str()); | ||
|
||
// Get the name of the invalid item | ||
// We will just display the first one found | ||
let Some(invalid_item_name) = | ||
extract_hint_display_target(err_msg_type, invalid_msg.as_str()) | ||
else { | ||
return Err( | ||
ErrorCode::InvalidInputSyntax(DEFAULT_ERR_MSG.into()).into() | ||
); | ||
}; | ||
|
||
// Find the invalid parameter / column / function | ||
let Some(idx) = find_target(body.as_str(), invalid_item_name) else { | ||
return Err( | ||
ErrorCode::InvalidInputSyntax(DEFAULT_ERR_MSG.into()).into() | ||
); | ||
}; | ||
|
||
// The exact error position for `^` to point to | ||
let position = format!( | ||
"{}{}", | ||
" ".repeat(idx + PROMPT.len() + 1), | ||
"^".repeat(invalid_item_name.len()) | ||
); | ||
|
||
return Err(ErrorCode::InvalidInputSyntax(format!( | ||
"{}\n{}\n{}`{}`\n{}", | ||
DEFAULT_ERR_MSG, invalid_msg, PROMPT, body, position | ||
)) | ||
.into()); | ||
} | ||
|
||
// Otherwise return the default error message | ||
return Err(ErrorCode::InvalidInputSyntax(DEFAULT_ERR_MSG.into()).into()); | ||
} | ||
} | ||
} else { | ||
return Err(ErrorCode::InvalidInputSyntax( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we associate identifier names in this enum? This way,
validate_err_msg
andextract_hint_display_target
can be merged into one.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually I don't think this two could merge. 🤔
Without prior context on what the error message is for, we can't simply retrieve the hint target from the input message.