Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(sql-udf): add hint display for non-existent functions #15188

Merged
merged 27 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
f07dd5b
better display
xzhseh Feb 13, 2024
876f810
make default_err_msg & prompt constant
xzhseh Feb 13, 2024
fbf809c
Fix "cargo-hakari"
xzhseh Feb 13, 2024
d9f9360
fix check
xzhseh Feb 13, 2024
5a2efab
make pattern constant
xzhseh Feb 13, 2024
c31e285
update sql udf test
xzhseh Feb 13, 2024
44781a4
fix check
xzhseh Feb 13, 2024
a390d3d
adjust the display a little bit
xzhseh Feb 14, 2024
bbf48a6
tiny update
xzhseh Feb 14, 2024
119f528
add multi-line error message
xzhseh Feb 14, 2024
0e42253
add more test
xzhseh Feb 14, 2024
974455a
use word boundary instead of fragile manual lookahead
xzhseh Feb 19, 2024
5858a3c
use SQL_UDF_PATTERN instead of manual encoding
xzhseh Feb 19, 2024
dccf8fd
bring back lookahead technique when conducting regex pattern matching
xzhseh Feb 19, 2024
461fa4e
remove redundant debugging statement
xzhseh Feb 19, 2024
79e6bcc
add more test cases
xzhseh Feb 19, 2024
c94e0e5
Merge branch 'main' into xzhseh/sql-udf-creation-better-hint-display
xzhseh Feb 19, 2024
e9e94b4
return None if regex compilation fails
xzhseh Feb 20, 2024
f47a474
add hint display for non-existent function
xzhseh Feb 21, 2024
1005bc0
fix check
xzhseh Feb 21, 2024
96a0c3b
remove debug statement
xzhseh Feb 21, 2024
a4cf35a
fix check
xzhseh Feb 21, 2024
ce30232
update tests
xzhseh Feb 21, 2024
bce64a6
Update create_sql_function.rs
xzhseh Feb 21, 2024
32314b4
use contains instead of find
xzhseh Feb 22, 2024
66cc802
Merge branch 'main' into xzhseh/sql-udf-creation-better-funciton-hint…
xzhseh Feb 26, 2024
85814d0
fix check
xzhseh Feb 26, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

96 changes: 91 additions & 5 deletions e2e_test/udf/sql_udf.slt
Original file line number Diff line number Diff line change
Expand Up @@ -280,23 +280,109 @@ $1 + 114514 + $1

# Named sql udf with invalid parameter in body definition
# Will be rejected at creation time
statement error failed to find named parameter aa
statement error
create function unknown_parameter(a INT) returns int language sql as 'select a + aa + a';
----
db error: ERROR: Failed to run the query

Caused by:
Invalid input syntax: Failed to conduct semantic check
Bind error: [sql udf] failed to find named parameter aa
In SQL UDF definition: `select a + aa + a`
^^


# With unnamed parameter
statement error
create function unnamed_param_hint(INT, INT) returns int language sql as 'select $1 + $3 + $2';
----
db error: ERROR: Failed to run the query

Caused by:
Invalid input syntax: Failed to conduct semantic check
Bind error: [sql udf] failed to find unnamed parameter $3
In SQL UDF definition: `select $1 + $3 + $2`
^^


# A mixture of both
statement error
create function mix_hint(INT, aa INT, INT) returns int language sql as 'select $1 + aa + a + $2';
----
db error: ERROR: Failed to run the query

Caused by:
Invalid input syntax: Failed to conduct semantic check
Bind error: [sql udf] failed to find named parameter a
In SQL UDF definition: `select $1 + aa + a + $2`
^


# Long invalid parameter
statement error
create function long_invalid_param(i_am_valid INT) returns int language sql as
'select i_am_valid + _this_is_invalid_please_properly_mark_the_boundary_of_this_invalid_paramerter_ + i_am_valid';
----
db error: ERROR: Failed to run the query

Caused by:
Invalid input syntax: Failed to conduct semantic check
Bind error: [sql udf] failed to find named parameter _this_is_invalid_please_properly_mark_the_boundary_of_this_invalid_paramerter_
In SQL UDF definition: `select i_am_valid + _this_is_invalid_please_properly_mark_the_boundary_of_this_invalid_paramerter_ + i_am_valid`
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


# sql udf with non-existent function
statement error
create function call_non_existent_function(INT) returns int language sql as 'select 1 + non_existent(1) + 1';
----
db error: ERROR: Failed to run the query

Caused by:
Invalid input syntax: Failed to conduct semantic check
function non_existent(integer) does not exist
In SQL UDF definition: `select 1 + non_existent(1) + 1`
^^^^^^^^^^^^


# First display invalid parameter
# if not found, try displaying non-existent functions
# if still not found, display default error message without hint
statement error
create function param_func_mix(a INT, b INT) returns int language sql as 'select a + b + c + not_be_displayed(c)';
----
db error: ERROR: Failed to run the query

Caused by:
Invalid input syntax: Failed to conduct semantic check
Bind error: [sql udf] failed to find named parameter c
In SQL UDF definition: `select a + b + c + not_be_displayed(c)`
^


statement error Expected end of statement, found: 💩
create function call_regexp_replace() returns varchar language sql as 'select regexp_replace('💩💩💩💩💩foo🤔️bar亲爱的😭baz这不是爱情❤️‍🔥', 'baz(...)', '这是🥵', 'ic')';

# Recursive definition can NOT be accepted at present due to semantic check
statement error failed to conduct semantic check, please see if you are calling non-existent functions
statement error
create function recursive(INT, INT) returns int language sql as 'select recursive($1, $2) + recursive($1, $2)';
----
db error: ERROR: Failed to run the query

Caused by:
Invalid input syntax: Failed to conduct semantic check
function recursive(integer, integer) does not exist
In SQL UDF definition: `select recursive($1, $2) + recursive($1, $2)`
^^^^^^^^^


# Complex but error-prone definition, recursive & normal sql udfs interleaving
statement error failed to conduct semantic check, please see if you are calling non-existent functions
statement error Failed to conduct semantic check
create function recursive_non_recursive(INT, INT) returns int language sql as 'select recursive($1, $2) + sub($1, $2)';

# Create a valid recursive function
# Please note we do NOT support actual running the recursive sql udf at present
statement error failed to conduct semantic check, please see if you are calling non-existent functions
statement error Failed to conduct semantic check
create function fib(INT) returns int
language sql as 'select case
when $1 = 0 then 0
Expand All @@ -307,7 +393,7 @@ create function fib(INT) returns int
end;';

# Calling a non-existent function
statement error failed to conduct semantic check, please see if you are calling non-existent functions
statement error Failed to conduct semantic check
create function non_exist(INT) returns int language sql as 'select yo(114514)';

# Try to create an sql udf with unnamed parameters whose return type mismatches with the sql body definition
Expand Down
1 change: 1 addition & 0 deletions src/frontend/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ easy-ext = "1"
educe = "0.5"
either = "1"
enum-as-inner = "0.6"
fancy-regex = "0.11.0"
fixedbitset = "0.4.2"
futures = { version = "0.3", default-features = false, features = ["alloc"] }
futures-async-stream = { workspace = true }
Expand Down
5 changes: 4 additions & 1 deletion src/frontend/src/binder/expr/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use risingwave_sqlparser::ast::Ident;
use crate::binder::Binder;
use crate::error::{ErrorCode, Result};
use crate::expr::{CorrelatedInputRef, ExprImpl, ExprType, FunctionCall, InputRef, Literal};
use crate::handler::create_sql_function::SQL_UDF_PATTERN;

impl Binder {
pub fn bind_column(&mut self, idents: &[Ident]) -> Result<ExprImpl> {
Expand Down Expand Up @@ -54,8 +55,10 @@ impl Binder {
// there will not exist any column identifiers
// And invalid cases should already be caught
// during semantic check phase
// Note: the error message here also help with hint display
// when invalid definition occurs at sql udf creation time
return Err(ErrorCode::BindError(format!(
"failed to find named parameter {column_name}"
"{SQL_UDF_PATTERN} failed to find named parameter {column_name}"
))
.into());
}
Expand Down
7 changes: 7 additions & 0 deletions src/frontend/src/binder/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use crate::binder::expr::function::SYS_FUNCTION_WITHOUT_ARGS;
use crate::binder::Binder;
use crate::error::{ErrorCode, Result, RwError};
use crate::expr::{Expr as _, ExprImpl, ExprType, FunctionCall, InputRef, Parameter, SubqueryKind};
use crate::handler::create_sql_function::SQL_UDF_PATTERN;

mod binary_op;
mod column;
Expand Down Expand Up @@ -393,6 +394,12 @@ impl Binder {
if let Some(expr) = self.udf_context.get_expr(&format!("${index}")) {
return Ok(expr.clone());
}
// Same as `bind_column`, the error message here
// help with hint display when invalid definition occurs
return Err(ErrorCode::BindError(format!(
"{SQL_UDF_PATTERN} failed to find unnamed parameter ${index}"
))
.into());
}

Ok(Parameter::new(index, self.param_types.clone()).into())
Expand Down
118 changes: 112 additions & 6 deletions src/frontend/src/handler/create_sql_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

use std::collections::HashMap;

use fancy_regex::Regex;
use itertools::Itertools;
use pgwire::pg_response::StatementType;
use risingwave_common::catalog::FunctionId;
Expand All @@ -24,14 +25,83 @@ use risingwave_sqlparser::ast::{
CreateFunctionBody, FunctionDefinition, ObjectName, OperateFunctionArg,
};
use risingwave_sqlparser::parser::{Parser, ParserError};
use thiserror_ext::AsReport;

use super::*;
use crate::binder::UdfContext;
use crate::catalog::CatalogError;
use crate::expr::{Expr, ExprImpl, Literal};
use crate::{bind_data_type, Binder};

/// The error type for hint display
/// Currently we will try invalid parameter first
/// Then try to find non-existent functions
enum ErrMsgType {
Parameter,
Function,
Comment on lines +38 to +40
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we associate identifier names in this enum? This way, validate_err_msg and extract_hint_display_target can be merged into one.

Suggested change
enum ErrMsgType {
Parameter,
Function,
enum ErrMsgType {
Parameter(String),
Function(String),

Copy link
Contributor Author

@xzhseh xzhseh Feb 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I don't think this two could merge. 🤔
Without prior context on what the error message is for, we can't simply retrieve the hint target from the input message.

// Not yet support
None,
}

const DEFAULT_ERR_MSG: &str = "Failed to conduct semantic check";

/// Used for hint display
const PROMPT: &str = "In SQL UDF definition: ";

/// Used for detecting non-existent function
const FUNCTION_KEYWORD: &str = "function";

/// Used for detecting invalid parameters
pub const SQL_UDF_PATTERN: &str = "[sql udf]";

/// Validate the error message to see if
/// it's possible to improve the display to users
fn validate_err_msg(invalid_msg: &str) -> ErrMsgType {
// First try invalid parameters
if invalid_msg.contains(SQL_UDF_PATTERN) {
ErrMsgType::Parameter
} else if invalid_msg.contains(FUNCTION_KEYWORD) {
ErrMsgType::Function
} else {
// Nothing could be better display
ErrMsgType::None
}
}

/// Extract the target name to hint display
/// according to the type of the error message item
fn extract_hint_display_target(err_msg_type: ErrMsgType, invalid_msg: &str) -> Option<&str> {
match err_msg_type {
// e.g., [sql udf] failed to find named parameter <target name>
ErrMsgType::Parameter => invalid_msg.split_whitespace().last(),
// e.g., function <target name> does not exist
ErrMsgType::Function => {
let func = invalid_msg.split_whitespace().nth(1).unwrap_or("null");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to extract targets directly from error types, instead of relying on formatted error messages?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Error type can not indicate [function / invalid parameter] name I'm assuming, so it seems impossible? 🥵

// Note: we do not want the parenthesis
func.find('(').map(|i| &func[0..i])
}
// Nothing to hint display, return default error message
ErrMsgType::None => None,
}
}

/// Find the pattern for better hint display
/// return the exact index where the pattern first appears
fn find_target(input: &str, target: &str) -> Option<usize> {
// Regex pattern to find `target` not preceded or followed by an ASCII letter
// The pattern uses negative lookbehind (?<!...) and lookahead (?!...) to ensure
// the target is not surrounded by ASCII alphabetic characters
let pattern = format!(r"(?<![A-Za-z]){0}(?![A-Za-z])", fancy_regex::escape(target));
let Ok(re) = Regex::new(&pattern) else {
return None;
};

let Ok(Some(ma)) = re.find(input) else {
return None;
};

Some(ma.start())
}

/// Create a mock `udf_context`, which is used for semantic check
fn create_mock_udf_context(
arg_types: Vec<DataType>,
Expand Down Expand Up @@ -205,11 +275,47 @@ pub async fn handle_create_sql_function(
.into());
}
}
Err(e) => return Err(ErrorCode::InvalidInputSyntax(format!(
"failed to conduct semantic check, please see if you are calling non-existence functions or parameters\ndetailed error message: {}",
e.as_report()
))
.into()),
Err(e) => {
if let ErrorCode::BindErrorRoot { expr: _, error } = e.inner() {
let invalid_msg = error.to_string();

// First validate the message
let err_msg_type = validate_err_msg(invalid_msg.as_str());

// Get the name of the invalid item
// We will just display the first one found
let Some(invalid_item_name) =
extract_hint_display_target(err_msg_type, invalid_msg.as_str())
else {
return Err(
ErrorCode::InvalidInputSyntax(DEFAULT_ERR_MSG.into()).into()
);
};

// Find the invalid parameter / column / function
let Some(idx) = find_target(body.as_str(), invalid_item_name) else {
return Err(
ErrorCode::InvalidInputSyntax(DEFAULT_ERR_MSG.into()).into()
);
};

// The exact error position for `^` to point to
let position = format!(
"{}{}",
" ".repeat(idx + PROMPT.len() + 1),
"^".repeat(invalid_item_name.len())
);

return Err(ErrorCode::InvalidInputSyntax(format!(
"{}\n{}\n{}`{}`\n{}",
DEFAULT_ERR_MSG, invalid_msg, PROMPT, body, position
))
.into());
}

// Otherwise return the default error message
return Err(ErrorCode::InvalidInputSyntax(DEFAULT_ERR_MSG.into()).into());
}
}
} else {
return Err(ErrorCode::InvalidInputSyntax(
Expand Down
1 change: 1 addition & 0 deletions src/workspace-hack/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ aws-smithy-types = { version = "1", default-features = false, features = ["byte-
axum = { version = "0.6" }
base64 = { version = "0.21", features = ["alloc"] }
bigdecimal = { version = "0.4" }
bit-set = { version = "0.5" }
bit-vec = { version = "0.6" }
bitflags = { version = "2", default-features = false, features = ["serde", "std"] }
byteorder = { version = "1" }
Expand Down
Loading