risingwavelabs · xzhseh · Feb 29, 2024 · Feb 13, 2024 · Feb 13, 2024 · Feb 13, 2024
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/e2e_test/udf/sql_udf.slt b/e2e_test/udf/sql_udf.slt
@@ -280,23 +280,109 @@ $1 + 114514 + $1
 
 # Named sql udf with invalid parameter in body definition
 # Will be rejected at creation time
-statement error failed to find named parameter aa
+statement error
 create function unknown_parameter(a INT) returns int language sql as 'select a + aa + a';
+----
+db error: ERROR: Failed to run the query
+
+Caused by:
+  Invalid input syntax: Failed to conduct semantic check
+Bind error: [sql udf] failed to find named parameter aa
+In SQL UDF definition: `select a + aa + a`
+                                   ^^
+
+
+# With unnamed parameter
+statement error
+create function unnamed_param_hint(INT, INT) returns int language sql as 'select $1 + $3 + $2';
+----
+db error: ERROR: Failed to run the query
+
+Caused by:
+  Invalid input syntax: Failed to conduct semantic check
+Bind error: [sql udf] failed to find unnamed parameter $3
+In SQL UDF definition: `select $1 + $3 + $2`
+                                    ^^
+
+
+# A mixture of both
+statement error
+create function mix_hint(INT, aa INT, INT) returns int language sql as 'select $1 + aa + a + $2';
+----
+db error: ERROR: Failed to run the query
+
+Caused by:
+  Invalid input syntax: Failed to conduct semantic check
+Bind error: [sql udf] failed to find named parameter a
+In SQL UDF definition: `select $1 + aa + a + $2`
+                                         ^
+
+
+# Long invalid parameter
+statement error
+create function long_invalid_param(i_am_valid INT) returns int language sql as
+  'select i_am_valid + _this_is_invalid_please_properly_mark_the_boundary_of_this_invalid_paramerter_ + i_am_valid';
+----
+db error: ERROR: Failed to run the query
+
+Caused by:
+  Invalid input syntax: Failed to conduct semantic check
+Bind error: [sql udf] failed to find named parameter _this_is_invalid_please_properly_mark_the_boundary_of_this_invalid_paramerter_
+In SQL UDF definition: `select i_am_valid + _this_is_invalid_please_properly_mark_the_boundary_of_this_invalid_paramerter_ + i_am_valid`
+                                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+# sql udf with non-existent function
+statement error
+create function call_non_existent_function(INT) returns int language sql as 'select 1 + non_existent(1) + 1';
+----
+db error: ERROR: Failed to run the query
+
+Caused by:
+  Invalid input syntax: Failed to conduct semantic check
+function non_existent(integer) does not exist
+In SQL UDF definition: `select 1 + non_existent(1) + 1`
+                                   ^^^^^^^^^^^^
+
+
+# First display invalid parameter
+# if not found, try displaying non-existent functions
+# if still not found, display default error message without hint
+statement error
+create function param_func_mix(a INT, b INT) returns int language sql as 'select a + b + c + not_be_displayed(c)';
+----
+db error: ERROR: Failed to run the query
+
+Caused by:
+  Invalid input syntax: Failed to conduct semantic check
+Bind error: [sql udf] failed to find named parameter c
+In SQL UDF definition: `select a + b + c + not_be_displayed(c)`
+                                       ^
+
 
 statement error Expected end of statement, found: 💩
 create function call_regexp_replace() returns varchar language sql as 'select regexp_replace('💩💩💩💩💩foo🤔️bar亲爱的😭baz这不是爱情❤️‍🔥', 'baz(...)', '这是🥵', 'ic')';
 
 # Recursive definition can NOT be accepted at present due to semantic check
-statement error failed to conduct semantic check, please see if you are calling non-existent functions
+statement error
 create function recursive(INT, INT) returns int language sql as 'select recursive($1, $2) + recursive($1, $2)';
+----
+db error: ERROR: Failed to run the query
+
+Caused by:
+  Invalid input syntax: Failed to conduct semantic check
+function recursive(integer, integer) does not exist
+In SQL UDF definition: `select recursive($1, $2) + recursive($1, $2)`
+                               ^^^^^^^^^
+
 
 # Complex but error-prone definition, recursive & normal sql udfs interleaving
-statement error failed to conduct semantic check, please see if you are calling non-existent functions
+statement error Failed to conduct semantic check
 create function recursive_non_recursive(INT, INT) returns int language sql as 'select recursive($1, $2) + sub($1, $2)';
 
 # Create a valid recursive function
 # Please note we do NOT support actual running the recursive sql udf at present
-statement error failed to conduct semantic check, please see if you are calling non-existent functions
+statement error Failed to conduct semantic check
 create function fib(INT) returns int
  language sql as 'select case
         when $1 = 0 then 0
@@ -307,7 +393,7 @@ create function fib(INT) returns int
       end;';
 
 # Calling a non-existent function
-statement error failed to conduct semantic check, please see if you are calling non-existent functions
+statement error Failed to conduct semantic check
 create function non_exist(INT) returns int language sql as 'select yo(114514)';
 
 # Try to create an sql udf with unnamed parameters whose return type mismatches with the sql body definition

diff --git a/src/frontend/Cargo.toml b/src/frontend/Cargo.toml
@@ -33,6 +33,7 @@ easy-ext = "1"
 educe = "0.5"
 either = "1"
 enum-as-inner = "0.6"
+fancy-regex = "0.11.0"
 fixedbitset = "0.4.2"
 futures = { version = "0.3", default-features = false, features = ["alloc"] }
 futures-async-stream = { workspace = true }

diff --git a/src/frontend/src/binder/expr/column.rs b/src/frontend/src/binder/expr/column.rs
@@ -18,6 +18,7 @@ use risingwave_sqlparser::ast::Ident;
 use crate::binder::Binder;
 use crate::error::{ErrorCode, Result};
 use crate::expr::{CorrelatedInputRef, ExprImpl, ExprType, FunctionCall, InputRef, Literal};
+use crate::handler::create_sql_function::SQL_UDF_PATTERN;
 
 impl Binder {
     pub fn bind_column(&mut self, idents: &[Ident]) -> Result<ExprImpl> {
@@ -54,8 +55,10 @@ impl Binder {
                 // there will not exist any column identifiers
                 // And invalid cases should already be caught
                 // during semantic check phase
+                // Note: the error message here also help with hint display
+                // when invalid definition occurs at sql udf creation time
                 return Err(ErrorCode::BindError(format!(
-                    "failed to find named parameter {column_name}"
+                    "{SQL_UDF_PATTERN} failed to find named parameter {column_name}"
                 ))
                 .into());
             }

diff --git a/src/frontend/src/binder/expr/mod.rs b/src/frontend/src/binder/expr/mod.rs
@@ -27,6 +27,7 @@ use crate::binder::expr::function::SYS_FUNCTION_WITHOUT_ARGS;
 use crate::binder::Binder;
 use crate::error::{ErrorCode, Result, RwError};
 use crate::expr::{Expr as _, ExprImpl, ExprType, FunctionCall, InputRef, Parameter, SubqueryKind};
+use crate::handler::create_sql_function::SQL_UDF_PATTERN;
 
 mod binary_op;
 mod column;
@@ -393,6 +394,12 @@ impl Binder {
             if let Some(expr) = self.udf_context.get_expr(&format!("${index}")) {
                 return Ok(expr.clone());
             }
+            // Same as `bind_column`, the error message here
+            // help with hint display when invalid definition occurs
+            return Err(ErrorCode::BindError(format!(
+                "{SQL_UDF_PATTERN} failed to find unnamed parameter ${index}"
+            ))
+            .into());
         }
 
         Ok(Parameter::new(index, self.param_types.clone()).into())

diff --git a/src/frontend/src/handler/create_sql_function.rs b/src/frontend/src/handler/create_sql_function.rs
@@ -14,6 +14,7 @@
 
 use std::collections::HashMap;
 
+use fancy_regex::Regex;
 use itertools::Itertools;
 use pgwire::pg_response::StatementType;
 use risingwave_common::catalog::FunctionId;
@@ -24,14 +25,83 @@ use risingwave_sqlparser::ast::{
     CreateFunctionBody, FunctionDefinition, ObjectName, OperateFunctionArg,
 };
 use risingwave_sqlparser::parser::{Parser, ParserError};
-use thiserror_ext::AsReport;
 
 use super::*;
 use crate::binder::UdfContext;
 use crate::catalog::CatalogError;
 use crate::expr::{Expr, ExprImpl, Literal};
 use crate::{bind_data_type, Binder};
 
+/// The error type for hint display
+/// Currently we will try invalid parameter first
+/// Then try to find non-existent functions
+enum ErrMsgType {
+    Parameter,
+    Function,
-enum ErrMsgType {
-    Parameter,
-    Function,
+enum ErrMsgType {
+    Parameter(String),
+    Function(String),
-enum ErrMsgType {
-    Parameter,
-    Function,
+enum ErrMsgType {
+    Parameter(String),
+    Function(String),
+    // Not yet support
+    None,
+}
+
+const DEFAULT_ERR_MSG: &str = "Failed to conduct semantic check";
+
+/// Used for hint display
+const PROMPT: &str = "In SQL UDF definition: ";
+
+/// Used for detecting non-existent function
+const FUNCTION_KEYWORD: &str = "function";
+
+/// Used for detecting invalid parameters
+pub const SQL_UDF_PATTERN: &str = "[sql udf]";
+
+/// Validate the error message to see if
+/// it's possible to improve the display to users
+fn validate_err_msg(invalid_msg: &str) -> ErrMsgType {
+    // First try invalid parameters
+    if invalid_msg.contains(SQL_UDF_PATTERN) {
+        ErrMsgType::Parameter
+    } else if invalid_msg.contains(FUNCTION_KEYWORD) {
+        ErrMsgType::Function
+    } else {
+        // Nothing could be better display
+        ErrMsgType::None
+    }
+}
+
+/// Extract the target name to hint display
+/// according to the type of the error message item
+fn extract_hint_display_target(err_msg_type: ErrMsgType, invalid_msg: &str) -> Option<&str> {
+    match err_msg_type {
+        // e.g., [sql udf] failed to find named parameter <target name>
+        ErrMsgType::Parameter => invalid_msg.split_whitespace().last(),
+        // e.g., function <target name> does not exist
+        ErrMsgType::Function => {
+            let func = invalid_msg.split_whitespace().nth(1).unwrap_or("null");
+            // Note: we do not want the parenthesis
+            func.find('(').map(|i| &func[0..i])
+        }
+        // Nothing to hint display, return default error message
+        ErrMsgType::None => None,
+    }
+}
+
+/// Find the pattern for better hint display
+/// return the exact index where the pattern first appears
+fn find_target(input: &str, target: &str) -> Option<usize> {
+    // Regex pattern to find `target` not preceded or followed by an ASCII letter
+    // The pattern uses negative lookbehind (?<!...) and lookahead (?!...) to ensure
+    // the target is not surrounded by ASCII alphabetic characters
+    let pattern = format!(r"(?<![A-Za-z]){0}(?![A-Za-z])", fancy_regex::escape(target));
+    let Ok(re) = Regex::new(&pattern) else {
+        return None;
+    };
+
+    let Ok(Some(ma)) = re.find(input) else {
+        return None;
+    };
+
+    Some(ma.start())
+}
+
 /// Create a mock `udf_context`, which is used for semantic check
 fn create_mock_udf_context(
     arg_types: Vec<DataType>,
@@ -205,11 +275,47 @@ pub async fn handle_create_sql_function(
                         .into());
                     }
                 }
-                Err(e) => return Err(ErrorCode::InvalidInputSyntax(format!(
-                    "failed to conduct semantic check, please see if you are calling non-existence functions or parameters\ndetailed error message: {}",
-                    e.as_report()
-                ))
-                .into()),
+                Err(e) => {
+                    if let ErrorCode::BindErrorRoot { expr: _, error } = e.inner() {
+                        let invalid_msg = error.to_string();
+
+                        // First validate the message
+                        let err_msg_type = validate_err_msg(invalid_msg.as_str());
+
+                        // Get the name of the invalid item
+                        // We will just display the first one found
+                        let Some(invalid_item_name) =
+                            extract_hint_display_target(err_msg_type, invalid_msg.as_str())
+                        else {
+                            return Err(
+                                ErrorCode::InvalidInputSyntax(DEFAULT_ERR_MSG.into()).into()
+                            );
+                        };
+
+                        // Find the invalid parameter / column / function
+                        let Some(idx) = find_target(body.as_str(), invalid_item_name) else {
+                            return Err(
+                                ErrorCode::InvalidInputSyntax(DEFAULT_ERR_MSG.into()).into()
+                            );
+                        };
+
+                        // The exact error position for `^` to point to
+                        let position = format!(
+                            "{}{}",
+                            " ".repeat(idx + PROMPT.len() + 1),
+                            "^".repeat(invalid_item_name.len())
+                        );
+
+                        return Err(ErrorCode::InvalidInputSyntax(format!(
+                            "{}\n{}\n{}`{}`\n{}",
+                            DEFAULT_ERR_MSG, invalid_msg, PROMPT, body, position
+                        ))
+                        .into());
+                    }
+
+                    // Otherwise return the default error message
+                    return Err(ErrorCode::InvalidInputSyntax(DEFAULT_ERR_MSG.into()).into());
+                }
             }
         } else {
             return Err(ErrorCode::InvalidInputSyntax(

diff --git a/src/workspace-hack/Cargo.toml b/src/workspace-hack/Cargo.toml
@@ -32,6 +32,7 @@ aws-smithy-types = { version = "1", default-features = false, features = ["byte-
 axum = { version = "0.6" }
 base64 = { version = "0.21", features = ["alloc"] }
 bigdecimal = { version = "0.4" }
+bit-set = { version = "0.5" }
 bit-vec = { version = "0.6" }
 bitflags = { version = "2", default-features = false, features = ["serde", "std"] }
 byteorder = { version = "1" }