Skip to content

Commit

Permalink
refactor(optimizer): record error contexts when casting composite typ…
Browse files Browse the repository at this point in the history
…es (#19449)

Signed-off-by: Bugen Zhao <[email protected]>
  • Loading branch information
BugenZhao authored Nov 25, 2024
1 parent b4bca57 commit 9abd62a
Show file tree
Hide file tree
Showing 15 changed files with 229 additions and 98 deletions.
8 changes: 4 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ parquet = { version = "53.2", features = ["async"] }
mysql_async = { version = "0.34", default-features = false, features = [
"default",
] }
thiserror-ext = "0.1.2"
thiserror-ext = { version = "0.2.1", features = ["backtrace"] }
tikv-jemalloc-ctl = { git = "https://github.com/risingwavelabs/jemallocator.git", rev = "64a2d9" }
tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git", features = [
"profiling",
Expand Down
11 changes: 8 additions & 3 deletions e2e_test/batch/basic/dml_update.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,15 @@ select * from t;
889 999

# Multiple assignments, to subquery with cast failure.
# TODO: this currently shows `cannot cast type "record" to "record"` because we wrap the subquery result
# into a struct, which is not quite clear.
statement error cannot cast type
statement error
update t set (v1, v2) = (select '888.88', 999);
----
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: cannot cast type "record" to "record"
2: cannot cast type "character varying" to "integer" in Assign context


# Multiple assignments, to subquery with mismatched column count.
statement error number of columns does not match number of values
Expand Down
17 changes: 17 additions & 0 deletions src/frontend/planner_test/tests/testdata/input/cast.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,20 @@
select count(*) FILTER(WHERE 'y') from t;
expected_outputs:
- batch_plan
- name: composite type cast error message (array)
sql: |
select '[]'::int[]::bytea[];
expected_outputs:
- binder_error
- name: composite type cast error message (struct)
sql: |
create table t (v struct<a struct<b int>, c bool>);
select v::struct<d struct<e bytea>, f bool> from t;
expected_outputs:
- binder_error
- name: composite type cast error message (map)
sql: |
create table t (v map(int, int));
select v::map(int, bytea) from t;
expected_outputs:
- binder_error
2 changes: 1 addition & 1 deletion src/frontend/planner_test/tests/testdata/output/array.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@
sql: |
create table t (v1 varchar[]);
insert into t values ('{c,' || 'd}');
binder_error: 'Bind error: cannot cast type "character varying" to "character varying[]" in Assign context'
binder_error: cannot cast type "character varying" to "character varying[]" in Assign context
- name: string to varchar[] in explicit context
sql: |
select ('{c,' || 'd}')::varchar[];
Expand Down
33 changes: 33 additions & 0 deletions src/frontend/planner_test/tests/testdata/output/cast.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,36 @@
└─BatchExchange { order: [], dist: Single }
└─BatchSimpleAgg { aggs: [count] }
└─BatchScan { table: t, columns: [], distribution: SomeShard }
- name: composite type cast error message (array)
sql: |
select '[]'::int[]::bytea[];
binder_error: |
Failed to bind expression: CAST(CAST('[]' AS INT[]) AS BYTEA[])
Caused by these errors (recent errors listed first):
1: cannot cast type "integer[]" to "bytea[]"
2: cannot cast type "integer" to "bytea" in Explicit context
- name: composite type cast error message (struct)
sql: |
create table t (v struct<a struct<b int>, c bool>);
select v::struct<d struct<e bytea>, f bool> from t;
binder_error: |
Failed to bind expression: CAST(v AS STRUCT<d STRUCT<e BYTEA>, f BOOLEAN>)
Caused by these errors (recent errors listed first):
1: cannot cast type "struct<a struct<b integer>, c boolean>" to "struct<d struct<e bytea>, f boolean>"
2: cannot cast struct field "a" to struct field "d"
3: cannot cast type "struct<b integer>" to "struct<e bytea>"
4: cannot cast struct field "b" to struct field "e"
5: cannot cast type "integer" to "bytea" in Explicit context
- name: composite type cast error message (map)
sql: |
create table t (v map(int, int));
select v::map(int, bytea) from t;
binder_error: |
Failed to bind expression: CAST(v AS MAP(INT,BYTEA))
Caused by these errors (recent errors listed first):
1: cannot cast type "map(integer,integer)" to "map(integer,bytea)"
2: cannot cast map value
3: cannot cast type "integer" to "bytea" in Explicit context
2 changes: 1 addition & 1 deletion src/frontend/planner_test/tests/testdata/output/expr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@
Failed to bind expression: concat_ws(v1, 1.2)
Caused by:
Bind error: cannot cast type "integer" to "character varying" in Implicit context
cannot cast type "integer" to "character varying" in Implicit context
- sql: |
create table t (v1 int);
select concat_ws() from t;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@
- sql: |
CREATE TABLE a (c STRUCT<i STRUCT<a INTEGER>, j INTEGER>);
INSERT INTO a VALUES (1);
binder_error: 'Bind error: cannot cast type "integer" to "struct<i struct<a integer>, j integer>" in Assign context'
binder_error: cannot cast type "integer" to "struct<i struct<a integer>, j integer>" in Assign context
- name: test struct type alignment in CASE expression
sql: |
select CASE WHEN false THEN ROW(0, INTERVAL '1') WHEN true THEN ROW(1.1, INTERVAL '1') ELSE ROW(1, INTERVAL '1') END;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
- sql: |
create table t (v1 int, v2 int);
update t set v1 = true;
binder_error: 'Bind error: cannot cast type "boolean" to "integer" in Assign context'
binder_error: cannot cast type "boolean" to "integer" in Assign context
- sql: |
create table t (v1 int, v2 int);
update t set v1 = v2 + 1;
Expand Down
10 changes: 5 additions & 5 deletions src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use itertools::Itertools;
use risingwave_common::types::{DataType, Fields};
use risingwave_frontend_macro::system_catalog;

use crate::catalog::system_catalog::SysCatalogReaderImpl;
use crate::expr::cast_map_array;
use crate::expr::CAST_TABLE;

/// The catalog `pg_cast` stores data type conversion paths.
/// Ref: [`https://www.postgresql.org/docs/current/catalog-pg-cast.html`]
Expand All @@ -31,12 +32,11 @@ struct PgCast {

#[system_catalog(table, "pg_catalog.pg_cast")]
fn read_pg_cast(_: &SysCatalogReaderImpl) -> Vec<PgCast> {
let mut cast_array = cast_map_array();
cast_array.sort();
cast_array
CAST_TABLE
.iter()
.sorted()
.enumerate()
.map(|(idx, (src, target, ctx))| PgCast {
.map(|(idx, ((src, target), ctx))| PgCast {
oid: idx as i32,
castsource: DataType::try_from(*src).unwrap().to_oid(),
casttarget: DataType::try_from(*target).unwrap().to_oid(),
Expand Down
8 changes: 8 additions & 0 deletions src/frontend/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ use risingwave_rpc_client::error::{RpcError, TonicStatusWrapper};
use thiserror::Error;
use tokio::task::JoinError;

use crate::expr::CastError;

/// The error type for the frontend crate, acting as the top-level error type for the
/// entire RisingWave project.
// TODO(error-handling): this is migrated from the `common` crate, and there could
Expand Down Expand Up @@ -114,6 +116,12 @@ pub enum ErrorCode {
#[backtrace]
error: BoxedError,
},
#[error(transparent)]
CastError(
#[from]
#[backtrace]
CastError,
),
#[error("Catalog error: {0}")]
CatalogError(
#[source]
Expand Down
60 changes: 24 additions & 36 deletions src/frontend/src/expr/function_call.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@ use itertools::Itertools;
use risingwave_common::catalog::Schema;
use risingwave_common::types::{DataType, ScalarImpl};
use risingwave_common::util::iter_util::ZipEqFast;
use thiserror::Error;
use thiserror_ext::AsReport;

use super::{cast_ok, infer_some_all, infer_type, CastContext, Expr, ExprImpl, Literal};
use crate::error::{ErrorCode, Result as RwResult};
use crate::expr::{ExprDisplay, ExprType, ExprVisitor, ImpureAnalyzer};
use super::type_inference::cast;
use super::{infer_some_all, infer_type, CastContext, CastError, Expr, ExprImpl, Literal};
use crate::error::Result as RwResult;
use crate::expr::{bail_cast_error, ExprDisplay, ExprType, ExprVisitor, ImpureAnalyzer};

#[derive(Clone, Eq, PartialEq, Hash)]
pub struct FunctionCall {
Expand Down Expand Up @@ -144,22 +143,23 @@ impl FunctionCall {
// else when eager parsing fails, just proceed as normal.
// Some callers are not ready to handle `'a'::int` error here.
}

let source = child.return_type();
if source == target {
Ok(())
// Casting from unknown is allowed in all context. And PostgreSQL actually does the parsing
// in frontend.
} else if child.is_untyped() || cast_ok(&source, &target, allows) {
// Always Ok below. Safe to mutate `child`.
let owned = std::mem::replace(child, ExprImpl::literal_bool(false));
*child = Self::new_unchecked(ExprType::Cast, vec![owned], target).into();
Ok(())
return Ok(());
}

if child.is_untyped() {
// Casting from unknown is allowed in all context. And PostgreSQL actually does the parsing
// in frontend.
} else {
Err(CastError(format!(
"cannot cast type \"{}\" to \"{}\" in {:?} context",
source, target, allows
)))
cast(&source, &target, allows)?;
}

// Always Ok below. Safe to mutate `child`.
let owned = std::mem::replace(child, ExprImpl::literal_bool(false));
*child = Self::new_unchecked(ExprType::Cast, vec![owned], target).into();
Ok(())
}

/// Cast a `ROW` expression to the target type. We intentionally disallow casting arbitrary
Expand All @@ -170,13 +170,13 @@ impl FunctionCall {
target_type: DataType,
allows: CastContext,
) -> Result<(), CastError> {
// Can only cast to a struct type.
let DataType::Struct(t) = &target_type else {
return Err(CastError(format!(
"cannot cast type \"{}\" to \"{}\" in {:?} context",
func.return_type(),
bail_cast_error!(
"cannot cast type \"{}\" to \"{}\"",
func.return_type(), // typically "record"
target_type,
allows
)));
);
};
match t.len().cmp(&func.inputs.len()) {
std::cmp::Ordering::Equal => {
Expand All @@ -189,10 +189,8 @@ impl FunctionCall {
func.return_type = target_type;
Ok(())
}
std::cmp::Ordering::Less => Err(CastError("Input has too few columns.".to_string())),
std::cmp::Ordering::Greater => {
Err(CastError("Input has too many columns.".to_string()))
}
std::cmp::Ordering::Less => bail_cast_error!("input has too few columns"),
std::cmp::Ordering::Greater => bail_cast_error!("input has too many columns"),
}
}

Expand Down Expand Up @@ -422,13 +420,3 @@ pub fn is_row_function(expr: &ExprImpl) -> bool {
}
false
}

#[derive(Debug, Error)]
#[error("{0}")]
pub struct CastError(pub(super) String);

impl From<CastError> for ErrorCode {
fn from(value: CastError) -> Self {
ErrorCode::BindError(value.to_report_string())
}
}
8 changes: 2 additions & 6 deletions src/frontend/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,7 @@ pub use risingwave_pb::expr::expr_node::Type as ExprType;
pub use session_timezone::{SessionTimezone, TimestamptzExprFinder};
pub use subquery::{Subquery, SubqueryKind};
pub use table_function::{TableFunction, TableFunctionType};
pub use type_inference::{
align_types, cast_map_array, cast_ok, cast_sigs, infer_some_all, infer_type, infer_type_name,
infer_type_with_sigmap, CastContext, CastSig, FuncSign,
};
pub use type_inference::*;
pub use user_defined_function::UserDefinedFunction;
pub use utils::*;
pub use window_function::WindowFunction;
Expand Down Expand Up @@ -300,7 +297,7 @@ impl ExprImpl {
))),
DataType::Int32 => Ok(self),
dt if dt.is_int() => Ok(self.cast_explicit(DataType::Int32)?),
_ => Err(CastError("Unsupported input type".to_string())),
_ => bail_cast_error!("unsupported input type"),
}
}

Expand Down Expand Up @@ -1171,7 +1168,6 @@ use risingwave_common::bail;
use risingwave_common::catalog::Schema;
use risingwave_common::row::OwnedRow;

use self::function_call::CastError;
use crate::binder::BoundSetExpr;
use crate::utils::Condition;

Expand Down
Loading

0 comments on commit 9abd62a

Please sign in to comment.