Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(optimizer): record error contexts when casting composite types #19449

Merged
merged 10 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ parquet = { version = "53.2", features = ["async"] }
mysql_async = { version = "0.34", default-features = false, features = [
"default",
] }
thiserror-ext = "0.1.2"
thiserror-ext = { version = "0.2.1", features = ["backtrace"] }
tikv-jemalloc-ctl = { git = "https://github.com/risingwavelabs/jemallocator.git", rev = "64a2d9" }
tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git", features = [
"profiling",
Expand Down
11 changes: 8 additions & 3 deletions e2e_test/batch/basic/dml_update.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,15 @@ select * from t;
889 999

# Multiple assignments, to subquery with cast failure.
# TODO: this currently shows `cannot cast type "record" to "record"` because we wrap the subquery result
# into a struct, which is not quite clear.
statement error cannot cast type
statement error
update t set (v1, v2) = (select '888.88', 999);
----
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: cannot cast type "record" to "record"
2: cannot cast type "character varying" to "integer" in Assign context


# Multiple assignments, to subquery with mismatched column count.
statement error number of columns does not match number of values
Expand Down
17 changes: 17 additions & 0 deletions src/frontend/planner_test/tests/testdata/input/cast.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,20 @@
select count(*) FILTER(WHERE 'y') from t;
expected_outputs:
- batch_plan
- name: composite type cast error message (array)
sql: |
select '[]'::int[]::bytea[];
expected_outputs:
- binder_error
- name: composite type cast error message (struct)
sql: |
create table t (v struct<a struct<b int>, c bool>);
select v::struct<d struct<e bytea>, f bool> from t;
expected_outputs:
- binder_error
- name: composite type cast error message (map)
sql: |
create table t (v map(int, int));
select v::map(int, bytea) from t;
expected_outputs:
- binder_error
2 changes: 1 addition & 1 deletion src/frontend/planner_test/tests/testdata/output/array.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@
sql: |
create table t (v1 varchar[]);
insert into t values ('{c,' || 'd}');
binder_error: 'Bind error: cannot cast type "character varying" to "character varying[]" in Assign context'
binder_error: cannot cast type "character varying" to "character varying[]" in Assign context
- name: string to varchar[] in explicit context
sql: |
select ('{c,' || 'd}')::varchar[];
Expand Down
33 changes: 33 additions & 0 deletions src/frontend/planner_test/tests/testdata/output/cast.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,36 @@
└─BatchExchange { order: [], dist: Single }
└─BatchSimpleAgg { aggs: [count] }
└─BatchScan { table: t, columns: [], distribution: SomeShard }
- name: composite type cast error message (array)
sql: |
select '[]'::int[]::bytea[];
binder_error: |
Failed to bind expression: CAST(CAST('[]' AS INT[]) AS BYTEA[])

Caused by these errors (recent errors listed first):
1: cannot cast type "integer[]" to "bytea[]"
2: cannot cast type "integer" to "bytea" in Explicit context
- name: composite type cast error message (struct)
sql: |
create table t (v struct<a struct<b int>, c bool>);
select v::struct<d struct<e bytea>, f bool> from t;
binder_error: |
Failed to bind expression: CAST(v AS STRUCT<d STRUCT<e BYTEA>, f BOOLEAN>)

Caused by these errors (recent errors listed first):
1: cannot cast type "struct<a struct<b integer>, c boolean>" to "struct<d struct<e bytea>, f boolean>"
2: cannot cast struct field "a" to struct field "d"
3: cannot cast type "struct<b integer>" to "struct<e bytea>"
4: cannot cast struct field "b" to struct field "e"
5: cannot cast type "integer" to "bytea" in Explicit context
- name: composite type cast error message (map)
sql: |
create table t (v map(int, int));
select v::map(int, bytea) from t;
binder_error: |
Failed to bind expression: CAST(v AS MAP(INT,BYTEA))

Caused by these errors (recent errors listed first):
1: cannot cast type "map(integer,integer)" to "map(integer,bytea)"
2: cannot cast map value
3: cannot cast type "integer" to "bytea" in Explicit context
2 changes: 1 addition & 1 deletion src/frontend/planner_test/tests/testdata/output/expr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@
Failed to bind expression: concat_ws(v1, 1.2)

Caused by:
Bind error: cannot cast type "integer" to "character varying" in Implicit context
cannot cast type "integer" to "character varying" in Implicit context
- sql: |
create table t (v1 int);
select concat_ws() from t;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@
- sql: |
CREATE TABLE a (c STRUCT<i STRUCT<a INTEGER>, j INTEGER>);
INSERT INTO a VALUES (1);
binder_error: 'Bind error: cannot cast type "integer" to "struct<i struct<a integer>, j integer>" in Assign context'
binder_error: cannot cast type "integer" to "struct<i struct<a integer>, j integer>" in Assign context
- name: test struct type alignment in CASE expression
sql: |
select CASE WHEN false THEN ROW(0, INTERVAL '1') WHEN true THEN ROW(1.1, INTERVAL '1') ELSE ROW(1, INTERVAL '1') END;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
- sql: |
create table t (v1 int, v2 int);
update t set v1 = true;
binder_error: 'Bind error: cannot cast type "boolean" to "integer" in Assign context'
binder_error: cannot cast type "boolean" to "integer" in Assign context
- sql: |
create table t (v1 int, v2 int);
update t set v1 = v2 + 1;
Expand Down
10 changes: 5 additions & 5 deletions src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use itertools::Itertools;
use risingwave_common::types::{DataType, Fields};
use risingwave_frontend_macro::system_catalog;

use crate::catalog::system_catalog::SysCatalogReaderImpl;
use crate::expr::cast_map_array;
use crate::expr::CAST_TABLE;

/// The catalog `pg_cast` stores data type conversion paths.
/// Ref: [`https://www.postgresql.org/docs/current/catalog-pg-cast.html`]
Expand All @@ -31,12 +32,11 @@ struct PgCast {

#[system_catalog(table, "pg_catalog.pg_cast")]
fn read_pg_cast(_: &SysCatalogReaderImpl) -> Vec<PgCast> {
let mut cast_array = cast_map_array();
cast_array.sort();
cast_array
CAST_TABLE
.iter()
.sorted()
.enumerate()
.map(|(idx, (src, target, ctx))| PgCast {
.map(|(idx, ((src, target), ctx))| PgCast {
oid: idx as i32,
castsource: DataType::try_from(*src).unwrap().to_oid(),
casttarget: DataType::try_from(*target).unwrap().to_oid(),
Expand Down
8 changes: 8 additions & 0 deletions src/frontend/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ use risingwave_rpc_client::error::{RpcError, TonicStatusWrapper};
use thiserror::Error;
use tokio::task::JoinError;

use crate::expr::CastError;

/// The error type for the frontend crate, acting as the top-level error type for the
/// entire RisingWave project.
// TODO(error-handling): this is migrated from the `common` crate, and there could
Expand Down Expand Up @@ -114,6 +116,12 @@ pub enum ErrorCode {
#[backtrace]
error: BoxedError,
},
#[error(transparent)]
CastError(
#[from]
#[backtrace]
CastError,
),
#[error("Catalog error: {0}")]
CatalogError(
#[source]
Expand Down
60 changes: 24 additions & 36 deletions src/frontend/src/expr/function_call.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@ use itertools::Itertools;
use risingwave_common::catalog::Schema;
use risingwave_common::types::{DataType, ScalarImpl};
use risingwave_common::util::iter_util::ZipEqFast;
use thiserror::Error;
use thiserror_ext::AsReport;

use super::{cast_ok, infer_some_all, infer_type, CastContext, Expr, ExprImpl, Literal};
use crate::error::{ErrorCode, Result as RwResult};
use crate::expr::{ExprDisplay, ExprType, ExprVisitor, ImpureAnalyzer};
use super::type_inference::cast;
use super::{infer_some_all, infer_type, CastContext, CastError, Expr, ExprImpl, Literal};
use crate::error::Result as RwResult;
use crate::expr::{bail_cast_error, ExprDisplay, ExprType, ExprVisitor, ImpureAnalyzer};

#[derive(Clone, Eq, PartialEq, Hash)]
pub struct FunctionCall {
Expand Down Expand Up @@ -144,22 +143,23 @@ impl FunctionCall {
// else when eager parsing fails, just proceed as normal.
// Some callers are not ready to handle `'a'::int` error here.
}

let source = child.return_type();
if source == target {
Ok(())
// Casting from unknown is allowed in all context. And PostgreSQL actually does the parsing
// in frontend.
} else if child.is_untyped() || cast_ok(&source, &target, allows) {
// Always Ok below. Safe to mutate `child`.
let owned = std::mem::replace(child, ExprImpl::literal_bool(false));
*child = Self::new_unchecked(ExprType::Cast, vec![owned], target).into();
Ok(())
return Ok(());
}

if child.is_untyped() {
// Casting from unknown is allowed in all context. And PostgreSQL actually does the parsing
// in frontend.
} else {
Err(CastError(format!(
"cannot cast type \"{}\" to \"{}\" in {:?} context",
source, target, allows
)))
cast(&source, &target, allows)?;
}

// Always Ok below. Safe to mutate `child`.
let owned = std::mem::replace(child, ExprImpl::literal_bool(false));
*child = Self::new_unchecked(ExprType::Cast, vec![owned], target).into();
Ok(())
}

/// Cast a `ROW` expression to the target type. We intentionally disallow casting arbitrary
Expand All @@ -170,13 +170,13 @@ impl FunctionCall {
target_type: DataType,
allows: CastContext,
) -> Result<(), CastError> {
// Can only cast to a struct type.
let DataType::Struct(t) = &target_type else {
return Err(CastError(format!(
"cannot cast type \"{}\" to \"{}\" in {:?} context",
func.return_type(),
bail_cast_error!(
"cannot cast type \"{}\" to \"{}\"",
func.return_type(), // typically "record"
target_type,
allows
)));
);
};
match t.len().cmp(&func.inputs.len()) {
std::cmp::Ordering::Equal => {
Expand All @@ -189,10 +189,8 @@ impl FunctionCall {
func.return_type = target_type;
Ok(())
}
std::cmp::Ordering::Less => Err(CastError("Input has too few columns.".to_string())),
std::cmp::Ordering::Greater => {
Err(CastError("Input has too many columns.".to_string()))
}
std::cmp::Ordering::Less => bail_cast_error!("input has too few columns"),
std::cmp::Ordering::Greater => bail_cast_error!("input has too many columns"),
}
}

Expand Down Expand Up @@ -422,13 +420,3 @@ pub fn is_row_function(expr: &ExprImpl) -> bool {
}
false
}

#[derive(Debug, Error)]
#[error("{0}")]
pub struct CastError(pub(super) String);

impl From<CastError> for ErrorCode {
fn from(value: CastError) -> Self {
ErrorCode::BindError(value.to_report_string())
}
}
8 changes: 2 additions & 6 deletions src/frontend/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,7 @@ pub use risingwave_pb::expr::expr_node::Type as ExprType;
pub use session_timezone::{SessionTimezone, TimestamptzExprFinder};
pub use subquery::{Subquery, SubqueryKind};
pub use table_function::{TableFunction, TableFunctionType};
pub use type_inference::{
align_types, cast_map_array, cast_ok, cast_sigs, infer_some_all, infer_type, infer_type_name,
infer_type_with_sigmap, CastContext, CastSig, FuncSign,
};
pub use type_inference::*;
pub use user_defined_function::UserDefinedFunction;
pub use utils::*;
pub use window_function::WindowFunction;
Expand Down Expand Up @@ -300,7 +297,7 @@ impl ExprImpl {
))),
DataType::Int32 => Ok(self),
dt if dt.is_int() => Ok(self.cast_explicit(DataType::Int32)?),
_ => Err(CastError("Unsupported input type".to_string())),
_ => bail_cast_error!("unsupported input type"),
}
}

Expand Down Expand Up @@ -1171,7 +1168,6 @@ use risingwave_common::bail;
use risingwave_common::catalog::Schema;
use risingwave_common::row::OwnedRow;

use self::function_call::CastError;
use crate::binder::BoundSetExpr;
use crate::utils::Condition;

Expand Down
Loading
Loading