diff --git a/src/expr/core/src/sig/cast.rs b/src/expr/core/src/sig/cast.rs
deleted file mode 100644
index 73841a85e303e..0000000000000
--- a/src/expr/core/src/sig/cast.rs
+++ /dev/null
@@ -1,143 +0,0 @@
-// Copyright 2023 RisingWave Labs
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::collections::BTreeMap;
-use std::sync::LazyLock;
-
-use parse_display::Display;
-use risingwave_common::types::DataTypeName;
-
-#[derive(Clone, Debug)]
-pub struct CastSig {
-    pub from_type: DataTypeName,
-    pub to_type: DataTypeName,
-    pub context: CastContext,
-}
-
-/// The context a cast operation is invoked in. An implicit cast operation is allowed in a context
-/// that allows explicit casts, but not vice versa. See details in
-/// [PG](https://www.postgresql.org/docs/current/catalog-pg-cast.html).
-#[derive(Clone, Copy, Debug, Display, Eq, Ord, PartialEq, PartialOrd)]
-pub enum CastContext {
-    #[display("i")]
-    Implicit,
-    #[display("a")]
-    Assign,
-    #[display("e")]
-    Explicit,
-}
-
-pub type CastMap = BTreeMap<(DataTypeName, DataTypeName), CastContext>;
-
-pub fn cast_sigs() -> impl Iterator<Item = CastSig> {
-    CAST_MAP
-        .iter()
-        .map(|((from_type, to_type), context)| CastSig {
-            from_type: *from_type,
-            to_type: *to_type,
-            context: *context,
-        })
-}
-
-pub static CAST_MAP: LazyLock<CastMap> = LazyLock::new(|| {
-    use DataTypeName as T;
-
-    // Implicit cast operations in PG are organized in 3 sequences, with the reverse direction being
-    // assign cast operations.
-    // https://github.com/postgres/postgres/blob/e0064f0ff6dfada2695330c6bc1945fa7ae813be/src/include/catalog/pg_cast.dat#L18-L20
-    let mut m = BTreeMap::new();
-    insert_cast_seq(
-        &mut m,
-        &[
-            T::Int16,
-            T::Int32,
-            T::Int64,
-            T::Decimal,
-            T::Float32,
-            T::Float64,
-        ],
-    );
-    insert_cast_seq(&mut m, &[T::Date, T::Timestamp, T::Timestamptz]);
-    insert_cast_seq(&mut m, &[T::Time, T::Interval]);
-
-    // Casting to and from string type.
-    for t in [
-        T::Boolean,
-        T::Int16,
-        T::Int32,
-        T::Int64,
-        T::Int256,
-        T::Decimal,
-        T::Float32,
-        T::Float64,
-        T::Date,
-        T::Timestamp,
-        T::Timestamptz,
-        T::Time,
-        T::Interval,
-        T::Jsonb,
-        T::Bytea,
-    ] {
-        m.insert((t, T::Varchar), CastContext::Assign);
-        m.insert((T::Varchar, t), CastContext::Explicit);
-    }
-
-    // Casting between `decimal`, `int256`, and `float` is not allowed.
-    m.insert((T::Int16, T::Int256), CastContext::Implicit);
-    m.insert((T::Int32, T::Int256), CastContext::Implicit);
-    m.insert((T::Int64, T::Int256), CastContext::Implicit);
-
-    m.insert((T::Int256, T::Float64), CastContext::Explicit);
-
-    // Misc casts allowed by PG that are neither in implicit cast sequences nor from/to string.
-    m.insert((T::Timestamp, T::Time), CastContext::Assign);
-    m.insert((T::Timestamptz, T::Time), CastContext::Assign);
-    m.insert((T::Boolean, T::Int32), CastContext::Explicit);
-    m.insert((T::Int32, T::Boolean), CastContext::Explicit);
-
-    // Casting from jsonb to bool / number.
-    for t in [
-        T::Boolean,
-        T::Int16,
-        T::Int32,
-        T::Int64,
-        T::Decimal,
-        T::Float32,
-        T::Float64,
-    ] {
-        m.insert((T::Jsonb, t), CastContext::Explicit);
-    }
-
-    m
-});
-
-fn insert_cast_seq(
-    m: &mut BTreeMap<(DataTypeName, DataTypeName), CastContext>,
-    types: &[DataTypeName],
-) {
-    for (source_index, source_type) in types.iter().enumerate() {
-        for (target_index, target_type) in types.iter().enumerate() {
-            let cast_context = match source_index.cmp(&target_index) {
-                std::cmp::Ordering::Less => CastContext::Implicit,
-                // Unnecessary cast between the same type should have been removed.
-                // Note that sizing cast between `NUMERIC(18, 3)` and `NUMERIC(20, 4)` or between
-                // `int` and `int not null` may still be necessary. But we do not have such types
-                // yet.
-                std::cmp::Ordering::Equal => continue,
-                std::cmp::Ordering::Greater => CastContext::Assign,
-            };
-            m.insert((*source_type, *target_type), cast_context);
-        }
-    }
-}
diff --git a/src/expr/core/src/sig/mod.rs b/src/expr/core/src/sig/mod.rs
index 738b4f6b9eaf9..32593607381c5 100644
--- a/src/expr/core/src/sig/mod.rs
+++ b/src/expr/core/src/sig/mod.rs
@@ -29,8 +29,6 @@ use crate::expr::BoxedExpression;
 use crate::table_function::BoxedTableFunction;
 use crate::ExprError;
 
-pub mod cast;
-
 /// The global registry of all function signatures.
 pub static FUNCTION_REGISTRY: LazyLock = LazyLock::new(|| unsafe {
     // SAFETY: this function is called after all `#[ctor]` functions are called.
diff --git a/src/frontend/src/expr/type_inference/cast.rs b/src/frontend/src/expr/type_inference/cast.rs
index b941732a2a720..ccd99048557ee 100644
--- a/src/frontend/src/expr/type_inference/cast.rs
+++ b/src/frontend/src/expr/type_inference/cast.rs
@@ -12,11 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::collections::BTreeMap;
+use std::sync::LazyLock;
+
 use itertools::Itertools as _;
+use parse_display::Display;
 use risingwave_common::error::ErrorCode;
 use risingwave_common::types::{DataType, DataTypeName};
 use risingwave_common::util::iter_util::ZipEqFast;
-pub use risingwave_expr::sig::cast::*;
 
 use crate::expr::{Expr as _, ExprImpl, InputRef, Literal};
 
@@ -165,6 +168,87 @@ pub fn cast_map_array() -> Vec<(DataTypeName, DataTypeName, CastContext)> {
         .collect_vec()
 }
 
+#[derive(Clone, Debug)]
+pub struct CastSig {
+    pub from_type: DataTypeName,
+    pub to_type: DataTypeName,
+    pub context: CastContext,
+}
+
+/// The context a cast operation is invoked in. An implicit cast operation is allowed in a context
+/// that allows explicit casts, but not vice versa. See details in
+/// [PG](https://www.postgresql.org/docs/current/catalog-pg-cast.html).
+#[derive(Clone, Copy, Debug, Display, Eq, Ord, PartialEq, PartialOrd)]
+pub enum CastContext {
+    #[display("i")]
+    Implicit,
+    #[display("a")]
+    Assign,
+    #[display("e")]
+    Explicit,
+}
+
+pub type CastMap = BTreeMap<(DataTypeName, DataTypeName), CastContext>;
+
+pub fn cast_sigs() -> impl Iterator<Item = CastSig> {
+    CAST_MAP
+        .iter()
+        .map(|((from_type, to_type), context)| CastSig {
+            from_type: *from_type,
+            to_type: *to_type,
+            context: *context,
+        })
+}
+
+pub static CAST_MAP: LazyLock<CastMap> = LazyLock::new(|| {
+    // cast rules:
+    // 1. implicit cast operations in PG are organized in 3 sequences,
+    //    with the reverse direction being assign cast operations.
+    //    https://github.com/postgres/postgres/blob/e0064f0ff6dfada2695330c6bc1945fa7ae813be/src/include/catalog/pg_cast.dat#L18-L20
+    //    1. int2 -> int4 -> int8 -> numeric -> float4 -> float8
+    //    2. date -> timestamp -> timestamptz
+    //    3. time -> interval
+    // 2. any -> varchar is assign and varchar -> any is explicit
+    // 3. jsonb -> bool/number is explicit
+    // 4. int32 <-> bool is explicit
+    // 5. timestamp/timestamptz -> time is assign
+    // 6. int2/int4/int8 -> int256 is implicit and int256 -> float8 is explicit
+    use DataTypeName::*;
+    const CAST_TABLE: &[(&str, DataTypeName)] = &[
+        // 123456789ABCDEF
+        (". e            a", Boolean),     // 0
+        (" .iiiiii       a", Int16),       // 1
+        ("ea.iiiii       a", Int32),       // 2
+        (" aa.iiii       a", Int64),       // 3
+        (" aaa.ii        a", Decimal),     // 4
+        (" aaaa.i        a", Float32),     // 5
+        (" aaaaa.        a", Float64),     // 6
+        ("      e.       a", Int256),      // 7
+        ("        .ii    a", Date),        // 8
+        ("        a.ia   a", Timestamp),   // 9
+        ("        aa.a   a", Timestamptz), // A
+        ("           .i  a", Time),        // B
+        ("           a.  a", Interval),    // C
+        ("eeeeeee      . a", Jsonb),       // D
+        ("              .a", Bytea),       // E
+        ("eeeeeeeeeeeeeee.", Varchar),     // F
+    ];
+    let mut map = BTreeMap::new();
+    for (row, source) in CAST_TABLE {
+        for ((_, target), c) in CAST_TABLE.iter().zip_eq_fast(row.bytes()) {
+            let context = match c {
+                b' ' | b'.' => continue,
+                b'i' => CastContext::Implicit,
+                b'a' => CastContext::Assign,
+                b'e' => CastContext::Explicit,
+                _ => unreachable!("invalid cast table char"),
+            };
+            map.insert((*source, *target), context);
+        }
+    }
+    map
+});
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/src/tests/sqlsmith/src/sql_gen/expr.rs b/src/tests/sqlsmith/src/sql_gen/expr.rs
index 9999dcd9ea641..b1b895e492b63 100644
--- a/src/tests/sqlsmith/src/sql_gen/expr.rs
+++ b/src/tests/sqlsmith/src/sql_gen/expr.rs
@@ -16,8 +16,8 @@ use itertools::Itertools;
 use rand::seq::SliceRandom;
 use rand::Rng;
 use risingwave_common::types::{DataType, DataTypeName, StructType};
-use risingwave_expr::sig::cast::cast_sigs;
 use risingwave_expr::sig::FUNCTION_REGISTRY;
+use risingwave_frontend::expr::cast_sigs;
 use risingwave_sqlparser::ast::{Expr, Ident, OrderByExpr, Value};
 
 use crate::sql_gen::types::data_type_to_ast_data_type;
diff --git a/src/tests/sqlsmith/src/sql_gen/types.rs b/src/tests/sqlsmith/src/sql_gen/types.rs
index 5ef762f9951ad..ca462bb9f3601 100644
--- a/src/tests/sqlsmith/src/sql_gen/types.rs
+++ b/src/tests/sqlsmith/src/sql_gen/types.rs
@@ -20,9 +20,8 @@ use std::sync::LazyLock;
 use itertools::Itertools;
 use risingwave_common::types::{DataType, DataTypeName};
 use risingwave_expr::aggregate::AggKind;
-use risingwave_expr::sig::cast::{cast_sigs, CastContext, CastSig as RwCastSig};
 use risingwave_expr::sig::{FuncSign, FUNCTION_REGISTRY};
-use risingwave_frontend::expr::ExprType;
+use risingwave_frontend::expr::{cast_sigs, CastContext, CastSig as RwCastSig, ExprType};
 use risingwave_sqlparser::ast::{BinaryOperator, DataType as AstDataType, StructField};
 
 pub(super) fn data_type_to_ast_data_type(data_type: &DataType) -> AstDataType {