diff --git a/e2e_test/batch/basic/collation.slt.part b/e2e_test/batch/basic/collation.slt.part new file mode 100644 index 0000000000000..54e657eaed9c8 --- /dev/null +++ b/e2e_test/batch/basic/collation.slt.part @@ -0,0 +1,108 @@ +statement ok +SET RW_IMPLICIT_FLUSH TO true; + +# collate in select + +query T +select 'a' collate "C"; +---- +a + +query B +select 'a' < 'b' collate "C"; +---- +t + +query B +select 'b' < 'a' collate "POSIX"; +---- +f + +query T +select (varchar 't') collate "C"; +---- +t + +# case-sensitive +statement error +select 'a' collate "c"; + +# case-sensitive +statement error +select 'a' collate "posix"; + +# case-sensitive +statement error +select 'a' collate "pOsIx"; + +query BT +select 'a' < 'b', 'a' collate "C"; +---- +t a + +query B +select 'a' < ('b' collate "C"); +---- +t + +query B +select '1' collate "C" > 2; +---- +f + +query B +select ('1' collate "C") = 1; +---- +t + +query I +select '10' collate "C" - 1; +---- +9 + +# only `text`, `varchar` and `char` are built-in collatable types (in PostgreSQL) +# the type of `('a' < 'b')` is Bool, so this statement SHOULD fail. +statement error +select ('a' < 'b') collate "C"; + +statement error +select 123 collate "C"; + +# parser issue +statement error +select varchar 't' collate "C"; + +statement error +select 'a' collate "Invalid"; + +# collate in create table + +statement ok +create table t1 ( + a text collate "C", + b int +); + +statement ok +create table t2 ( + a text collate "POSIX", + b int +); + +statement error +create table t3 ( + a text collate "pOsIx", + b int +); + +statement error +create table t4 ( + a text collate "POSIX", + b int collate "C" +); + +statement ok +drop table t1; + +statement ok +drop table t2; diff --git a/src/frontend/src/binder/expr/mod.rs b/src/frontend/src/binder/expr/mod.rs index 4433605b03a54..eacd5fc95dd65 100644 --- a/src/frontend/src/binder/expr/mod.rs +++ b/src/frontend/src/binder/expr/mod.rs @@ -193,6 +193,7 @@ impl Binder { count, } => self.bind_overlay(*expr, 
*new_substring, *start, count),
             Expr::Parameter { index } => self.bind_parameter(index),
+            Expr::Collate { expr, collation } => self.bind_collate(*expr, collation),
             _ => Err(ErrorCode::NotImplemented(
                 format!("unsupported expression {:?}", expr),
                 112.into(),
@@ -561,6 +562,44 @@ impl Binder {
         let lhs = self.bind_expr_inner(expr)?;
         lhs.cast_explicit(data_type).map_err(Into::into)
     }
+
+    /// Binds `<expr> COLLATE <collation>`.
+    ///
+    /// Real collation is not supported. Only the collation names `C` and `POSIX` (matched
+    /// case-sensitively) are accepted; after validation the collation is discarded and the
+    /// bound inner expression is returned as-is.
+    ///
+    /// # Errors
+    ///
+    /// - `NotImplemented` when `collation` is neither `C` nor `POSIX`.
+    /// - `NotSupported` when the bound expression's return type is not `DataType::Varchar`.
+    /// - Whatever error `bind_expr_inner` reports for `expr`.
+    pub fn bind_collate(&mut self, expr: Expr, collation: ObjectName) -> Result {
+        // The collation name is validated before the operand is bound.
+        if !matches!(collation.real_value().as_str(), "C" | "POSIX") {
+            return Err(ErrorCode::NotImplemented(
+                "Collate collation other than `C` or `POSIX` is not implemented".into(),
+                None.into(),
+            )
+            .into());
+        }
+
+        let bound_inner = self.bind_expr_inner(expr)?;
+        let ret_type = bound_inner.return_type();
+
+        // Only `DataType::Varchar` operands are treated as collatable.
+        if !matches!(ret_type, DataType::Varchar) {
+            return Err(ErrorCode::NotSupported(
+                format!("{} is not a collatable data type", ret_type),
+                "The only built-in collatable data type is `varchar`, please check your type"
+                    .into(),
+            )
+            .into());
+        }
+
+        // The collation itself has no effect: the operand is returned unchanged.
+        Ok(bound_inner)
+    }
 }
 
 /// Given a type `STRUCT`, this function binds the field `v1 int`.
diff --git a/src/frontend/src/handler/create_table.rs b/src/frontend/src/handler/create_table.rs
index 805931d6c3835..e3d9490eecff2 100644
--- a/src/frontend/src/handler/create_table.rs
+++ b/src/frontend/src/handler/create_table.rs
@@ -170,11 +170,28 @@ pub fn bind_sql_columns(column_defs: &[ColumnDef]) -> Result>
             .clone()
             .ok_or_else(|| ErrorCode::InvalidInputSyntax("data type is not specified".into()))?;
         if let Some(collation) = collation {
-            return Err(ErrorCode::NotImplemented(
-                format!("collation \"{}\"", collation),
-                None.into(),
-            )
-            .into());
+            // PostgreSQL will limit the datatypes that collate can work on.
+            // https://www.postgresql.org/docs/16/collation.html#COLLATION-CONCEPTS
+            // > The built-in collatable data types are `text`, `varchar`, and `char`.
+            //
+            // But we don't support real collation, we simply ignore it here.
+ if !["C", "POSIX"].contains(&collation.real_value().as_str()) { + return Err(ErrorCode::NotImplemented( + "Collate collation other than `C` or `POSIX` is not implemented".into(), + None.into(), + ) + .into()); + } + + match data_type { + AstDataType::Text | AstDataType::Varchar | AstDataType::Char(_) => {} + _ => { + return Err(ErrorCode::NotSupported( + format!("{} is not a collatable data type", data_type), + "The only built-in collatable data types are `varchar`, please check your type".into() + ).into()); + } + } } check_valid_column_name(&name.real_value())?;