From 2628460bc5d28d0b5add8d5c7dd2ce8eb7705e1c Mon Sep 17 00:00:00 2001 From: Runji Wang Date: Wed, 15 Nov 2023 16:44:21 +0800 Subject: [PATCH 1/3] fix(type): fix parsing array literal and printing struct value (#13229) Signed-off-by: Runji Wang --- e2e_test/batch/basic/array.slt.part | 5 +- .../test_multi_column_reference.slt.part | 2 +- .../batch/order/test_order_struct.slt.part | 12 +- e2e_test/batch/types/jsonb.slt.part | 10 +- .../types/struct/nested_structs.slt.part | 12 +- e2e_test/batch/types/struct/struct.slt.part | 44 ++- .../batch/types/struct/struct_case.slt.part | 2 +- .../batch/types/struct/struct_cast.slt.part | 18 +- .../struct/struct_cross_product.slt.part | 50 ++-- e2e_test/ddl/alter_rename_relation.slt | 4 +- e2e_test/sink/kafka/avro.slt | 4 +- e2e_test/sink/kafka/protobuf.slt | 2 +- e2e_test/source/basic/kafka.slt | 6 +- e2e_test/source/basic/kafka_batch.slt | 2 +- .../basic/old_row_format_syntax/kafka.slt | 6 +- .../old_row_format_syntax/kafka_batch.slt | 2 +- e2e_test/streaming/struct_table.slt | 8 +- e2e_test/udf/udf.slt | 14 +- src/common/src/array/list_array.rs | 282 +++++++++++++++++- src/common/src/array/struct_array.rs | 56 +++- src/common/src/types/mod.rs | 7 +- src/expr/impl/benches/expr.rs | 5 + src/expr/impl/src/scalar/cast.rs | 83 +----- .../tests/testdata/input/array.yaml | 2 +- .../tests/testdata/output/array.yaml | 10 +- .../tests/testdata/output/expr.yaml | 2 +- src/tests/regress/data/expected/arrays.out | 12 +- src/tests/regress/data/sql/arrays.sql | 16 +- 28 files changed, 476 insertions(+), 202 deletions(-) diff --git a/e2e_test/batch/basic/array.slt.part b/e2e_test/batch/basic/array.slt.part index 3229118b6d001..c02ec89312c8c 100644 --- a/e2e_test/batch/basic/array.slt.part +++ b/e2e_test/batch/basic/array.slt.part @@ -157,14 +157,13 @@ select pg_typeof((v1::integer[][])[2][1]), (v1::integer[][])[2][1] from (values integer 2 # Test multiple castings of the same input. -query TTI +query TI select - (arr::varchar[][])[1][2] as double_varchar, (arr::varchar[][][])[1][2][3] as triple_varchar, (arr::integer[][][])[1][2][3] as triple_integer from (values ('{{{1, 2, 3}, {44, 55, 66}}}')) as t(arr); ---- -{44, 55, 66} 66 66 +66 66 # Test cast from array to string query T diff --git a/e2e_test/batch/duckdb/select/test_multi_column_reference.slt.part b/e2e_test/batch/duckdb/select/test_multi_column_reference.slt.part index 23b252b33f32d..8b3e5b0e63d39 100644 --- a/e2e_test/batch/duckdb/select/test_multi_column_reference.slt.part +++ b/e2e_test/batch/duckdb/select/test_multi_column_reference.slt.part @@ -129,7 +129,7 @@ INSERT INTO t.t VALUES (ROW(ROW(ROW(42)))); query I SELECT (t.t.t).t FROM t.t; ---- -((42)) +("(42)") query I SELECT (t.t.t).t.t FROM t.t; diff --git a/e2e_test/batch/order/test_order_struct.slt.part b/e2e_test/batch/order/test_order_struct.slt.part index 0ceedf969251e..de55d2f964fc7 100644 --- a/e2e_test/batch/order/test_order_struct.slt.part +++ b/e2e_test/batch/order/test_order_struct.slt.part @@ -12,10 +12,10 @@ select * from t order by 1; ---- (1,2) (1,3) -(1,NULL) +(1,) (2,1) (2,2) -(NULL,NULL) +(,) statement ok drop table t; @@ -29,10 +29,10 @@ insert into t values (('abc',('bcd',2)),1), (('abc',('bcd',2)),2), (('a',('bcbcd query TTII select * from t order by 1 DESC, 2 ASC; ---- -(b,(a,2)) 2 -(abc,(bcd,2)) 1 -(abc,(bcd,2)) 2 -(a,(bcbcd,2)) 2 +(b,"(a,2)") 2 +(abc,"(bcd,2)") 1 +(abc,"(bcd,2)") 2 +(a,"(bcbcd,2)") 2 statement ok drop table t; diff --git a/e2e_test/batch/types/jsonb.slt.part b/e2e_test/batch/types/jsonb.slt.part index 9812f254ff0cc..949b18af315c0 100644 --- a/e2e_test/batch/types/jsonb.slt.part +++ b/e2e_test/batch/types/jsonb.slt.part @@ -227,14 +227,8 @@ b "bar" query T select jsonb_each('{"a":"foo", "b":"bar"}'::jsonb); ---- -(a,"foo") -(b,"bar") - -# FIXME: the output format is inconsistent with pg -# https://github.com/risingwavelabs/risingwave/issues/4769 -# -# (a,"""foo""") -# (b,"""bar""") +(a,"""foo""") +(b,"""bar""") statement error cannot deconstruct select * from jsonb_each('null'::jsonb) diff --git a/e2e_test/batch/types/struct/nested_structs.slt.part b/e2e_test/batch/types/struct/nested_structs.slt.part index 9ba4b0f2718b6..cff09a75e3623 100644 --- a/e2e_test/batch/types/struct/nested_structs.slt.part +++ b/e2e_test/batch/types/struct/nested_structs.slt.part @@ -13,7 +13,7 @@ INSERT INTO a VALUES (ROW(ROW(3), 4)) query I SELECT * FROM a ---- -((3),4) +("(3)",4) query I SELECT ((c).i).a FROM a @@ -26,7 +26,7 @@ INSERT INTO a VALUES (NULL) query I rowsort SELECT * FROM a ---- -((3),4) +("(3)",4) NULL query I rowsort @@ -48,10 +48,10 @@ INSERT INTO a VALUES (ROW(NULL, 1)) query I rowsort SELECT * FROM a ---- -((1),NULL) -((3),4) -((NULL),1) -(NULL,1) +("()",1) +("(1)",) +("(3)",4) +(,1) NULL # nested struct mismatch on insertion diff --git a/e2e_test/batch/types/struct/struct.slt.part b/e2e_test/batch/types/struct/struct.slt.part index 58c56bc81fb97..3829838c4e1fe 100644 --- a/e2e_test/batch/types/struct/struct.slt.part +++ b/e2e_test/batch/types/struct/struct.slt.part @@ -25,8 +25,8 @@ select (v2).* from st; query II rowsort select * from st; ---- -1 (1,(1,2)) -1 (1,(1,3)) +1 (1,"(1,2)") +1 (1,"(1,3)") query I select (v2).v2.v1 from st; @@ -43,22 +43,22 @@ select (v2).v2 from st; query II select * from st where v2 = (1,(1,3)); ---- -1 (1,(1,3)) +1 (1,"(1,3)") query II select * from st where v2 != (1,(1,3)); ---- -1 (1,(1,2)) +1 (1,"(1,2)") query II select * from st where (v2).v2 < (1,3); ---- -1 (1,(1,2)) +1 (1,"(1,2)") query II select * from st where (v2).v2 > (1,2); ---- -1 (1,(1,3)) +1 (1,"(1,3)") query I select max((v2).v2) from st; @@ -82,7 +82,7 @@ insert into st values(1,(1,(1,null))); query II select * from st; ---- -1 (1,(1,NULL)) +1 (1,"(1,)") statement ok drop table st; @@ -90,7 +90,7 @@ drop table st; query T select Row('foo', 'bar', null); ---- -(foo,bar,NULL) +(foo,bar,) query T select Row(); @@ -100,7 +100,7 @@ select Row(); query T select Row(null); ---- -(NULL) +() statement ok create table t (v1 int); @@ -136,3 +136,29 @@ select * from t where Row(1,v1*2) > Row(1,2); statement ok drop table t; + +# row to text +query TTTT +select Row('a'), Row(''), Row('"'), Row(' a '), Row('a b'); +---- +(a) ("") ("""") (" a ") ("a b") + +query TTT +select Row('{}'), Row('[]'), Row('()'), Row(','); +---- +({}) ([]) ("()") (",") + +query TTT +select Row(NULL), Row(NULL, NULL), Row('null'); +---- +() (,) (null) + +query TTT +select Row(Array[] :: varchar[]), Row(Array[1] :: varchar[]), Row(Array[1,2] :: varchar[]); +---- +({}) ({1}) ("{1,2}") + +query T +select Row(Array['"'] :: varchar[]); +---- +("{""\\""""}") diff --git a/e2e_test/batch/types/struct/struct_case.slt.part b/e2e_test/batch/types/struct/struct_case.slt.part index 3200d7b05b35d..88b3a4c8ebd2b 100644 --- a/e2e_test/batch/types/struct/struct_case.slt.part +++ b/e2e_test/batch/types/struct/struct_case.slt.part @@ -24,7 +24,7 @@ NULL query I SELECT CASE WHEN 1=0 THEN NULL ELSE ROW(NULL) END ---- -(NULL) +() # now with a table query II diff --git a/e2e_test/batch/types/struct/struct_cast.slt.part b/e2e_test/batch/types/struct/struct_cast.slt.part index 77ac022dd9e09..fb24123c6441d 100644 --- a/e2e_test/batch/types/struct/struct_cast.slt.part +++ b/e2e_test/batch/types/struct/struct_cast.slt.part @@ -13,12 +13,12 @@ SELECT ROW(1,2)::STRUCT; query I SELECT (NULL, 'hello')::STRUCT; ---- -(NULL,hello) +(,hello) query I SELECT (NULL, NULL)::STRUCT; ---- -(NULL,NULL) +(,) query I SELECT NULL::STRUCT; @@ -29,7 +29,7 @@ NULL query I SELECT (NULL, NULL)::STRUCT; ---- -(NULL,NULL) +(,) query I SELECT ((NULL, NULL)::STRUCT).i; @@ -45,22 +45,22 @@ NULL query I SELECT (1, (2, 3))::STRUCT>; ---- -(1,(2,3)) +(1,"(2,3)") query I SELECT (1, (NULL, 3))::STRUCT>; ---- -(1,(NULL,3)) +(1,"(,3)") query I SELECT (1, (2, NULL))::STRUCT>; ---- -(1,(2,NULL)) +(1,"(2,)") query I SELECT (1, NULL)::STRUCT>; ---- -(1,NULL) +(1,) # cast and extract query I @@ -83,9 +83,9 @@ INSERT INTO structs VALUES ((1,2)), ((null, 2)), ((1, null)), (NULL) query I rowsort SELECT s FROM structs; ---- +(,2) +(1,) (1,2) -(1,NULL) -(NULL,2) NULL statement ok diff --git a/e2e_test/batch/types/struct/struct_cross_product.slt.part b/e2e_test/batch/types/struct/struct_cross_product.slt.part index 2c45edf96785a..7046f8cbec65a 100644 --- a/e2e_test/batch/types/struct/struct_cross_product.slt.part +++ b/e2e_test/batch/types/struct/struct_cross_product.slt.part @@ -14,45 +14,45 @@ insert into t values (1, ROW(ROW(3, 7), ARRAY[1, 2, 3])), (2, NULL), (3, ROW(NUL query IIII SELECT * FROM t AS v, t AS w ORDER BY v.v1, w.v1; ---- -1 ((3,7),{1,2,3}) 1 ((3,7),{1,2,3}) -1 ((3,7),{1,2,3}) 2 NULL -1 ((3,7),{1,2,3}) 3 (NULL,{4,5,NULL}) -2 NULL 1 ((3,7),{1,2,3}) +1 ("(3,7)","{1,2,3}") 1 ("(3,7)","{1,2,3}") +1 ("(3,7)","{1,2,3}") 2 NULL +1 ("(3,7)","{1,2,3}") 3 (,"{4,5,NULL}") +2 NULL 1 ("(3,7)","{1,2,3}") 2 NULL 2 NULL -2 NULL 3 (NULL,{4,5,NULL}) -3 (NULL,{4,5,NULL}) 1 ((3,7),{1,2,3}) -3 (NULL,{4,5,NULL}) 2 NULL -3 (NULL,{4,5,NULL}) 3 (NULL,{4,5,NULL}) +2 NULL 3 (,"{4,5,NULL}") +3 (,"{4,5,NULL}") 1 ("(3,7)","{1,2,3}") +3 (,"{4,5,NULL}") 2 NULL +3 (,"{4,5,NULL}") 3 (,"{4,5,NULL}") query IIII SELECT * FROM t v, t w WHERE v.v1 >= w.v1 ORDER BY v.v1, w.v1; ---- -1 ((3,7),{1,2,3}) 1 ((3,7),{1,2,3}) -2 NULL 1 ((3,7),{1,2,3}) +1 ("(3,7)","{1,2,3}") 1 ("(3,7)","{1,2,3}") +2 NULL 1 ("(3,7)","{1,2,3}") 2 NULL 2 NULL -3 (NULL,{4,5,NULL}) 1 ((3,7),{1,2,3}) -3 (NULL,{4,5,NULL}) 2 NULL -3 (NULL,{4,5,NULL}) 3 (NULL,{4,5,NULL}) +3 (,"{4,5,NULL}") 1 ("(3,7)","{1,2,3}") +3 (,"{4,5,NULL}") 2 NULL +3 (,"{4,5,NULL}") 3 (,"{4,5,NULL}") query IIII SELECT * FROM t v, t w WHERE v.v1 <> w.v1 ORDER BY v.v1, w.v1; ---- -1 ((3,7),{1,2,3}) 2 NULL -1 ((3,7),{1,2,3}) 3 (NULL,{4,5,NULL}) -2 NULL 1 ((3,7),{1,2,3}) -2 NULL 3 (NULL,{4,5,NULL}) -3 (NULL,{4,5,NULL}) 1 ((3,7),{1,2,3}) -3 (NULL,{4,5,NULL}) 2 NULL +1 ("(3,7)","{1,2,3}") 2 NULL +1 ("(3,7)","{1,2,3}") 3 (,"{4,5,NULL}") +2 NULL 1 ("(3,7)","{1,2,3}") +2 NULL 3 (,"{4,5,NULL}") +3 (,"{4,5,NULL}") 1 ("(3,7)","{1,2,3}") +3 (,"{4,5,NULL}") 2 NULL query IIII SELECT * FROM t v, t w WHERE v.v1 <> w.v1 OR v.v1 > w.v1 ORDER BY v.v1, w.v1; ---- -1 ((3,7),{1,2,3}) 2 NULL -1 ((3,7),{1,2,3}) 3 (NULL,{4,5,NULL}) -2 NULL 1 ((3,7),{1,2,3}) -2 NULL 3 (NULL,{4,5,NULL}) -3 (NULL,{4,5,NULL}) 1 ((3,7),{1,2,3}) -3 (NULL,{4,5,NULL}) 2 NULL +1 ("(3,7)","{1,2,3}") 2 NULL +1 ("(3,7)","{1,2,3}") 3 (,"{4,5,NULL}") +2 NULL 1 ("(3,7)","{1,2,3}") +2 NULL 3 (,"{4,5,NULL}") +3 (,"{4,5,NULL}") 1 ("(3,7)","{1,2,3}") +3 (,"{4,5,NULL}") 2 NULL statement ok drop table t; diff --git a/e2e_test/ddl/alter_rename_relation.slt b/e2e_test/ddl/alter_rename_relation.slt index df085a273b0a9..3e19d08084d2f 100644 --- a/e2e_test/ddl/alter_rename_relation.slt +++ b/e2e_test/ddl/alter_rename_relation.slt @@ -158,8 +158,8 @@ SELECT * from v3 query IIII rowsort SELECT * from v4 ---- -1 (1,(1,2)) 1 (1,(1,2)) -2 (2,(2,4)) 2 (2,(2,4)) +1 (1,"(1,2)") 1 (1,"(1,2)") +2 (2,"(2,4)") 2 (2,"(2,4)") statement ok CREATE MATERIALIZED VIEW mv4 AS SELECT * FROM src; diff --git a/e2e_test/sink/kafka/avro.slt b/e2e_test/sink/kafka/avro.slt index 88909135b9e53..a9040c93f793f 100644 --- a/e2e_test/sink/kafka/avro.slt +++ b/e2e_test/sink/kafka/avro.slt @@ -59,8 +59,8 @@ select time_micros_field, time_millis_field from from_kafka order by string_field; ---- -t Rising \x6130 3.5 4.25 22 23 NULL {{NULL,3},NULL,{7,NULL,2}} 2006-01-02 22:04:05+00:00 NULL NULL 12:34:56.123456 NULL -f Wave \x5a4446 1.5 NULL 11 12 (NULL,foo) NULL NULL 2006-01-02 22:04:05+00:00 2021-04-01 NULL 23:45:16.654 +t Rising \x6130 3.5 4.25 22 23 NULL {{NULL,3},NULL,{7,NULL,2}} 2006-01-02 22:04:05+00:00 NULL NULL 12:34:56.123456 NULL +f Wave \x5a4446 1.5 NULL 11 12 (,foo) NULL NULL 2006-01-02 22:04:05+00:00 2021-04-01 NULL 23:45:16.654 statement error SchemaFetchError create sink sink_err from into_kafka with ( diff --git a/e2e_test/sink/kafka/protobuf.slt b/e2e_test/sink/kafka/protobuf.slt index 9510c6e7fbf8b..83e92fd431a0d 100644 --- a/e2e_test/sink/kafka/protobuf.slt +++ b/e2e_test/sink/kafka/protobuf.slt @@ -63,7 +63,7 @@ select timestamp_field, oneof_int32 from from_kafka order by string_field; ---- -t Rising \x6130 3.5 4.25 22 23 24 0 26 27 (1,) {4,0,4} (1136239445,0) 42 +t Rising \x6130 3.5 4.25 22 23 24 0 26 27 (1,"") {4,0,4} (1136239445,0) 42 f Wave \x5a4446 1.5 0 11 12 13 14 15 16 (4,foo) {} (0,0) 0 statement error failed to read file diff --git a/e2e_test/source/basic/kafka.slt b/e2e_test/source/basic/kafka.slt index ff1178cb9c156..941d6ae98c0ae 100644 --- a/e2e_test/source/basic/kafka.slt +++ b/e2e_test/source/basic/kafka.slt @@ -629,17 +629,17 @@ select id, sequence_id, name, score, avg_score, is_lasted, entrance_date, birthd query ITITT select id, code, timestamp, xfas, contacts, sex from s10; ---- -100 abc 1473305798 {"(0,200,10.0.0.1)","(1,400,10.0.0.2)"} ({1xxx,2xxx},{1xxx,2xxx}) MALE +100 abc 1473305798 {"(0,200,10.0.0.1)","(1,400,10.0.0.2)"} ("{1xxx,2xxx}","{1xxx,2xxx}") MALE query ITITT select id, code, timestamp, xfas, contacts, sex from s11; ---- -0 abc 1473305798 {"(0,200,127.0.0.1)","(1,400,127.0.0.2)"} ({1xxx,2xxx},{1xxx,2xxx}) MALE +0 abc 1473305798 {"(0,200,127.0.0.1)","(1,400,127.0.0.2)"} ("{1xxx,2xxx}","{1xxx,2xxx}") MALE query ITITT select id, code, timestamp, xfas, contacts, jsonb from s12; ---- -100 abc 1473305798 {"(0,200,10.0.0.1)","(1,400,10.0.0.2)"} ({1xxx,2xxx},{1xxx,2xxx}) {"blockNumber": 16938734} +100 abc 1473305798 {"(0,200,10.0.0.1)","(1,400,10.0.0.2)"} ("{1xxx,2xxx}","{1xxx,2xxx}") {"blockNumber": 16938734} query ITIT select * from s13 order by id; diff --git a/e2e_test/source/basic/kafka_batch.slt b/e2e_test/source/basic/kafka_batch.slt index a1b7690a36c45..525031684166c 100644 --- a/e2e_test/source/basic/kafka_batch.slt +++ b/e2e_test/source/basic/kafka_batch.slt @@ -203,7 +203,7 @@ drop source s6 query ITITT select id, code, timestamp, xfas, contacts from s7; ---- -100 abc 1473305798 {"(0,200,10.0.0.1)","(1,400,10.0.0.2)"} ({1xxx,2xxx},{1xxx,2xxx}) +100 abc 1473305798 {"(0,200,10.0.0.1)","(1,400,10.0.0.2)"} ("{1xxx,2xxx}","{1xxx,2xxx}") statement ok drop source s7 diff --git a/e2e_test/source/basic/old_row_format_syntax/kafka.slt b/e2e_test/source/basic/old_row_format_syntax/kafka.slt index aeba9b8667d78..3d2e4719d744d 100644 --- a/e2e_test/source/basic/old_row_format_syntax/kafka.slt +++ b/e2e_test/source/basic/old_row_format_syntax/kafka.slt @@ -583,17 +583,17 @@ select id, sequence_id, name, score, avg_score, is_lasted, entrance_date, birthd query ITITT select id, code, timestamp, xfas, contacts, sex from s10; ---- -100 abc 1473305798 {"(0,200,10.0.0.1)","(1,400,10.0.0.2)"} ({1xxx,2xxx},{1xxx,2xxx}) MALE +100 abc 1473305798 {"(0,200,10.0.0.1)","(1,400,10.0.0.2)"} ("{1xxx,2xxx}","{1xxx,2xxx}") MALE query ITITT select id, code, timestamp, xfas, contacts, sex from s11; ---- -0 abc 1473305798 {"(0,200,127.0.0.1)","(1,400,127.0.0.2)"} ({1xxx,2xxx},{1xxx,2xxx}) MALE +0 abc 1473305798 {"(0,200,127.0.0.1)","(1,400,127.0.0.2)"} ("{1xxx,2xxx}","{1xxx,2xxx}") MALE query ITITT select id, code, timestamp, xfas, contacts, jsonb from s12; ---- -100 abc 1473305798 {"(0,200,10.0.0.1)","(1,400,10.0.0.2)"} ({1xxx,2xxx},{1xxx,2xxx}) {"blockNumber": 16938734} +100 abc 1473305798 {"(0,200,10.0.0.1)","(1,400,10.0.0.2)"} ("{1xxx,2xxx}","{1xxx,2xxx}") {"blockNumber": 16938734} query ITIT select * from s13 order by id; diff --git a/e2e_test/source/basic/old_row_format_syntax/kafka_batch.slt b/e2e_test/source/basic/old_row_format_syntax/kafka_batch.slt index 5ab2f2dbce15f..7a1495470f253 100644 --- a/e2e_test/source/basic/old_row_format_syntax/kafka_batch.slt +++ b/e2e_test/source/basic/old_row_format_syntax/kafka_batch.slt @@ -181,7 +181,7 @@ drop source s6 query ITITT select id, code, timestamp, xfas, contacts from s7; ---- -100 abc 1473305798 {"(0,200,10.0.0.1)","(1,400,10.0.0.2)"} ({1xxx,2xxx},{1xxx,2xxx}) +100 abc 1473305798 {"(0,200,10.0.0.1)","(1,400,10.0.0.2)"} ("{1xxx,2xxx}","{1xxx,2xxx}") statement ok drop source s7 diff --git a/e2e_test/streaming/struct_table.slt b/e2e_test/streaming/struct_table.slt index 462818456b909..e254d85366678 100644 --- a/e2e_test/streaming/struct_table.slt +++ b/e2e_test/streaming/struct_table.slt @@ -41,10 +41,10 @@ flush; query II select * from t1 order by v1; ---- -1 (2,(1,0)) -2 (5,(4,1)) -3 (6,(3,4)) -4 (3,(2,2)) +1 (2,"(1,0)") +2 (5,"(4,1)") +3 (6,"(3,4)") +4 (3,"(2,2)") statement ok create materialized view mv3 as select * from t1 order by (v2).v3; diff --git a/e2e_test/udf/udf.slt b/e2e_test/udf/udf.slt index 7ebdf49d0b20d..62979196e74ca 100644 --- a/e2e_test/udf/udf.slt +++ b/e2e_test/udf/udf.slt @@ -132,10 +132,10 @@ select jsonb_array_identity(ARRAY[null, '1'::jsonb, '"str"'::jsonb, '{}'::jsonb] query T select jsonb_array_struct_identity(ROW(ARRAY[null, '1'::jsonb, '"str"'::jsonb, '{}'::jsonb], 4)::struct); ---- -({NULL,1,"\"str\"","{}"},4) +("{NULL,1,""\\""str\\"""",""{}""}",4) query T -select return_all( +select (return_all( true, 1 ::smallint, 1 ::int, @@ -150,12 +150,12 @@ select return_all( 'string', 'bytes'::bytea, '{"key":1}'::jsonb -); +)).*; ---- -(t,1,1,1,1,1,1234567890123456789012345678,2023-06-01,01:02:03.456789,2023-06-01 01:02:03.456789,1 mon 2 days 00:00:03,string,\x6279746573,{"key": 1}) +t 1 1 1 1 1 1234567890123456789012345678 2023-06-01 01:02:03.456789 2023-06-01 01:02:03.456789 1 mon 2 days 00:00:03 string \x6279746573 {"key": 1} query T -select return_all_arrays( +select (return_all_arrays( array[null, true], array[null, 1 ::smallint], array[null, 1 ::int], @@ -170,9 +170,9 @@ select return_all_arrays( array[null, 'string'], array[null, 'bytes'::bytea], array[null, '{"key":1}'::jsonb] -); +)).*; ---- -({NULL,t},{NULL,1},{NULL,1},{NULL,1},{NULL,1},{NULL,1},{NULL,1234567890123456789012345678},{NULL,2023-06-01},{NULL,01:02:03.456789},{NULL,"2023-06-01 01:02:03.456789"},{NULL,"1 mon 2 days 00:00:03"},{NULL,string},{NULL,"\\x6279746573"},{NULL,"{\"key\": 1}"}) +{NULL,t} {NULL,1} {NULL,1} {NULL,1} {NULL,1} {NULL,1} {NULL,1234567890123456789012345678} {NULL,2023-06-01} {NULL,01:02:03.456789} {NULL,"2023-06-01 01:02:03.456789"} {NULL,"1 mon 2 days 00:00:03"} {NULL,string} {NULL,"\\x6279746573"} {NULL,"{\"key\": 1}"} query I select series(5); diff --git a/src/common/src/array/list_array.rs b/src/common/src/array/list_array.rs index 7eaaffff98534..2bc7772e19ee3 100644 --- a/src/common/src/array/list_array.rs +++ b/src/common/src/array/list_array.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::borrow::Cow; use std::cmp::Ordering; use std::fmt; use std::fmt::Debug; @@ -31,7 +32,8 @@ use crate::buffer::{Bitmap, BitmapBuilder}; use crate::estimate_size::EstimateSize; use crate::row::Row; use crate::types::{ - hash_datum, DataType, Datum, DatumRef, DefaultOrd, Scalar, ScalarRefImpl, ToDatumRef, ToText, + hash_datum, DataType, Datum, DatumRef, DefaultOrd, Scalar, ScalarImpl, ScalarRefImpl, + ToDatumRef, ToText, }; use crate::util::memcmp_encoding; use crate::util::value_encoding::estimate_serialize_datum_size; @@ -624,6 +626,235 @@ impl<'a> From<&'a ListValue> for ListRef<'a> { } } +impl ListValue { + /// Construct an array from literal string. + pub fn from_str(input: &str, data_type: &DataType) -> Result { + struct Parser<'a> { + input: &'a str, + data_type: &'a DataType, + } + + impl Parser<'_> { + /// Parse a datum. + fn parse(&mut self) -> Result { + self.skip_whitespace(); + if self.data_type.is_array() { + if self.try_parse_null() { + return Ok(None); + } + Ok(Some(self.parse_array()?.into())) + } else { + self.parse_value() + } + } + + /// Parse an array. + fn parse_array(&mut self) -> Result { + self.skip_whitespace(); + if !self.try_consume('{') { + return Err("Array value must start with \"{\"".to_string()); + } + self.skip_whitespace(); + if self.try_consume('}') { + return Ok(ListValue::new(vec![])); + } + let mut elems = Vec::new(); + loop { + let mut parser = Self { + input: self.input, + data_type: self.data_type.as_list(), + }; + elems.push(parser.parse()?); + self.input = parser.input; + + // expect ',' or '}' + self.skip_whitespace(); + match self.peek() { + Some(',') => { + self.try_consume(','); + } + Some('}') => { + self.try_consume('}'); + break; + } + None => return Err(Self::eoi()), + _ => return Err("Unexpected array element.".to_string()), + } + } + Ok(ListValue::new(elems)) + } + + /// Parse a non-array value. + fn parse_value(&mut self) -> Result { + if self.peek() == Some('"') { + return Ok(Some(self.parse_quoted()?)); + } + // peek until the next unescaped ',' or '}' + let mut chars = self.input.char_indices(); + let mut has_escape = false; + let s = loop { + match chars.next().ok_or_else(Self::eoi)? { + (_, '\\') => { + has_escape = true; + chars.next().ok_or_else(Self::eoi)?; + } + (i, c @ ',' | c @ '}') => { + let s = &self.input[..i]; + // consume the value and leave the ',' or '}' for parent + self.input = &self.input[i..]; + + break if has_escape { + Cow::Owned(Self::unescape_trim_end(s)) + } else { + let trimmed = s.trim_end(); + if trimmed.is_empty() { + return Err(format!("Unexpected \"{c}\" character.")); + } + if trimmed.eq_ignore_ascii_case("null") { + return Ok(None); + } + Cow::Borrowed(trimmed) + }; + } + (_, '{') => return Err("Unexpected \"{\" character.".to_string()), + (_, '"') => return Err("Unexpected array element.".to_string()), + _ => {} + } + }; + Ok(Some(ScalarImpl::from_literal(&s, self.data_type)?)) + } + + /// Parse a double quoted non-array value. + fn parse_quoted(&mut self) -> Result { + assert!(self.try_consume('"')); + // peek until the next unescaped '"' + let mut chars = self.input.char_indices(); + let mut has_escape = false; + let s = loop { + match chars.next().ok_or_else(Self::eoi)? { + (_, '\\') => { + has_escape = true; + chars.next().ok_or_else(Self::eoi)?; + } + (i, '"') => { + let s = &self.input[..i]; + self.input = &self.input[i + 1..]; + break if has_escape { + Cow::Owned(Self::unescape(s)) + } else { + Cow::Borrowed(s) + }; + } + _ => {} + } + }; + ScalarImpl::from_literal(&s, self.data_type) + } + + /// Unescape a string. + fn unescape(s: &str) -> String { + let mut unescaped = String::with_capacity(s.len()); + let mut chars = s.chars(); + while let Some(mut c) = chars.next() { + if c == '\\' { + c = chars.next().unwrap(); + } + unescaped.push(c); + } + unescaped + } + + /// Unescape a string and trim the trailing whitespaces. + /// + /// Example: `"\ " -> " "` + fn unescape_trim_end(s: &str) -> String { + let mut unescaped = String::with_capacity(s.len()); + let mut chars = s.chars(); + let mut len_after_last_escaped_char = 0; + while let Some(mut c) = chars.next() { + if c == '\\' { + c = chars.next().unwrap(); + unescaped.push(c); + len_after_last_escaped_char = unescaped.len(); + } else { + unescaped.push(c); + } + } + let l = unescaped[len_after_last_escaped_char..].trim_end().len(); + unescaped.truncate(len_after_last_escaped_char + l); + unescaped + } + + /// Consume the next 4 characters if it matches "null". + /// + /// Note: We don't use this function when parsing non-array values. + /// Because we can't decide whether it is a null value or a string starts with "null". + /// Consider this case: `{null value}` => `["null value"]` + fn try_parse_null(&mut self) -> bool { + if let Some(s) = self.input.get(..4) && s.eq_ignore_ascii_case("null") { + let next_char = self.input[4..].chars().next(); + match next_char { + None | Some(',' | '}') => {} + Some(c) if c.is_ascii_whitespace() => {} + // following normal characters + _ => return false, + } + self.input = &self.input[4..]; + true + } else { + false + } + } + + /// Consume the next character if it matches `c`. + fn try_consume(&mut self, c: char) -> bool { + if self.peek() == Some(c) { + self.input = &self.input[c.len_utf8()..]; + true + } else { + false + } + } + + /// Expect end of input. + fn expect_end(&mut self) -> Result<(), String> { + self.skip_whitespace(); + match self.peek() { + Some(_) => Err("Junk after closing right brace.".to_string()), + None => Ok(()), + } + } + + /// Skip whitespaces. + fn skip_whitespace(&mut self) { + self.input = match self + .input + .char_indices() + .find(|(_, c)| !c.is_ascii_whitespace()) + { + Some((i, _)) => &self.input[i..], + None => "", + }; + } + + /// Peek the next character. + fn peek(&self) -> Option { + self.input.chars().next() + } + + /// Return the error message for unexpected end of input. + fn eoi() -> String { + "Unexpected end of input.".into() + } + } + + let mut parser = Parser { input, data_type }; + let array = parser.parse_array()?; + parser.expect_end()?; + Ok(array) + } +} + #[cfg(test)] mod tests { use more_asserts::{assert_gt, assert_lt}; @@ -1035,4 +1266,53 @@ mod tests { let scalar = list_ref.get(1).unwrap(); assert_eq!(scalar, Some(types::ScalarRefImpl::Int32(5))); } + + #[test] + fn test_from_to_literal() { + #[track_caller] + fn test(typestr: &str, input: &str, output: Option<&str>) { + let datatype: DataType = typestr.parse().unwrap(); + let list = ListValue::from_str(input, &datatype).unwrap(); + let actual = list.as_scalar_ref().to_text(); + let output = output.unwrap_or(input); + assert_eq!(actual, output); + } + + #[track_caller] + fn test_err(typestr: &str, input: &str, err: &str) { + let datatype: DataType = typestr.parse().unwrap(); + let actual_err = ListValue::from_str(input, &datatype).unwrap_err(); + assert_eq!(actual_err, err); + } + + test("varchar[]", "{}", None); + test("varchar[]", "{1 2}", Some(r#"{"1 2"}"#)); + test("varchar[]", "{🥵,🤡}", None); + test("varchar[]", r#"{aa\\bb}"#, Some(r#"{"aa\\bb"}"#)); + test("int[]", "{1,2,3}", None); + test("varchar[]", r#"{"1,2"}"#, None); + test("varchar[]", r#"{1, ""}"#, Some(r#"{1,""}"#)); + test("varchar[]", r#"{"\""}"#, None); + test("varchar[]", r#"{\ }"#, Some(r#"{" "}"#)); + test("varchar[]", r#"{\\ }"#, Some(r#"{"\\"}"#)); + test("varchar[]", "{nulla}", None); + test("varchar[]", "{null a}", Some(r#"{"null a"}"#)); + test( + "varchar[]", + r#"{"null", "NULL", null, NuLL}"#, + Some(r#"{"null","NULL",NULL,NULL}"#), + ); + test("varchar[][]", "{{1, 2, 3}, null }", Some("{{1,2,3},NULL}")); + test( + "varchar[][][]", + "{{{1, 2, 3}}, {{4, 5, 6}}}", + Some("{{{1,2,3}},{{4,5,6}}}"), + ); + test_err("varchar[]", "()", r#"Array value must start with "{""#); + test_err("varchar[]", "{1,", r#"Unexpected end of input."#); + test_err("varchar[]", "{1,}", r#"Unexpected "}" character."#); + test_err("varchar[]", "{1,,3}", r#"Unexpected "," character."#); + test_err("varchar[]", r#"{"a""b"}"#, r#"Unexpected array element."#); + test_err("varchar[]", r#"{}{"#, r#"Junk after closing right brace."#); + } } diff --git a/src/common/src/array/struct_array.rs b/src/common/src/array/struct_array.rs index 27c5d8c0dc237..492a7ca3aed1d 100644 --- a/src/common/src/array/struct_array.rs +++ b/src/common/src/array/struct_array.rs @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use core::fmt; use std::cmp::Ordering; -use std::fmt::Debug; +use std::fmt::{self, Debug, Write}; use std::hash::Hash; use std::sync::Arc; @@ -411,6 +410,7 @@ impl Debug for StructRef<'_> { impl ToText for StructRef<'_> { fn write(&self, f: &mut W) -> std::fmt::Result { + let mut raw_text = String::new(); iter_fields_ref!(*self, it, { write!(f, "(")?; let mut is_first = true; @@ -420,7 +420,12 @@ impl ToText for StructRef<'_> { } else { write!(f, ",")?; } - ToText::write(&x, f)?; + // print nothing for null + if x.is_some() { + raw_text.clear(); + x.write(&mut raw_text)?; + quote_if_need(&raw_text, f)?; + } } write!(f, ")") }) @@ -434,6 +439,32 @@ impl ToText for StructRef<'_> { } } +/// Double quote a string if it contains any special characters. +fn quote_if_need(input: &str, writer: &mut impl Write) -> std::fmt::Result { + if !input.is_empty() // non-empty + && !input.contains([ + '"', '\\', '(', ')', ',', + // PostgreSQL `array_isspace` includes '\x0B' but rust + // [`char::is_ascii_whitespace`] does not. + ' ', '\t', '\n', '\r', '\x0B', '\x0C', + ]) + { + return writer.write_str(input); + } + + writer.write_char('"')?; + + for ch in input.chars() { + match ch { + '"' => writer.write_str("\"\"")?, + '\\' => writer.write_str("\\\\")?, + _ => writer.write_char(ch)?, + } + } + + writer.write_char('"') +} + #[cfg(test)] mod tests { use more_asserts::assert_gt; @@ -711,4 +742,23 @@ mod tests { assert_eq!(lhs_serialized.cmp(&rhs_serialized), order); } } + + #[test] + fn test_quote() { + #[track_caller] + fn test(input: &str, quoted: &str) { + let mut actual = String::new(); + quote_if_need(input, &mut actual).unwrap(); + assert_eq!(quoted, actual); + } + test("abc", "abc"); + test("", r#""""#); + test(" x ", r#"" x ""#); + test("a b", r#""a b""#); + test(r#"a"bc"#, r#""a""bc""#); + test(r#"a\bc"#, r#""a\\bc""#); + test("{1}", "{1}"); + test("{1,2}", r#""{1,2}""#); + test(r#"{"f": 1}"#, r#""{""f"": 1}""#); + } } diff --git a/src/common/src/types/mod.rs b/src/common/src/types/mod.rs index 3d62780c19b91..98ea7ee061bd6 100644 --- a/src/common/src/types/mod.rs +++ b/src/common/src/types/mod.rs @@ -997,11 +997,10 @@ impl ScalarImpl { DataType::Timestamptz => Timestamptz::from_str(s).map_err(|e| e.to_string())?.into(), DataType::Time => Time::from_str(s).map_err(|e| e.to_string())?.into(), DataType::Interval => Interval::from_str(s).map_err(|e| e.to_string())?.into(), - // Not processing list or struct literal right now. Leave it for later phase (normal backend - // evaluation). - DataType::List { .. } => return Err("not supported".into()), + DataType::List { .. } => ListValue::from_str(s, t)?.into(), + // Not processing struct literal right now. Leave it for later phase (normal backend evaluation). DataType::Struct(_) => return Err("not supported".into()), - DataType::Jsonb => return Err("not supported".into()), + DataType::Jsonb => JsonbVal::from_str(s).map_err(|e| e.to_string())?.into(), DataType::Bytea => str_to_bytea(s)?.into(), }) } diff --git a/src/expr/impl/benches/expr.rs b/src/expr/impl/benches/expr.rs index 685b5de98c1e8..1d8ab36472fb6 100644 --- a/src/expr/impl/benches/expr.rs +++ b/src/expr/impl/benches/expr.rs @@ -354,6 +354,11 @@ fn bench_expr(c: &mut Criterion) { }); } + c.bench_function("cast(character varying) -> int8[]", |bencher| { + let expr = build_from_pretty(r#"(cast:int8[] {1,"2"}:varchar)"#); + bencher.to_async(FuturesExecutor).iter(|| expr.eval(&input)) + }); + let sigs = FUNCTION_REGISTRY .iter_aggregates() .sorted_by_cached_key(|sig| format!("{sig:?}")); diff --git a/src/expr/impl/src/scalar/cast.rs b/src/expr/impl/src/scalar/cast.rs index ae2ed50472b86..0f69e4cdad5e6 100644 --- a/src/expr/impl/src/scalar/cast.rs +++ b/src/expr/impl/src/scalar/cast.rs @@ -20,7 +20,7 @@ use itertools::Itertools; use risingwave_common::array::{ListRef, ListValue, StructRef, StructValue}; use risingwave_common::cast; use risingwave_common::row::OwnedRow; -use risingwave_common::types::{DataType, Int256, IntoOrdered, JsonbRef, ToText, F64}; +use risingwave_common::types::{Int256, IntoOrdered, JsonbRef, ToText, F64}; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::expr::{ build_func, Context, Expression, ExpressionBoxExt, InputRefExpression, @@ -189,63 +189,9 @@ pub fn str_to_bytea(elem: &str) -> Result> { cast::str_to_bytea(elem).map_err(|err| ExprError::Parse(err.into())) } -// TODO(nanderstabel): optimize for multidimensional List. Depth can be given as a parameter to this -// function. -/// Takes a string input in the form of a comma-separated list enclosed in braces, and returns a -/// vector of strings containing the list items. -/// -/// # Examples -/// - "{1, 2, 3}" => ["1", "2", "3"] -/// - "{1, {2, 3}}" => ["1", "{2, 3}"] -fn unnest(input: &str) -> Result> { - let trimmed = input.trim(); - if !trimmed.starts_with('{') || !trimmed.ends_with('}') { - return Err(ExprError::Parse("Input must be braced".into())); - } - let trimmed = &trimmed[1..trimmed.len() - 1]; - - let mut items = Vec::new(); - let mut depth = 0; - let mut start = 0; - for (i, c) in trimmed.chars().enumerate() { - match c { - '{' => depth += 1, - '}' => depth -= 1, - ',' if depth == 0 => { - let item = trimmed[start..i].trim(); - items.push(item); - start = i + 1; - } - _ => {} - } - } - if depth != 0 { - return Err(ExprError::Parse("Unbalanced braces".into())); - } - let last = trimmed[start..].trim(); - if !last.is_empty() { - items.push(last); - } - Ok(items) -} - #[function("cast(varchar) -> anyarray", type_infer = "panic")] fn str_to_list(input: &str, ctx: &Context) -> Result { - let cast = build_func( - PbType::Cast, - ctx.return_type.as_list().clone(), - vec![InputRefExpression::new(DataType::Varchar, 0).boxed()], - ) - .unwrap(); - let mut values = vec![]; - for item in unnest(input)? { - let v = cast - .eval_row(&OwnedRow::new(vec![Some(item.to_string().into())])) // TODO: optimize - .now_or_never() - .unwrap()?; - values.push(v); - } - Ok(ListValue::new(values)) + ListValue::from_str(input, &ctx.return_type).map_err(|err| ExprError::Parse(err.into())) } /// Cast array with `source_elem_type` into array with `target_elem_type` by casting each element. @@ -352,31 +298,6 @@ mod tests { test!(general_to_text(Decimal::NaN), "NaN"); } - #[test] - fn test_unnest() { - assert_eq!(unnest("{ }").unwrap(), vec![] as Vec); - assert_eq!( - unnest("{1, 2, 3}").unwrap(), - vec!["1".to_string(), "2".to_string(), "3".to_string()] - ); - assert_eq!( - unnest("{{1, 2, 3}, {4, 5, 6}}").unwrap(), - vec!["{1, 2, 3}".to_string(), "{4, 5, 6}".to_string()] - ); - assert_eq!( - unnest("{{{1, 2, 3}}, {{4, 5, 6}}}").unwrap(), - vec!["{{1, 2, 3}}".to_string(), "{{4, 5, 6}}".to_string()] - ); - assert_eq!( - unnest("{{{1, 2, 3}, {4, 5, 6}}}").unwrap(), - vec!["{{1, 2, 3}, {4, 5, 6}}".to_string()] - ); - assert_eq!( - unnest("{{{aa, bb, cc}, {dd, ee, ff}}}").unwrap(), - vec!["{{aa, bb, cc}, {dd, ee, ff}}".to_string()] - ); - } - #[test] fn test_str_to_list() { // Empty List diff --git a/src/frontend/planner_test/tests/testdata/input/array.yaml b/src/frontend/planner_test/tests/testdata/input/array.yaml index bab19465d46e0..c08133a99251f 100644 --- a/src/frontend/planner_test/tests/testdata/input/array.yaml +++ b/src/frontend/planner_test/tests/testdata/input/array.yaml @@ -84,7 +84,7 @@ expected_outputs: - batch_plan - sql: | - select array_cat('{a}', '{{b}}') = array['a', '{b}']; + select array_cat('{a}', '{"{b}"}') = array['a', '{b}']; name: array_cat(unknown as text[], unknown as text[]) -> text[] expected_outputs: - batch_plan diff --git a/src/frontend/planner_test/tests/testdata/output/array.yaml b/src/frontend/planner_test/tests/testdata/output/array.yaml index f43fba600f885..e578ddac53071 100644 --- a/src/frontend/planner_test/tests/testdata/output/array.yaml +++ b/src/frontend/planner_test/tests/testdata/output/array.yaml @@ -115,7 +115,7 @@ batch_plan: 'BatchValues { rows: [[ARRAY[a, b]:List(Varchar)]] }' - name: array_cat(unknown as text[], unknown as text[]) -> text[] sql: | - select array_cat('{a}', '{{b}}') = array['a', '{b}']; + select array_cat('{a}', '{"{b}"}') = array['a', '{b}']; batch_plan: 'BatchValues { rows: [[true:Boolean]] }' - name: array_cat(unknown as int[], int[]) -> int[] sql: | @@ -238,19 +238,19 @@ - name: unknown to varchar[] in implicit context sql: | values (array['a', 'b']), ('{c,d}'); - logical_plan: 'LogicalValues { rows: [[Array(''a'':Varchar, ''b'':Varchar)], [''{c,d}'':Varchar::List(Varchar)]], schema: Schema { fields: [*VALUES*_0.column_0:List(Varchar)] } }' + logical_plan: 'LogicalValues { rows: [[Array(''a'':Varchar, ''b'':Varchar)], [ARRAY[c, d]:List(Varchar)]], schema: Schema { fields: [*VALUES*_0.column_0:List(Varchar)] } }' - name: unknown to varchar[] in assign context sql: | create table t (v1 varchar[]); insert into t values ('{c,d}'); logical_plan: |- LogicalInsert { table: t, mapping: [0:0] } - └─LogicalValues { rows: [['{c,d}':Varchar::List(Varchar)]], schema: Schema { fields: [*VALUES*_0.column_0:List(Varchar)] } } + └─LogicalValues { rows: [[ARRAY[c, d]:List(Varchar)]], schema: Schema { fields: [*VALUES*_0.column_0:List(Varchar)] } } - name: unknown to varchar[] in explicit context sql: | select ('{c,d}')::varchar[]; logical_plan: |- - LogicalProject { exprs: ['{c,d}':Varchar::List(Varchar) as $expr1] } + LogicalProject { exprs: [ARRAY[c, d]:List(Varchar)] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: varchar[] to string in assign context sql: | @@ -287,7 +287,7 @@ sql: | select array[1] = '{1}'; logical_plan: |- - LogicalProject { exprs: [(Array(1:Int32) = '{1}':Varchar::List(Int32)) as $expr1] } + LogicalProject { exprs: [(Array(1:Int32) = ARRAY[1]:List(Int32)) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - name: compare with different type sql: | diff --git a/src/frontend/planner_test/tests/testdata/output/expr.yaml b/src/frontend/planner_test/tests/testdata/output/expr.yaml index 2074a0410b431..65df2522c81dd 100644 --- a/src/frontend/planner_test/tests/testdata/output/expr.yaml +++ b/src/frontend/planner_test/tests/testdata/output/expr.yaml @@ -393,7 +393,7 @@ - sql: | select 1 < ALL('{2,3}'); logical_plan: |- - LogicalProject { exprs: [All((1:Int32 < '{2,3}':Varchar::List(Int32))) as $expr1] } + LogicalProject { exprs: [All((1:Int32 < ARRAY[2, 3]:List(Int32))) as $expr1] } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } - sql: | select 1 < SOME(null::integer[]); diff --git a/src/tests/regress/data/expected/arrays.out b/src/tests/regress/data/expected/arrays.out index 0c30298435324..678a5087b85b5 100644 --- a/src/tests/regress/data/expected/arrays.out +++ b/src/tests/regress/data/expected/arrays.out @@ -1464,25 +1464,25 @@ select '{}'::text[]; {} (1 row) -select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[]; +select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[][][]; text ----------------------------------------------- {{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}} (1 row) select '{0 second ,0 second}'::interval[]; - interval ---------------- - {"@ 0","@ 0"} + interval +--------------------- + {00:00:00,00:00:00} (1 row) -select '{ { "," } , { 3 } }'::text[]; +select '{ { "," } , { 3 } }'::text[][]; text ------------- {{","},{3}} (1 row) -select ' { { " 0 second " , 0 second } }'::text[]; +select ' { { " 0 second " , 0 second } }'::text[][]; text ------------------------------- {{" 0 second ","0 second"}} diff --git a/src/tests/regress/data/sql/arrays.sql b/src/tests/regress/data/sql/arrays.sql index 30c2155742d86..a97e9e3005d07 100644 --- a/src/tests/regress/data/sql/arrays.sql +++ b/src/tests/regress/data/sql/arrays.sql @@ -251,7 +251,7 @@ SELECT ARRAY[ARRAY['hello'],ARRAY['world']]; --@ SELECT ARRAY(select f2 from arrtest_f order by f2) AS "ARRAY"; -- with nulls ---@ SELECT '{1,null,3}'::int[]; +SELECT '{1,null,3}'::int[]; SELECT ARRAY[1,NULL,3]; -- functions @@ -372,12 +372,12 @@ select 33 * any (44); -- nulls select 33 = any (null::int[]); select null::int = any ('{1,2,3}'); ---@ select 33 = any ('{1,null,3}'); ---@ select 33 = any ('{1,null,33}'); +select 33 = any ('{1,null,3}'); +select 33 = any ('{1,null,33}'); select 33 = all (null::int[]); select null::int = all ('{1,2,3}'); ---@ select 33 = all ('{1,null,3}'); ---@ select 33 = all ('{33,null,33}'); +select 33 = all ('{1,null,3}'); +select 33 = all ('{33,null,33}'); -- nulls later in the bitmap --@ SELECT -1 != ALL(ARRAY(SELECT NULLIF(g.i, 900) FROM generate_series(1,1000) g(i))); @@ -439,10 +439,10 @@ select array[]; -- all of the following should be accepted select '{}'::text[]; ---@ select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[]; +select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[][][]; --@ select '{0 second ,0 second}'::interval[]; ---@ select '{ { "," } , { 3 } }'::text[]; ---@ select ' { { " 0 second " , 0 second } }'::text[]; +select '{ { "," } , { 3 } }'::text[][]; +select ' { { " 0 second " , 0 second } }'::text[][]; --@ select '{ --@ 0 second, --@ @ 1 hour @ 42 minutes @ 20 seconds From 1b1950eaa51a0c89965a25bec829b07e197a291e Mon Sep 17 00:00:00 2001 From: August Date: Wed, 15 Nov 2023 17:07:35 +0800 Subject: [PATCH 2/3] chore: add some unit test for alter relation rename in catalog controller (#13438) --- src/meta/src/controller/catalog.rs | 84 ++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs index cd69cdb630f40..f7eb35b10f7b1 100644 --- a/src/meta/src/controller/catalog.rs +++ b/src/meta/src/controller/catalog.rs @@ -1020,6 +1020,8 @@ impl CatalogController { #[cfg(test)] #[cfg(not(madsim))] mod tests { + use risingwave_meta_model_v2::ViewId; + use super::*; const TEST_DATABASE_ID: DatabaseId = 1; @@ -1115,4 +1117,86 @@ mod tests { Ok(()) } + + #[tokio::test] + async fn test_alter_rename() -> MetaResult<()> { + let mgr = CatalogController::new(MetaSrvEnv::for_test().await)?; + let pb_source = PbSource { + schema_id: TEST_SCHEMA_ID as _, + database_id: TEST_DATABASE_ID as _, + name: "s1".to_string(), + owner: TEST_OWNER_ID as _, + definition: r#"CREATE SOURCE s1 (v1 int) with ( + connector = 'kafka', + topic = 'kafka_alter', + properties.bootstrap.server = 'message_queue:29092', + scan.startup.mode = 'earliest' +) FORMAT PLAIN ENCODE JSON"# + .to_string(), + ..Default::default() + }; + mgr.create_source(pb_source).await?; + let source_id: SourceId = Source::find() + .select_only() + .column(source::Column::SourceId) + .filter(source::Column::Name.eq("s1")) + .into_tuple() + .one(&mgr.inner.read().await.db) + .await? + .unwrap(); + + let pb_view = PbView { + schema_id: TEST_SCHEMA_ID as _, + database_id: TEST_DATABASE_ID as _, + name: "view_1".to_string(), + owner: TEST_OWNER_ID as _, + sql: "CREATE VIEW view_1 AS SELECT v1 FROM s1".to_string(), + dependent_relations: vec![source_id as _], + ..Default::default() + }; + mgr.create_view(pb_view).await?; + let view_id: ViewId = View::find() + .select_only() + .column(view::Column::ViewId) + .filter(view::Column::Name.eq("view_1")) + .into_tuple() + .one(&mgr.inner.read().await.db) + .await? + .unwrap(); + + mgr.alter_relation_name(ObjectType::Source, source_id, "s2") + .await?; + let source = Source::find_by_id(source_id) + .one(&mgr.inner.read().await.db) + .await? + .unwrap(); + assert_eq!(source.name, "s2"); + assert_eq!( + source.definition, + "CREATE SOURCE s2 (v1 INT) WITH (\ + connector = 'kafka', \ + topic = 'kafka_alter', \ + properties.bootstrap.server = 'message_queue:29092', \ + scan.startup.mode = 'earliest'\ +) FORMAT PLAIN ENCODE JSON" + ); + + let view = View::find_by_id(view_id) + .one(&mgr.inner.read().await.db) + .await? + .unwrap(); + assert_eq!( + view.definition, + "CREATE VIEW view_1 AS SELECT v1 FROM s2 AS s1" + ); + + mgr.drop_relation(ObjectType::Source, source_id, DropMode::Cascade) + .await?; + assert!(View::find_by_id(view_id) + .one(&mgr.inner.read().await.db) + .await? + .is_none()); + + Ok(()) + } } From a62493618c19abe7c6d43ba765aa8c613a25e5e1 Mon Sep 17 00:00:00 2001 From: xiangjinwu <17769960+xiangjinwu@users.noreply.github.com> Date: Wed, 15 Nov 2023 18:08:07 +0800 Subject: [PATCH 3/3] refactor(datagen): generalize `TimestampField` to `ChronoField` (#13439) --- src/common/src/field_generator/mod.rs | 6 +- src/common/src/field_generator/timestamp.rs | 78 +++++++++++-------- .../src/source/datagen/source/reader.rs | 2 +- 3 files changed, 48 insertions(+), 38 deletions(-) diff --git a/src/common/src/field_generator/mod.rs b/src/common/src/field_generator/mod.rs index 9d61c01e41861..02a958aa0ebec 100644 --- a/src/common/src/field_generator/mod.rs +++ b/src/common/src/field_generator/mod.rs @@ -26,7 +26,7 @@ pub use timestamp::*; pub use varchar::*; use crate::array::{ListValue, StructValue}; -use crate::types::{DataType, Datum, ScalarImpl}; +use crate::types::{DataType, Datum, ScalarImpl, Timestamp}; pub const DEFAULT_MIN: i16 = i16::MIN; pub const DEFAULT_MAX: i16 = i16::MAX; @@ -95,7 +95,7 @@ pub enum FieldGeneratorImpl { VarcharRandomVariableLength(VarcharRandomVariableLengthField), VarcharRandomFixedLength(VarcharRandomFixedLengthField), VarcharConstant, - Timestamp(TimestampField), + Timestamp(ChronoField), Struct(Vec<(String, FieldGeneratorImpl)>), List(Box, usize), } @@ -181,7 +181,7 @@ impl FieldGeneratorImpl { max_past_mode: Option, seed: u64, ) -> Result { - Ok(FieldGeneratorImpl::Timestamp(TimestampField::new( + Ok(FieldGeneratorImpl::Timestamp(ChronoField::new( base, max_past, max_past_mode, diff --git a/src/common/src/field_generator/timestamp.rs b/src/common/src/field_generator/timestamp.rs index 000f806c66d50..54c55de273965 100644 --- a/src/common/src/field_generator/timestamp.rs +++ b/src/common/src/field_generator/timestamp.rs @@ -18,26 +18,19 @@ use chrono::{Duration, DurationRound}; use humantime::parse_duration; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; -use serde_json::{json, Value}; +use serde_json::Value; use tracing::debug; use super::DEFAULT_MAX_PAST; use crate::types::{Datum, Scalar, Timestamp}; -#[derive(Debug)] -enum LocalNow { - Relative, - Absolute(NaiveDateTime), -} - -pub struct TimestampField { - base: Option>, +pub struct ChronoField { max_past: Duration, - local_now: LocalNow, + absolute_base: Option, seed: u64, } -impl TimestampField { +impl ChronoField { pub fn new( base: Option>, max_past_option: Option, @@ -45,14 +38,8 @@ impl TimestampField { seed: u64, ) -> Result { let local_now = match max_past_mode.as_deref() { - Some("relative") => LocalNow::Relative, - _ => { - LocalNow::Absolute( - Local::now() - .naive_local() - .duration_round(Duration::microseconds(1))?, - ) // round to 1 us std duration - } + Some("relative") => None, + _ => Some(T::from_now()), }; let max_past = if let Some(max_past_option) = max_past_option { @@ -63,36 +50,59 @@ impl TimestampField { }; debug!(?local_now, ?max_past, "parse timestamp field option"); Ok(Self { - base, // convert to chrono::Duration max_past: chrono::Duration::from_std(max_past)?, - local_now, + absolute_base: base.map(T::from_base).or(local_now), seed, }) } - fn generate_data(&mut self, offset: u64) -> NaiveDateTime { + fn generate_data(&mut self, offset: u64) -> T { let milliseconds = self.max_past.num_milliseconds(); let mut rng = StdRng::seed_from_u64(offset ^ self.seed); let max_milliseconds = rng.gen_range(0..=milliseconds); - let now = match self.base { - Some(base) => base.naive_local(), - None => match self.local_now { - LocalNow::Relative => Local::now() - .naive_local() - .duration_round(Duration::microseconds(1)) - .unwrap(), - LocalNow::Absolute(now) => now, - }, + let base = match self.absolute_base { + Some(base) => base, + None => T::from_now(), }; - now - Duration::milliseconds(max_milliseconds) + base.minus(Duration::milliseconds(max_milliseconds)) } pub fn generate(&mut self, offset: u64) -> Value { - json!(self.generate_data(offset).to_string()) + self.generate_data(offset).to_json() } pub fn generate_datum(&mut self, offset: u64) -> Datum { - Some(Timestamp::new(self.generate_data(offset)).to_scalar_value()) + Some(self.generate_data(offset).to_scalar_value()) + } +} + +pub trait ChronoFieldInner: std::fmt::Debug + Copy + Scalar { + fn from_now() -> Self; + fn from_base(base: DateTime) -> Self; + fn minus(&self, duration: Duration) -> Self; + fn to_json(&self) -> Value; +} + +impl ChronoFieldInner for Timestamp { + fn from_now() -> Self { + Timestamp::new( + Local::now() + .naive_local() + .duration_round(Duration::microseconds(1)) + .unwrap(), + ) + } + + fn from_base(base: DateTime) -> Self { + Timestamp::new(base.naive_local()) + } + + fn minus(&self, duration: Duration) -> Self { + Timestamp::new(self.0 - duration) + } + + fn to_json(&self) -> Value { + Value::String(self.0.to_string()) } } diff --git a/src/connector/src/source/datagen/source/reader.rs b/src/connector/src/source/datagen/source/reader.rs index bd9f74ee3aa9a..11cb9db08c48a 100644 --- a/src/connector/src/source/datagen/source/reader.rs +++ b/src/connector/src/source/datagen/source/reader.rs @@ -215,7 +215,7 @@ fn generator_from_data_type( match data_type { DataType::Timestamp => { let max_past_key = format!("fields.{}.max_past", name); - let max_past_value = fields_option_map.get(&max_past_key).map(|s| s.to_string()); + let max_past_value = fields_option_map.get(&max_past_key).cloned(); let max_past_mode_key = format!("fields.{}.max_past_mode", name); let max_past_mode_value = fields_option_map .get(&max_past_mode_key)