diff --git a/Cargo.lock b/Cargo.lock index fac9e97c5f8b..f89c6646b2cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -293,14 +293,14 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ccd45e217ffa6e53bbb0080990e77113bdd4e91ddb84e97b77649810bcf1a7" +checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" dependencies = [ - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", "chrono", "half 2.3.1", "num", @@ -325,14 +325,14 @@ dependencies = [ [[package]] name = "arrow-array" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d" +checksum = "d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" dependencies = [ "ahash 0.8.6", - "arrow-buffer 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", + "arrow-buffer 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", "chrono", "half 2.3.1", "hashbrown 0.14.0", @@ -352,9 +352,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a0fc21915b00fc6c2667b069c1b64bdd920982f426079bc4a7cab86822886c" +checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" dependencies = [ "bytes", "half 2.3.1", @@ -381,15 +381,15 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dc0368ed618d509636c1e3cc20db1281148190a78f43519487b2daf07b63b4a" +checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" dependencies = [ - "arrow-array 49.0.0", - 
"arrow-buffer 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", - "arrow-select 49.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", + "arrow-select 50.0.0", "base64 0.21.4", "chrono", "half 2.3.1", @@ -430,27 +430,27 @@ dependencies = [ [[package]] name = "arrow-data" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907fafe280a3874474678c1858b9ca4cb7fd83fb8034ff5b6d6376205a08c634" +checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" dependencies = [ - "arrow-buffer 49.0.0", - "arrow-schema 49.0.0", + "arrow-buffer 50.0.0", + "arrow-schema 50.0.0", "half 2.3.1", "num", ] [[package]] name = "arrow-flight" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "624e0dcb6b5a7a06222bfd2be3f7e905ce849a6b714ec989f18cdba330c77d38" +checksum = "1d7f215461ad6346f2e4cc853e377d4e076d533e1ed78d327debe83023e3601f" dependencies = [ - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-cast 49.0.0", - "arrow-ipc 49.0.0", - "arrow-schema 49.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-cast 50.0.0", + "arrow-ipc 50.0.0", + "arrow-schema 50.0.0", "base64 0.21.4", "bytes", "futures", @@ -476,15 +476,15 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79a43d6808411886b8c7d4f6f7dd477029c1e77ffffffb7923555cc6579639cd" +checksum = "03dea5e79b48de6c2e04f03f62b0afea7105be7b77d134f6c5414868feefb80d" dependencies = [ - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-cast 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-cast 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", "flatbuffers", ] @@ -525,15 +525,15 @@ dependencies = [ [[package]] name = "arrow-ord" 
-version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b23b0e53c0db57c6749997fd343d4c0354c994be7eca67152dd2bdb9a3e1bb4" +checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" dependencies = [ - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", - "arrow-select 49.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", + "arrow-select 50.0.0", "half 2.3.1", "num", ] @@ -555,15 +555,15 @@ dependencies = [ [[package]] name = "arrow-row" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "361249898d2d6d4a6eeb7484be6ac74977e48da12a4dd81a708d620cc558117a" +checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" dependencies = [ "ahash 0.8.6", - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", "half 2.3.1", "hashbrown 0.14.0", ] @@ -579,9 +579,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e28a5e781bf1b0f981333684ad13f5901f4cd2f20589eab7cf1797da8fc167" +checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" [[package]] name = "arrow-select" @@ -599,15 +599,15 @@ dependencies = [ [[package]] name = "arrow-select" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036" +checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" dependencies = [ "ahash 0.8.6", - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-data 49.0.0", - "arrow-schema 49.0.0", + "arrow-array 50.0.0", 
+ "arrow-buffer 50.0.0", + "arrow-data 50.0.0", + "arrow-schema 50.0.0", "num", ] @@ -627,15 +627,28 @@ dependencies = [ "regex-syntax 0.8.0", ] +[[package]] +name = "arrow-udf-js" +version = "0.1.0" +source = "git+https://github.com/risingwavelabs/arrow-udf.git?rev=7ba1c22#7ba1c226fa2f7418a217ee064a19b90efeb7143c" +dependencies = [ + "anyhow", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-schema 50.0.0", + "rquickjs", +] + [[package]] name = "arrow-udf-wasm" version = "0.1.0" -source = "git+https://github.com/risingwavelabs/arrow-udf.git?rev=f9a9e0d#f9a9e0d41d1a4ae26a6d90ac8aebf2e38a0c8a55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c45450e29b016fdc0ccbb22408daceead464a2178f7f8408886c7ca0c9e1aed" dependencies = [ "anyhow", - "arrow-array 49.0.0", - "arrow-ipc 49.0.0", - "arrow-schema 49.0.0", + "arrow-array 50.0.0", + "arrow-ipc 50.0.0", + "arrow-schema 50.0.0", "base64 0.21.4", "genawaiter", "lazy_static", @@ -976,9 +989,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.0.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c1317e1a3514b103cf7d5828bbab3b4d30f56bd22d684f8568bc51b6cfbbb1c" +checksum = "4a7cb3510b95492bd9014b60e2e3bee3e48bc516e220316f8e6b60df18b47331" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -1125,9 +1138,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.0.1" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380adcc8134ad8bbdfeb2ace7626a869914ee266322965276cbc54066186d236" +checksum = "d222297ca90209dc62245f0a490355795f29de362eb5c19caea4f7f55fe69078" dependencies = [ "aws-credential-types", "aws-smithy-eventstream", @@ -1143,7 +1156,6 @@ dependencies = [ "once_cell", "p256 0.11.1", "percent-encoding", - "regex", "ring 0.17.5", "sha2", "subtle", @@ -1154,9 +1166,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.0.1" 
+version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fbfa248f7f966d73e325dbc85851a5500042b6d96e3c3b535a8527707f36fe4" +checksum = "2eac0bb78e9e2765699999a02d7bfb4e6ad8f13e0962ebb9f5202b1d8cd76006" dependencies = [ "futures-util", "pin-project-lite", @@ -1186,9 +1198,9 @@ dependencies = [ [[package]] name = "aws-smithy-eventstream" -version = "0.60.0" +version = "0.60.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c669e1e5fc0d79561bf7a122b118bd50c898758354fe2c53eb8f2d31507cbc3" +checksum = "682371561562d08ab437766903c6bc28f4f95d7ab2ecfb389bda7849dd98aefe" dependencies = [ "aws-smithy-types", "bytes", @@ -1197,9 +1209,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.60.0" +version = "0.60.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b1de8aee22f67de467b2e3d0dd0fb30859dc53f579a63bd5381766b987db644" +checksum = "365ca49744b2bda2f1e2dc03b856da3fa5a28ca5b0a41e41d7ff5305a8fae190" dependencies = [ "aws-smithy-eventstream", "aws-smithy-runtime-api", @@ -1261,9 +1273,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.0.1" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d27c3235d4972ed976b5c1a82286e7c4457f618f3c2ae6d4ae44f081dd24575" +checksum = "02ca2da7619517310bfead6d18abcdde90f1439224d887d608503cfacff46dff" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -1276,9 +1288,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.1.1" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aba8136605d14ac88f57dc3a693a9f8a4eab4a3f52bc03ff13746f0cd704e97" +checksum = "5d4bb944488536cd2fef43212d829bc7e9a8bfc4afa079d21170441e7be8d2d0" dependencies = [ "base64-simd", "bytes", @@ -4361,9 +4373,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.28" +version = "0.3.30" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", "futures-sink", @@ -4371,9 +4383,9 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" [[package]] name = "futures-executor" @@ -4399,9 +4411,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" [[package]] name = "futures-lite" @@ -4420,9 +4432,9 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", @@ -4431,15 +4443,15 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" [[package]] name = "futures-task" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" +checksum = 
"38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = "futures-timer" @@ -4451,9 +4463,9 @@ dependencies = [ [[package]] name = "futures-util" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-channel", "futures-core", @@ -4736,9 +4748,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.21" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" +checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" dependencies = [ "bytes", "fnv", @@ -4746,7 +4758,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.9", - "indexmap 1.9.3", + "indexmap 2.0.0", "slab", "tokio", "tokio-util", @@ -5065,14 +5077,14 @@ source = "git+https://github.com/icelake-io/icelake?rev=32c0bbf242f5c47b1e743f10 dependencies = [ "anyhow", "apache-avro 0.17.0", - "arrow-arith 49.0.0", - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-cast 49.0.0", - "arrow-ord 49.0.0", - "arrow-row 49.0.0", - "arrow-schema 49.0.0", - "arrow-select 49.0.0", + "arrow-arith 50.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-cast 50.0.0", + "arrow-ord 50.0.0", + "arrow-row 50.0.0", + "arrow-schema 50.0.0", + "arrow-select 50.0.0", "async-trait", "bitvec", "bytes", @@ -5089,7 +5101,7 @@ dependencies = [ "once_cell", "opendal", "ordered-float 3.9.1", - "parquet 49.0.0", + "parquet 50.0.0", "prometheus", "regex", "reqwest", @@ -7075,24 +7087,25 @@ dependencies = [ [[package]] name = "parquet" -version = "49.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af88740a842787da39b3d69ce5fbf6fce97d20211d3b299fee0a0da6430c74d4" +checksum = 
"547b92ebf0c1177e3892f44c8f79757ee62e678d564a9834189725f2c5b7a750" dependencies = [ "ahash 0.8.6", - "arrow-array 49.0.0", - "arrow-buffer 49.0.0", - "arrow-cast 49.0.0", - "arrow-data 49.0.0", - "arrow-ipc 49.0.0", - "arrow-schema 49.0.0", - "arrow-select 49.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-cast 50.0.0", + "arrow-data 50.0.0", + "arrow-ipc 50.0.0", + "arrow-schema 50.0.0", + "arrow-select 50.0.0", "base64 0.21.4", "brotli", "bytes", "chrono", "flate2", "futures", + "half 2.3.1", "hashbrown 0.14.0", "lz4_flex", "num", @@ -7821,7 +7834,7 @@ checksum = "8bdf592881d821b83d471f8af290226c8d51402259e9bb5be7f9f8bdebbb11ac" dependencies = [ "bytes", "heck 0.4.1", - "itertools 0.11.0", + "itertools 0.10.5", "log", "multimap 0.8.3", "once_cell", @@ -7855,7 +7868,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "265baba7fabd416cf5078179f7d2cbeca4ce7a9041111900675ea7c4cb8a4c32" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.10.5", "proc-macro2", "quote", "syn 2.0.48", @@ -8490,6 +8503,7 @@ dependencies = [ "serde_with", "serde_yaml", "tempfile", + "thiserror-ext", "tracing", "tracing-subscriber", "workspace-hack", @@ -8622,6 +8636,7 @@ dependencies = [ "risingwave_stream", "serde", "serde_yaml", + "thiserror-ext", "tokio-stream", "toml 0.8.2", "tracing", @@ -8689,13 +8704,13 @@ dependencies = [ "anyhow", "arc-swap", "arrow-array 48.0.1", - "arrow-array 49.0.0", + "arrow-array 50.0.0", "arrow-buffer 48.0.1", - "arrow-buffer 49.0.0", + "arrow-buffer 50.0.0", "arrow-cast 48.0.1", - "arrow-cast 49.0.0", + "arrow-cast 50.0.0", "arrow-schema 48.0.1", - "arrow-schema 49.0.0", + "arrow-schema 50.0.0", "async-trait", "auto_enums", "auto_impl", @@ -8934,8 +8949,8 @@ version = "1.7.0-alpha" dependencies = [ "anyhow", "apache-avro 0.16.0", - "arrow-array 49.0.0", - "arrow-schema 49.0.0", + "arrow-array 50.0.0", + "arrow-schema 50.0.0", "async-nats", "async-trait", "auto_enums", @@ -9103,8 +9118,9 @@ name = 
"risingwave_expr" version = "1.7.0-alpha" dependencies = [ "anyhow", - "arrow-array 49.0.0", - "arrow-schema 49.0.0", + "arrow-array 50.0.0", + "arrow-schema 50.0.0", + "arrow-udf-js", "arrow-udf-wasm", "async-trait", "auto_impl", @@ -9144,7 +9160,7 @@ version = "1.7.0-alpha" dependencies = [ "aho-corasick", "anyhow", - "arrow-schema 49.0.0", + "arrow-schema 50.0.0", "async-trait", "auto_enums", "chrono", @@ -9193,7 +9209,7 @@ version = "1.7.0-alpha" dependencies = [ "anyhow", "arc-swap", - "arrow-schema 49.0.0", + "arrow-schema 50.0.0", "arrow-udf-wasm", "assert_matches", "async-recursion", @@ -9511,6 +9527,7 @@ dependencies = [ "sea-orm", "serde", "serde_json", + "thiserror-ext", "tracing", "workspace-hack", ] @@ -9536,6 +9553,7 @@ dependencies = [ "risingwave_pb", "sea-orm", "sync-point", + "thiserror-ext", "tokio-stream", "tracing", "workspace-hack", @@ -9981,10 +9999,10 @@ dependencies = [ name = "risingwave_udf" version = "0.1.0" dependencies = [ - "arrow-array 49.0.0", + "arrow-array 50.0.0", "arrow-flight", - "arrow-schema 49.0.0", - "arrow-select 49.0.0", + "arrow-schema 50.0.0", + "arrow-select 50.0.0", "cfg-or-panic", "futures-util", "madsim-tokio", @@ -10059,6 +10077,33 @@ dependencies = [ "retain_mut", ] +[[package]] +name = "rquickjs" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0db265d331ae1b1a9fdb68466a8359bc9dcc5e78a9c323f790322f8442e005ac" +dependencies = [ + "rquickjs-core", +] + +[[package]] +name = "rquickjs-core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e51f2fc99917699385bfa290b776e712e414b222d7c2a9b2cd67b8e93585f3" +dependencies = [ + "rquickjs-sys", +] + +[[package]] +name = "rquickjs-sys" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b6865056bc4154c49bc8b2babd9232a8ba55dee4860fc74c789633aecad3ca" +dependencies = [ + "cc", +] + [[package]] name = "rsa" version = "0.9.2" @@ 
-11056,9 +11101,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.12.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2593d31f82ead8df961d8bd23a64c2ccf2eb5dd34b0a34bfb4dd54011c72009e" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" dependencies = [ "serde", ] @@ -12548,9 +12593,9 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ "getrandom", "rand", @@ -13665,7 +13710,7 @@ dependencies = [ "hyper", "indexmap 1.9.3", "indexmap 2.0.0", - "itertools 0.11.0", + "itertools 0.10.5", "jni", "lazy_static", "lexical-core", diff --git a/Cargo.toml b/Cargo.toml index 7bd67bc58374..0bc9cab78b4e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -124,16 +124,17 @@ prost = { version = "0.12" } icelake = { git = "https://github.com/icelake-io/icelake", rev = "32c0bbf242f5c47b1e743f10577012fe7436c770", features = [ "prometheus", ] } -arrow-array = "49" -arrow-arith = "49" -arrow-cast = "49" -arrow-schema = "49" -arrow-buffer = "49" -arrow-flight = "49" -arrow-select = "49" -arrow-ord = "49" -arrow-row = "49" -arrow-udf-wasm = { git = "https://github.com/risingwavelabs/arrow-udf.git", rev = "f9a9e0d" } +arrow-array = "50" +arrow-arith = "50" +arrow-cast = "50" +arrow-schema = "50" +arrow-buffer = "50" +arrow-flight = "50" +arrow-select = "50" +arrow-ord = "50" +arrow-row = "50" +arrow-udf-js = { git = "https://github.com/risingwavelabs/arrow-udf.git", rev = "7ba1c22" } +arrow-udf-wasm = "0.1" arrow-array-deltalake = { package = "arrow-array", version = "48.0.1" } arrow-buffer-deltalake = { package = "arrow-buffer", version = "48.0.1" } arrow-cast-deltalake = { 
package = "arrow-cast", version = "48.0.1" } @@ -143,7 +144,7 @@ arrow-schema-deltalake = { package = "arrow-schema", version = "48.0.1" } deltalake = { git = "https://github.com/risingwavelabs/delta-rs", rev = "5c2dccd4640490202ffe98adbd13b09cef8e007b", features = [ "s3-no-concurrent-write", ] } -parquet = "49" +parquet = "50" thiserror-ext = "0.0.11" tikv-jemalloc-ctl = { git = "https://github.com/risingwavelabs/jemallocator.git", rev = "64a2d9" } tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git", features = [ @@ -239,6 +240,7 @@ lto = "thin" [profile.ci-release] inherits = "release" incremental = false +lto = "off" debug = "line-tables-only" split-debuginfo = "off" debug-assertions = true diff --git a/ci/rust-toolchain b/ci/rust-toolchain index e8c05a601edc..b1f2df70a8d0 100644 --- a/ci/rust-toolchain +++ b/ci/rust-toolchain @@ -1,7 +1,7 @@ # To update toolchain, do the following: # 1. update this file -# 2. update lints/rust-toolchain, lints/Cargo.toml -# 3. update ci/build-ci-image.sh and ci/docker-compose.yml to build a new CI image +# 2. update ci/build-ci-image.sh and ci/docker-compose.yml to build a new CI image +# 3. (optional) **follow the instructions in lints/README.md** to update the toolchain and dependencies for lints [toolchain] channel = "nightly-2023-12-26" diff --git a/ci/scripts/e2e-clickhouse-sink-test.sh b/ci/scripts/e2e-clickhouse-sink-test.sh index 51720b0e1d09..3464bd3c3c14 100755 --- a/ci/scripts/e2e-clickhouse-sink-test.sh +++ b/ci/scripts/e2e-clickhouse-sink-test.sh @@ -35,7 +35,7 @@ sleep 2 echo "--- testing sinks" sqllogictest -p 4566 -d dev './e2e_test/sink/clickhouse_sink.slt' -sleep 1 +sleep 5 ./clickhouse client --host=clickhouse-server --port=9000 --query="select * from demo_test FORMAT CSV;" > ./query_result.csv @@ -52,6 +52,7 @@ if ($1 == 1 && $2 == 50 && $3 == "\"1-50\"") c1++; echo "Clickhouse sink check passed" else echo "The output is not as expected." 
+ cat ./query_result.csv exit 1 fi diff --git a/ci/workflows/main-cron.yml b/ci/workflows/main-cron.yml index ee87f3ed3f5f..d5a030e10a79 100644 --- a/ci/workflows/main-cron.yml +++ b/ci/workflows/main-cron.yml @@ -17,7 +17,7 @@ steps: run: rw-build-env config: ci/docker-compose.yml mount-buildkite-agent: true - timeout_in_minutes: 30 + timeout_in_minutes: 20 retry: *auto-retry - label: "build other components" diff --git a/docker/docker-compose-with-hdfs.yml b/docker/docker-compose-with-hdfs.yml index ccf0f433c450..b0ed75296504 100644 --- a/docker/docker-compose-with-hdfs.yml +++ b/docker/docker-compose-with-hdfs.yml @@ -2,7 +2,7 @@ version: "3" services: compactor-0: - image: ghcr.io/risingwavelabs/risingwave:RisingWave_v1.5.4_HDFS_2.7-x86_64 + image: ghcr.io/risingwavelabs/risingwave:RisingWave_1.6.0_HDFS_2.7-x86_64 command: - compactor-node - "--listen-addr" @@ -42,7 +42,7 @@ services: reservations: memory: 1G compute-node-0: - image: "ghcr.io/risingwavelabs/risingwave:RisingWave_v1.5.4_HDFS_2.7-x86_64" + image: "ghcr.io/risingwavelabs/risingwave:RisingWave_1.6.0_HDFS_2.7-x86_64" command: - compute-node - "--listen-addr" @@ -132,7 +132,7 @@ services: retries: 5 restart: always frontend-node-0: - image: "ghcr.io/risingwavelabs/risingwave:RisingWave_v1.5.4_HDFS_2.7-x86_64" + image: "ghcr.io/risingwavelabs/risingwave:RisingWave_1.6.0_HDFS_2.7-x86_64" command: - frontend-node - "--listen-addr" @@ -195,7 +195,7 @@ services: retries: 5 restart: always meta-node-0: - image: "ghcr.io/risingwavelabs/risingwave:RisingWave_v1.5.4_HDFS_2.7-x86_64" + image: "ghcr.io/risingwavelabs/risingwave:RisingWave_1.6.0_HDFS_2.7-x86_64" command: - meta-node - "--listen-addr" diff --git a/e2e_test/iceberg/start_spark_connect_server.sh b/e2e_test/iceberg/start_spark_connect_server.sh index fb6a37e8135f..cf3bff1e3991 100755 --- a/e2e_test/iceberg/start_spark_connect_server.sh +++ b/e2e_test/iceberg/start_spark_connect_server.sh @@ -15,11 +15,12 @@ tar -xzf $SPARK_FILE --no-same-owner 
./spark-${SPARK_VERSION}-bin-hadoop3/sbin/start-connect-server.sh --packages $PACKAGES \ --master local[3] \ --conf spark.driver.bindAddress=0.0.0.0 \ - --conf spark.sql.catalog.demo=org.apache.iceberg.spark.SparkCatalog \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.demo=org.apache.iceberg.spark.SparkCatalog \ --conf spark.sql.catalog.demo.type=hadoop \ --conf spark.sql.catalog.demo.warehouse=s3a://icebergdata/demo \ --conf spark.sql.catalog.demo.hadoop.fs.s3a.endpoint=http://127.0.0.1:9301 \ + --conf spark.sql.catalog.demo.hadoop.fs.s3a.path.style.access=true \ --conf spark.sql.catalog.demo.hadoop.fs.s3a.access.key=hummockadmin \ --conf spark.sql.catalog.demo.hadoop.fs.s3a.secret.key=hummockadmin \ --conf spark.sql.defaultCatalog=demo diff --git a/e2e_test/iceberg/test_case/cdc/load.slt b/e2e_test/iceberg/test_case/cdc/load.slt index 8fa14471ceea..12abdd283397 100644 --- a/e2e_test/iceberg/test_case/cdc/load.slt +++ b/e2e_test/iceberg/test_case/cdc/load.slt @@ -7,8 +7,8 @@ create source mysql_mydb with ( port = '3306', username = 'root', password = '123456', - database.name = 'my@db', - server.id = '2' + database.name = 'mydb', + server.id = '5085' ); statement ok @@ -16,7 +16,7 @@ create table products ( id INT, name STRING, description STRING, PRIMARY KEY (id) -) FROM mysql_mydb TABLE 'my@db.products'; +) FROM mysql_mydb TABLE 'mydb.products'; statement ok @@ -35,15 +35,9 @@ CREATE SINK s1 AS select * from products WITH ( primary_key = 'id' ); -statement ok -flush; - sleep 20s query I select count(*) from products; ---- 8 - -statement ok -flush; diff --git a/e2e_test/iceberg/test_case/cdc/mysql_cdc.sql b/e2e_test/iceberg/test_case/cdc/mysql_cdc.sql index b7b6f13af83c..f95c6c2c8d4a 100644 --- a/e2e_test/iceberg/test_case/cdc/mysql_cdc.sql +++ b/e2e_test/iceberg/test_case/cdc/mysql_cdc.sql @@ -1,7 +1,7 @@ -DROP DATABASE IF EXISTS `my@db`; -CREATE DATABASE `my@db`; +DROP DATABASE IF 
EXISTS `mydb`; +CREATE DATABASE `mydb`; -USE `my@db`; +USE `mydb`; CREATE TABLE products ( id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, diff --git a/e2e_test/iceberg/test_case/cdc/mysql_cdc_insert.sql b/e2e_test/iceberg/test_case/cdc/mysql_cdc_insert.sql index 641d6220ea8d..c7dc50316d3c 100644 --- a/e2e_test/iceberg/test_case/cdc/mysql_cdc_insert.sql +++ b/e2e_test/iceberg/test_case/cdc/mysql_cdc_insert.sql @@ -1,4 +1,4 @@ -USE `my@db`; +USE `mydb`; INSERT INTO products VALUES (default,"109","109"), (default,"110","110"), diff --git a/e2e_test/iceberg/test_case/iceberg_sink_append_only.slt b/e2e_test/iceberg/test_case/iceberg_sink_append_only.slt index dff673705736..f3156a9b40ca 100644 --- a/e2e_test/iceberg/test_case/iceberg_sink_append_only.slt +++ b/e2e_test/iceberg/test_case/iceberg_sink_append_only.slt @@ -43,6 +43,8 @@ INSERT INTO t6 VALUES statement ok FLUSH; +sleep 5s + statement ok INSERT INTO t6 VALUES (5, 5, 5000, 5.5, 5.55, '5-5', true, '2022-03-15', '2022-03-15 05:00:00Z'::timestamptz, '2022-03-15 05:00:00'); @@ -50,6 +52,8 @@ INSERT INTO t6 VALUES statement ok FLUSH; +sleep 5s + statement ok DROP SINK s6; diff --git a/e2e_test/iceberg/test_case/iceberg_sink_upsert.slt b/e2e_test/iceberg/test_case/iceberg_sink_upsert.slt index 2e8ce54e1c74..f867f0d74645 100644 --- a/e2e_test/iceberg/test_case/iceberg_sink_upsert.slt +++ b/e2e_test/iceberg/test_case/iceberg_sink_upsert.slt @@ -29,12 +29,26 @@ INSERT INTO t6 VALUES (1, 1, 2, '1-2'), (1, 2, 2, '2-2'), (1, 3, 2, '3-2'), (1, statement ok FLUSH; +sleep 5s + statement ok INSERT INTO t6 VALUES (1, 1, 50, '1-50'); statement ok FLUSH; +sleep 10s + +query I +select count(*) from t6; +---- +7 + +statement ok +FLUSH; + +sleep 10s + statement ok DROP SINK s6; diff --git a/e2e_test/sink/iceberg_sink.slt b/e2e_test/sink/iceberg_sink.slt index dbc3163b7058..4935032e8828 100644 --- a/e2e_test/sink/iceberg_sink.slt +++ b/e2e_test/sink/iceberg_sink.slt @@ -25,12 +25,16 @@ INSERT INTO t6 VALUES (1, 2, '1-2'), (2, 2, 
'2-2'), (3, 2, '3-2'), (5, 2, '5-2') statement ok FLUSH; +sleep 5s + statement ok INSERT INTO t6 VALUES (1, 50, '1-50'); statement ok FLUSH; +sleep 5s + statement ok DROP SINK s6; diff --git a/e2e_test/sink/kafka/avro.slt b/e2e_test/sink/kafka/avro.slt index 45ecf302f0dd..3b1685c704e2 100644 --- a/e2e_test/sink/kafka/avro.slt +++ b/e2e_test/sink/kafka/avro.slt @@ -1,5 +1,5 @@ statement ok -create table from_kafka ( primary key (some_key) ) +create table from_kafka ( *, gen_i32_field int as int32_field + 2, primary key (some_key) ) include key as some_key with ( connector = 'kafka', @@ -52,6 +52,7 @@ select float_field, double_field, int32_field, + gen_i32_field, int64_field, record_field, array_field, @@ -61,8 +62,8 @@ select time_micros_field, time_millis_field from from_kafka order by string_field; ---- -t Rising \x6130 3.5 4.25 22 23 NULL {{NULL,3},NULL,{7,NULL,2}} 2006-01-02 22:04:05+00:00 NULL NULL 12:34:56.123456 NULL -f Wave \x5a4446 1.5 NULL 11 12 (,foo) NULL NULL 2006-01-02 22:04:05+00:00 2021-04-01 NULL 23:45:16.654 +t Rising \x6130 3.5 4.25 22 24 23 NULL {{NULL,3},NULL,{7,NULL,2}} 2006-01-02 22:04:05+00:00 NULL NULL 12:34:56.123456 NULL +f Wave \x5a4446 1.5 NULL 11 13 12 (,foo) NULL NULL 2006-01-02 22:04:05+00:00 2021-04-01 NULL 23:45:16.654 statement error SchemaFetchError create sink sink_err from into_kafka with ( diff --git a/e2e_test/udf/js_udf.slt b/e2e_test/udf/js_udf.slt new file mode 100644 index 000000000000..260fd991f648 --- /dev/null +++ b/e2e_test/udf/js_udf.slt @@ -0,0 +1,154 @@ +statement ok +create function int_42() returns int language javascript as $$ + return 42; +$$; + +query I +select int_42(); +---- +42 + +statement ok +drop function int_42; + + +statement ok +create function gcd(a int, b int) returns int language javascript as $$ + // required before we support `RETURNS NULL ON NULL INPUT` + if(a == null || b == null) { + return null; + } + while (b != 0) { + let t = b; + b = a % b; + a = t; + } + return a; +$$; + +query I +select 
gcd(25, 15); +---- +5 + +statement ok +drop function gcd; + + +statement ok +create function decimal_add(a decimal, b decimal) returns decimal language javascript as $$ + return a + b; +$$; + +query R +select decimal_add(1.11, 2.22); +---- +3.33 + +statement ok +drop function decimal_add; + + +statement ok +create function to_string(a boolean, b smallint, c int, d bigint, e real, f float, g decimal, h varchar, i bytea, j jsonb) returns varchar language javascript as $$ + return a.toString() + b.toString() + c.toString() + d.toString() + e.toString() + f.toString() + g.toString() + h.toString() + i.toString() + JSON.stringify(j); +$$; + +query T +select to_string(false, 1::smallint, 2, 3, 4.5, 6.7, 8.9, 'abc', '\x010203', '{"key": 1}'); +---- +false1234.56.78.9abc1,2,3{"key":1} + +statement ok +drop function to_string; + + +# show data types in javascript +statement ok +create function js_typeof(a boolean, b smallint, c int, d bigint, e real, f float, g decimal, h varchar, i bytea, j jsonb) returns jsonb language javascript as $$ + return { + boolean: typeof a, + smallint: typeof b, + int: typeof c, + bigint: typeof d, + real: typeof e, + float: typeof f, + decimal: typeof g, + varchar: typeof h, + bytea: typeof i, + jsonb: typeof j, + }; +$$; + +query T +select js_typeof(false, 1::smallint, 2, 3, 4.5, 6.7, 8.9, 'abc', '\x010203', '{"key": 1}'); +---- +{"bigint": "number", "boolean": "boolean", "bytea": "object", "decimal": "bigdecimal", "float": "number", "int": "number", "jsonb": "object", "real": "number", "smallint": "number", "varchar": "string"} + +statement ok +drop function js_typeof; + + +statement ok +create function return_all(a boolean, b smallint, c int, d bigint, e real, f float, g decimal, h varchar, i bytea, j jsonb, s struct<f1 int, f2 int>) +returns struct<a boolean, b smallint, c int, d bigint, e real, f float, g decimal, h varchar, i bytea, j jsonb, s struct<f1 int, f2 int>> +language javascript as $$ + return {a,b,c,d,e,f,g,h,i,j,s}; +$$; + +query T +select (return_all( + true, + 1 ::smallint, + 1, + 1, + 1, + 1, + 12345678901234567890.12345678, + 'string', + 'bytes', +
'{"key":1}', + row(1, 2)::struct<f1 int, f2 int> +)).*; +---- +t 1 1 1 1 1 12345678901234567890.12345678 string \x6279746573 {"key": 1} (1,2) + +statement ok +drop function return_all; + + +statement ok +create function series(n int) returns table (x int) language javascript as $$ + for(let i = 0; i < n; i++) { + yield i; + } +$$; + +query I +select series(5); +---- +0 +1 +2 +3 +4 + +statement ok +drop function series; + + +statement ok +create function split(s varchar) returns table (word varchar, length int) language javascript as $$ + for(let word of s.split(' ')) { + yield { word: word, length: word.length }; + } +$$; + +query IT +select * from split('rising wave'); +---- +rising 6 +wave 4 + +statement ok +drop function split; diff --git a/e2e_test/udf/wasm/Cargo.toml b/e2e_test/udf/wasm/Cargo.toml index 5e413a40e37b..79d911279d63 100644 --- a/e2e_test/udf/wasm/Cargo.toml +++ b/e2e_test/udf/wasm/Cargo.toml @@ -8,7 +8,7 @@ edition = "2021" crate-type = ["cdylib"] [dependencies] -arrow-udf = { git = "https://github.com/risingwavelabs/arrow-udf.git", rev = "f9a9e0d" } +arrow-udf = "0.1" genawaiter = "0.99" rust_decimal = "1" serde_json = "1" diff --git a/integration_tests/cassandra-and-scylladb-sink/create_sink.sql b/integration_tests/cassandra-and-scylladb-sink/create_sink.sql index f3d0982a9baf..a0a305aebd0e 100644 --- a/integration_tests/cassandra-and-scylladb-sink/create_sink.sql +++ b/integration_tests/cassandra-and-scylladb-sink/create_sink.sql @@ -21,3 +21,27 @@ FROM cassandra.table = 'demo_bhv_table', cassandra.datacenter = 'datacenter1', ); + +CREATE SINK cassandra_types_sink +FROM + cassandra_types WITH ( + connector = 'cassandra', + type = 'append-only', + force_append_only='true', + cassandra.url = 'cassandra:9042', + cassandra.keyspace = 'demo', + cassandra.table = 'cassandra_types', + cassandra.datacenter = 'datacenter1', +); + +CREATE SINK scylladb_types_sink +FROM + cassandra_types WITH ( + connector = 'cassandra', + type = 'append-only', +
force_append_only='true', + cassandra.url = 'scylladb:9042', + cassandra.keyspace = 'demo', + cassandra.table = 'cassandra_types', + cassandra.datacenter = 'datacenter1', +); diff --git a/integration_tests/cassandra-and-scylladb-sink/create_source.sql b/integration_tests/cassandra-and-scylladb-sink/create_source.sql index 292c3265a11e..460e616aed35 100644 --- a/integration_tests/cassandra-and-scylladb-sink/create_source.sql +++ b/integration_tests/cassandra-and-scylladb-sink/create_source.sql @@ -16,3 +16,27 @@ CREATE table user_behaviors ( fields.user_name.length = '10', datagen.rows.per.second = '10' ) FORMAT PLAIN ENCODE JSON; + +CREATE TABLE cassandra_types ( + types_id int, + c_boolean boolean, + c_smallint smallint, + c_integer integer, + c_bigint bigint, + c_decimal decimal, + c_real real, + c_double_precision double precision, + c_varchar varchar, + c_bytea bytea, + c_date date, + c_time time, + c_timestamptz timestamptz, + c_interval interval, + PRIMARY KEY (types_id) +); + +INSERT INTO cassandra_types VALUES (1, False, 0, 0, 0, 0, 0, 0, '', '\x00', '0001-01-01', '00:00:00.123456', '0001-01-01 00:00:00.123456'::timestamptz, '0 second'); + +INSERT INTO cassandra_types VALUES (2, False, -32767, -2147483647, -9223372036854775807, -10.0, -9999.999999, -10000.0, 'aa', '\xff', '1970-01-01', '00:00:00', '1970-01-01 00:00:00Z', '4 hour'); + +INSERT INTO cassandra_types VALUES (3, True, 32767, 2147483647, 9223372036854775807, -10.0, 9999.999999, 10000.0, '', '\', '9999-12-31', '23:59:59.999999', '9999-12-31 23:59:59.999999Z', '9990 year'); diff --git a/integration_tests/cassandra-and-scylladb-sink/prepare.sh b/integration_tests/cassandra-and-scylladb-sink/prepare.sh index 690537d87820..adba2a7cfba6 100755 --- a/integration_tests/cassandra-and-scylladb-sink/prepare.sh +++ b/integration_tests/cassandra-and-scylladb-sink/prepare.sh @@ -3,7 +3,7 @@ set -euo pipefail # wait for cassandra and scylladb to start up -sleep 60 +sleep 30 # setup cassandra docker compose exec 
cassandra cqlsh -f prepare_cassandra_and_scylladb.sql diff --git a/integration_tests/cassandra-and-scylladb-sink/prepare_cassandra_and_scylladb.sql b/integration_tests/cassandra-and-scylladb-sink/prepare_cassandra_and_scylladb.sql index 1c221771c2e4..b4906cd2dc58 100644 --- a/integration_tests/cassandra-and-scylladb-sink/prepare_cassandra_and_scylladb.sql +++ b/integration_tests/cassandra-and-scylladb-sink/prepare_cassandra_and_scylladb.sql @@ -5,3 +5,20 @@ CREATE table demo_bhv_table( target_id text, event_timestamp timestamp, ); + +CREATE table cassandra_types ( + types_id int primary key, + c_boolean boolean, + c_smallint smallint, + c_integer int, + c_bigint bigint, + c_decimal decimal, + c_real float, + c_double_precision double, + c_varchar text, + c_bytea blob, + c_date date, + c_time time, + c_timestamptz timestamp, + c_interval duration +); diff --git a/integration_tests/cassandra-and-scylladb-sink/sink_check.py b/integration_tests/cassandra-and-scylladb-sink/sink_check.py index 2087e002d9f4..d516d3a7c2de 100644 --- a/integration_tests/cassandra-and-scylladb-sink/sink_check.py +++ b/integration_tests/cassandra-and-scylladb-sink/sink_check.py @@ -4,7 +4,7 @@ sleep(30) -relations = ['demo.demo_bhv_table'] +relations = ['demo.demo_bhv_table', 'demo.cassandra_types'] dbs = ['cassandra', 'scylladb'] failed_cases = [] diff --git a/integration_tests/datagen/go.mod b/integration_tests/datagen/go.mod index 9a77c3ae0056..89299416c084 100644 --- a/integration_tests/datagen/go.mod +++ b/integration_tests/datagen/go.mod @@ -28,7 +28,7 @@ require ( github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect github.com/danieljoos/wincred v1.0.2 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/dvsekhvalnov/jose2go v0.0.0-20200901110807-248326c1351b // indirect + github.com/dvsekhvalnov/jose2go v1.6.0 // indirect github.com/eapache/go-resiliency v1.3.0 // indirect github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21 // indirect 
github.com/eapache/queue v1.1.0 // indirect diff --git a/integration_tests/datagen/go.sum b/integration_tests/datagen/go.sum index dfdff515ee01..0a9025ec0a9d 100644 --- a/integration_tests/datagen/go.sum +++ b/integration_tests/datagen/go.sum @@ -103,8 +103,9 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dimfeld/httptreemux v5.0.1+incompatible h1:Qj3gVcDNoOthBAqftuD596rm4wg/adLLz5xh5CmpiCA= github.com/dimfeld/httptreemux v5.0.1+incompatible/go.mod h1:rbUlSV+CCpv/SuqUTP/8Bk2O3LyUV436/yaRGkhP6Z0= -github.com/dvsekhvalnov/jose2go v0.0.0-20200901110807-248326c1351b h1:HBah4D48ypg3J7Np4N+HY/ZR76fx3HEUGxDU6Uk39oQ= github.com/dvsekhvalnov/jose2go v0.0.0-20200901110807-248326c1351b/go.mod h1:7BvyPhdbLxMXIYTFPLsyJRFMsKmOZnQmzh6Gb+uquuM= +github.com/dvsekhvalnov/jose2go v1.6.0 h1:Y9gnSnP4qEI0+/uQkHvFXeD2PLPJeXEL+ySMEA2EjTY= +github.com/dvsekhvalnov/jose2go v1.6.0/go.mod h1:QsHjhyTlD/lAVqn/NSbVZmSCGeDehTB/mPZadG+mhXU= github.com/eapache/go-resiliency v1.3.0 h1:RRL0nge+cWGlxXbUzJ7yMcq6w2XBEr19dCN6HECGaT0= github.com/eapache/go-resiliency v1.3.0/go.mod h1:5yPzW0MIvSe0JDsv0v+DvcjEv2FyD6iZYSs1ZI+iQho= github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21 h1:YEetp8/yCZMuEPMUDHG0CW/brkkEp8mzqk2+ODEitlw= @@ -325,6 +326,7 @@ github.com/mtibben/percent v0.2.1 h1:5gssi8Nqo8QU/r2pynCm+hBQHpkB/uNK7BJCFogWdzs github.com/mtibben/percent v0.2.1/go.mod h1:KG9uO+SZkUp+VkRHsCdYQV3XSZrrSpR3O9ibNBTZrns= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78= 
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= @@ -804,6 +806,7 @@ gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLks gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= diff --git a/integration_tests/feature-store/server/Cargo.lock b/integration_tests/feature-store/server/Cargo.lock index 12cd2809d8ba..20000f71fef2 100644 --- a/integration_tests/feature-store/server/Cargo.lock +++ b/integration_tests/feature-store/server/Cargo.lock @@ -650,9 +650,9 @@ checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" [[package]] name = "h2" -version = "0.3.21" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" +checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" dependencies = [ "bytes", "fnv", @@ -660,7 +660,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap 1.9.3", + "indexmap 2.0.0", "slab", "tokio", "tokio-util", diff --git a/integration_tests/feature-store/simulator/Cargo.lock b/integration_tests/feature-store/simulator/Cargo.lock index 30c4511e8f23..2966900cd2da 100644 --- 
a/integration_tests/feature-store/simulator/Cargo.lock +++ b/integration_tests/feature-store/simulator/Cargo.lock @@ -300,6 +300,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.3.5" @@ -454,9 +460,9 @@ checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" [[package]] name = "h2" -version = "0.3.21" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" +checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" dependencies = [ "bytes", "fnv", @@ -464,7 +470,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap", + "indexmap 2.1.0", "slab", "tokio", "tokio-util", @@ -477,6 +483,12 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" + [[package]] name = "hermit-abi" version = "0.3.3" @@ -583,7 +595,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" +dependencies = [ + "equivalent", + "hashbrown 0.14.3", ] [[package]] @@ -1265,7 +1287,7 @@ checksum = 
"b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", - "indexmap", + "indexmap 1.9.3", "pin-project", "pin-project-lite", "rand", diff --git a/integration_tests/mysql-sink/create_sink.sql b/integration_tests/mysql-sink/create_sink.sql index bfe9bf6c0b70..9776360df291 100644 --- a/integration_tests/mysql-sink/create_sink.sql +++ b/integration_tests/mysql-sink/create_sink.sql @@ -17,3 +17,14 @@ FROM type = 'upsert', primary_key = 'id' ); + +CREATE SINK mysql_data_all_types_sink +FROM + mysql_all_types WITH ( + connector = 'jdbc', + jdbc.url = 'jdbc:mysql://mysql:3306/mydb?user=root&password=123456', + table.name = 'mysql_all_types', + type='append-only', + force_append_only = 'true', + primary_key = 'id' + ); diff --git a/integration_tests/mysql-sink/create_source.sql b/integration_tests/mysql-sink/create_source.sql index f049457aa312..787d902ca3c9 100644 --- a/integration_tests/mysql-sink/create_source.sql +++ b/integration_tests/mysql-sink/create_source.sql @@ -40,3 +40,40 @@ VALUES (3, 'Varchar value 3', 'Text value 3', 345, 678, 901, 34.56, 78.90, 12.34, TRUE, '2023-05-24', '12:34:56', '2023-05-24 12:34:56', '2023-05-24T12:34:56Z', '{"key": "value3"}', E'\\xCAFEBABE'), (4, 'Varchar value 4', 'Text value 4', 456, 789, 012, 45.67, 89.01, 23.45, FALSE, '2023-05-25', '23:45:01', '2023-05-25 23:45:01', '2023-05-25T23:45:01Z', '{"key": "value4"}', E'\\xBABEC0DE'), (5, 'Varchar value 5', 'Text value 5', 567, 890, 123, 56.78, 90.12, 34.56, TRUE, '2023-05-26', '12:34:56', '2023-05-26 12:34:56', '2023-05-26T12:34:56Z', '{"key": "value5"}', E'\\xDEADBABE'); + + +CREATE TABLE mysql_all_types( + id integer PRIMARY KEY, + c_boolean boolean, + c_tinyint smallint, + c_smallint smallint, + c_mediumint integer, + c_integer integer, + c_bigint bigint, + c_decimal decimal, + c_float real, + c_double double, + c_char_255 varchar, + c_varchar_10000 varchar, + c_text varchar, + c_blob bytea, + c_binary_255 bytea, + 
c_varbinary_10000 bytea, + c_date date, + c_time time, + c_datetime timestamp, + c_timestamp timestamptz, + c_json JSONB, + c_smallint_array smallint[], + c_integer_array integer[], + c_bigint_array bigint[], + c_real_array real[], + c_double_precision_array double precision[], + c_varchar_array varchar[], +); + +INSERT INTO mysql_all_types VALUES (1, False, 0, 0, 0, 0, 0, 0, 0, 0, '', '', '', '\x00', '', '', '1001-01-01', '00:00:00.000000', '1000-01-01 00:00:00.000000', '1970-01-01 00:00:01.000000Z', '{}', array[]::smallint[], array[]::integer[], array[]::bigint[], array[]::real[], array[]::double precision[], array[]::varchar[]); + +INSERT INTO mysql_all_types VALUES (2, False, -128, -32767, -8388608, -2147483647, -9223372036854775807, -10.0, -9999.999999, -10000.0, 'a', 'b', 'c', '\x00', E'\\xFFABCE', '', '1970-01-01', '12:59:59.123456', '1000-01-01 00:00:00.000000', '2024-01-01 00:00:00.123456Z', '{"a": 1, "b":"c"}', array[-32767::smallint, 0]::smallint[], array[-2147483647::integer]::integer[], array[-9223372036854775807::bigint]::bigint[], array[-9999.999999::real]::real[], array[-10000.0::double precision]::double precision[], array['aaa'::varchar]::varchar[]); + +INSERT INTO mysql_all_types VALUES (4, True, 127, 32767, 8388607, 2147483647, 9223372036854775807, -10.0, 9999.999999, 10000.0, 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', '', '', '\', 
'\xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff', '\', '9999-12-31', '23:59:59.999999', '9999-12-31 23:59:59.499999', '2038-01-19 03:14:07.499999Z', '{"a": 1}', array[32767::smallint]::smallint[], array[2147483647::integer]::integer[], array[9223372036854775807::bigint]::bigint[], array[9999.999999::real]::real[], array[10000.0::double precision]::double precision[], array[''::varchar]::varchar[]); diff --git a/integration_tests/mysql-sink/mysql_prepare.sql b/integration_tests/mysql-sink/mysql_prepare.sql index d06e16e7829f..a93a9de5a0e2 100644 --- a/integration_tests/mysql-sink/mysql_prepare.sql +++ b/integration_tests/mysql-sink/mysql_prepare.sql @@ -23,3 +23,33 @@ CREATE TABLE data_types ( jsonb_column JSON, bytea_column BLOB ); + +CREATE TABLE mysql_all_types ( + id integer PRIMARY KEY, + c_boolean boolean, + c_tinyint tinyint, + c_smallint smallint, + c_mediumint mediumint, + c_integer integer, + c_bigint bigint, + c_decimal decimal, + c_float float, + c_double double, + c_char_255 char(255), + c_varchar_10000 varchar(10000), + c_text text, + c_blob BLOB, + c_binary_255 binary(255), + c_varbinary_10000 varbinary(10000), + c_date date, + c_time time(6), + c_datetime datetime(6), + c_timestamp timestamp(6), + c_json JSON, + c_smallint_array LONGTEXT, + c_integer_array LONGTEXT, + c_bigint_array LONGTEXT, + c_real_array LONGTEXT, + c_double_precision_array LONGTEXT, + c_varchar_array LONGTEXT +); diff --git a/integration_tests/mysql-sink/prepare.sh b/integration_tests/mysql-sink/prepare.sh index 
9f2e93d1b40a..3374386d6aa9 100755 --- a/integration_tests/mysql-sink/prepare.sh +++ b/integration_tests/mysql-sink/prepare.sh @@ -2,7 +2,5 @@ set -euo pipefail -sleep 10 - # setup mysql docker compose exec mysql bash -c "mysql -p123456 -h mysql mydb < mysql_prepare.sql" diff --git a/integration_tests/mysql-sink/sink_check.py b/integration_tests/mysql-sink/sink_check.py index b7cf590c38d0..18d0d92a0f21 100644 --- a/integration_tests/mysql-sink/sink_check.py +++ b/integration_tests/mysql-sink/sink_check.py @@ -1,7 +1,7 @@ import subprocess import sys -relations = ['target_count', 'data_types'] +relations = ['target_count', 'data_types', 'mysql_all_types'] failed_cases = [] for rel in relations: diff --git a/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/jdbc/JDBCSinkTest.java b/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/jdbc/JDBCSinkTest.java index a0d6e16532a6..12ac3ea8b7f8 100644 --- a/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/jdbc/JDBCSinkTest.java +++ b/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/jdbc/JDBCSinkTest.java @@ -282,7 +282,7 @@ public void testPostgres() throws SQLException { @Test public void testMySQL() throws SQLException { MySQLContainer mysql = - new MySQLContainer<>("mysql:8") + new MySQLContainer<>("mysql:8.0") .withDatabaseName("test") .withUsername("postgres") .withPassword("password") diff --git a/lints/Cargo.lock b/lints/Cargo.lock index daac2a9301dd..5cd984d3cac4 100644 --- a/lints/Cargo.lock +++ b/lints/Cargo.lock @@ -111,16 +111,25 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clippy_config" +version = "0.1.77" +source = 
"git+https://github.com/rust-lang/rust-clippy?rev=6fd0258e45105161b7e759a22e7350958e5cb0b1#6fd0258e45105161b7e759a22e7350958e5cb0b1" +dependencies = [ + "rustc-semver", + "serde", + "toml 0.7.8", +] + [[package]] name = "clippy_utils" -version = "0.1.75" -source = "git+https://github.com/rust-lang/rust-clippy?rev=a585cda701581a16894858dc088eacd5a02fc78b#a585cda701581a16894858dc088eacd5a02fc78b" +version = "0.1.77" +source = "git+https://github.com/rust-lang/rust-clippy?rev=6fd0258e45105161b7e759a22e7350958e5cb0b1#6fd0258e45105161b7e759a22e7350958e5cb0b1" dependencies = [ "arrayvec", - "if_chain", - "itertools 0.10.5", + "clippy_config", + "itertools 0.11.0", "rustc-semver", - "serde", ] [[package]] @@ -224,9 +233,9 @@ dependencies = [ [[package]] name = "dylint" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b9feb84cd2620b4b75119b7e914ac132dbd9e523f9a98821f3b3a7e355053" +checksum = "71fdb7b800ab13925402f0048ed0911068db2e5ba6168dd93962269d4f39541d" dependencies = [ "ansi_term", "anyhow", @@ -245,9 +254,9 @@ dependencies = [ [[package]] name = "dylint_internal" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ede982d9261f23a19b92ed7dc4ddeefc8328fc21c88e2c79ffd6e071c7972be" +checksum = "5154dada2bee2a69f75f54eae57479f56f93ca1db80725a1d82cdb5fe231ef73" dependencies = [ "ansi_term", "anyhow", @@ -257,15 +266,16 @@ dependencies = [ "if_chain", "is-terminal", "log", + "once_cell", "rust-embed", "sedregex", ] [[package]] name = "dylint_linting" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee7f2f02100bafd2f02c5fcc4ca981adca28397485bf3393ad0a1e8e46ec583b" +checksum = "d203baeb8770847314632f652e0e62dd7fec6a21102a116472eec0d6931f5dd9" dependencies = [ "cargo_metadata", "dylint_internal", @@ -273,14 +283,14 @@ dependencies = [ "rustversion", "serde", "thiserror", - 
"toml", + "toml 0.8.8", ] [[package]] name = "dylint_testing" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "964c9965a990e1ab44a862cb9823772603fe4d82b777b1190fe2d4d62ed8a885" +checksum = "1208b1f2c40fc2f3c3fa0d5631efbc7a95721d619410dc2da5b0496810d6a941" dependencies = [ "anyhow", "cargo_metadata", @@ -388,11 +398,11 @@ dependencies = [ [[package]] name = "git2" -version = "0.17.2" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b989d6a7ca95a362cf2cfc5ad688b3a467be1f87e480b8dad07fee8c79b0044" +checksum = "fbf97ba92db08df386e10c8ede66a2a0369bd277090afd8710e19e38de9ec0cd" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.1", "libc", "libgit2-sys", "log", @@ -480,9 +490,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.10.5" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" dependencies = [ "either", ] @@ -519,15 +529,15 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.150" +version = "0.2.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" [[package]] name = "libgit2-sys" -version = "0.15.2+1.6.4" +version = "0.16.1+1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a80df2e11fb4a61f4ba2ab42dbe7f74468da143f1a75c74e11dee7c813f694fa" +checksum = "f2a2bb3680b094add03bb3732ec520ece34da31a8cd2d633d1389d0f0fb60d0c" dependencies = [ "cc", "libc", @@ -587,9 +597,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.11" +version = "0.4.13" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "log" @@ -624,9 +634,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "openssl-probe" @@ -754,9 +764,9 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "rust-embed" -version = "8.0.0" +version = "8.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1e7d90385b59f0a6bf3d3b757f3ca4ece2048265d70db20a2016043d4509a40" +checksum = "a82c0bbc10308ed323529fd3c1dce8badda635aa319a5ff0e6466f33b8101e3f" dependencies = [ "rust-embed-impl", "rust-embed-utils", @@ -765,9 +775,9 @@ dependencies = [ [[package]] name = "rust-embed-impl" -version = "8.0.0" +version = "8.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c3d8c6fd84090ae348e63a84336b112b5c3918b3bf0493a581f7bd8ee623c29" +checksum = "6227c01b1783cdfee1bcf844eb44594cd16ec71c35305bf1c9fb5aade2735e16" dependencies = [ "proc-macro2", "quote", @@ -778,9 +788,9 @@ dependencies = [ [[package]] name = "rust-embed-utils" -version = "8.0.0" +version = "8.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "873feff8cb7bf86fdf0a71bb21c95159f4e4a37dd7a4bd1855a940909b583ada" +checksum = "8cb0a25bfbb2d4b4402179c2cf030387d9990857ce08a32592c6238db9fa8665" dependencies = [ "globset", "sha2", @@ -807,15 +817,15 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.25" +version = "0.38.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"dc99bc2d4f1fed22595588a013687477aedf3cdcfb26558c559edb67b4d9b22e" +checksum = "322394588aaf33c24007e8bb3238ee3e4c5c09c084ab32bc73890b99ff326bca" dependencies = [ "bitflags 2.4.1", "errno", "libc", "linux-raw-sys", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -921,15 +931,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.8.1" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" +checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" dependencies = [ "cfg-if", "fastrand", "redox_syscall 0.4.1", "rustix", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -1000,6 +1010,18 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "toml" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd79e69d3b627db300ff956027cc6c3798cef26d22526befdfcd12feeb6d2257" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit 0.19.15", +] + [[package]] name = "toml" version = "0.8.8" @@ -1009,7 +1031,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit", + "toml_edit 0.21.0", ] [[package]] @@ -1021,6 +1043,19 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_edit" +version = "0.19.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + [[package]] name = "toml_edit" version = "0.21.0" diff --git a/lints/Cargo.toml b/lints/Cargo.toml index 9b0cbdcfc052..6b8f4bbc3d8f 100644 --- a/lints/Cargo.toml +++ b/lints/Cargo.toml @@ -11,13 +11,14 @@ crate-type = ["cdylib"] name = "format_error" path = 
"ui/format_error.rs" +# See `README.md` before bumping the version. [dependencies] -clippy_utils = { git = "https://github.com/rust-lang/rust-clippy", rev = "e4c626dd9a17a23270bf8e7158e59cf2b9c04840" } # should match the toolchain version (rustc -vV) -dylint_linting = "2.5.0" +clippy_utils = { git = "https://github.com/rust-lang/rust-clippy", rev = "6fd0258e45105161b7e759a22e7350958e5cb0b1" } +dylint_linting = "2.6.0" itertools = "0.12" [dev-dependencies] -dylint_testing = "2.5.0" +dylint_testing = "2.6.0" # UI test dependencies tracing = "0.1" diff --git a/lints/README.md b/lints/README.md index 7b73f84296ad..ba8a7aebb921 100644 --- a/lints/README.md +++ b/lints/README.md @@ -4,10 +4,18 @@ Custom lints for RisingWave to enforce code style and best practices, empowered See [cargo dylint](https://github.com/trailofbits/dylint) for more information. +## Install `cargo-dylint` + +```bash +cargo install cargo-dylint dylint-link +``` + ## Run lints To run all lints, run `cargo dylint --all` in the root of the repository. +If you run into compile errors, try updating the `cargo-dylint` binary by installing it again. + ## Add new lints To add a new lint, add a new file in the `src` directory to declare the lint, then register it in `fn register_lints(..)` in `lib.rs`. @@ -19,3 +27,16 @@ To test a lint, create a new file in the `ui` directory and add it as an `exampl ## VS Code integration Duplicate `.vscode/settings.json.example` to `.vscode/settings.json` to enable rust-analyzer integration for developing lints. + +## Bump toolchain + +The toolchain version is specified in the `rust-toolchain` file in this directory. +It does not have to be exactly the same as the one used to build RisingWave, but it should be close enough to avoid compile errors. + +The information below can be helpful in finding the appropriate version to bump to.
+ +- The toolchain used by the latest version of `cargo-dylint`: https://github.com/trailofbits/dylint/blob/master/internal/template/rust-toolchain +- The toolchain used by the latest version of `clippy`: https://github.com/rust-lang/rust-clippy/blob/master/rust-toolchain +- The hash of the latest commit in the `rust-lang/rust-clippy` repo, for the dependency `clippy_utils`. + +Run the lints after bumping the toolchain to verify it works. diff --git a/lints/rust-toolchain b/lints/rust-toolchain index 975abccbffd5..ea1f0e928e5c 100644 --- a/lints/rust-toolchain +++ b/lints/rust-toolchain @@ -1,3 +1,5 @@ +# See `README.md` before bumping the version. + [toolchain] -channel = "nightly-2023-12-26" # should be identical to the root one +channel = "nightly-2024-01-11" components = ["llvm-tools-preview", "rustc-dev"] diff --git a/proto/catalog.proto b/proto/catalog.proto index ec7c68a3802b..741a85d2d3aa 100644 --- a/proto/catalog.proto +++ b/proto/catalog.proto @@ -213,11 +213,12 @@ message Function { uint32 database_id = 3; string name = 4; uint32 owner = 9; + repeated string arg_names = 15; repeated data.DataType arg_types = 5; data.DataType return_type = 6; string language = 7; - string link = 8; - string identifier = 10; + optional string link = 8; + optional string identifier = 10; optional string body = 14; oneof kind { diff --git a/proto/expr.proto b/proto/expr.proto index 9c6dd8e59fbf..f62ee2936d11 100644 --- a/proto/expr.proto +++ b/proto/expr.proto @@ -471,21 +471,27 @@ message WindowFunction { message UserDefinedFunction { repeated ExprNode children = 1; string name = 2; + repeated string arg_names = 8; repeated data.DataType arg_types = 3; string language = 4; // For external UDF: the link to the external function service. // For WASM UDF: the link to the wasm binary file. - string link = 5; + optional string link = 5; // An unique identifier for the function. // For external UDF, it's the name of the function in the external function service.
// For WASM UDF, it's the name of the function in the wasm binary file. - string identifier = 6; + // For JavaScript UDF, it's the name of the function. + optional string identifier = 6; + // For JavaScript UDF, it's the body of the function. + optional string body = 7; } // Additional information for user defined table functions. message UserDefinedTableFunction { + repeated string arg_names = 8; repeated data.DataType arg_types = 3; string language = 4; - string link = 5; - string identifier = 6; + optional string link = 5; + optional string identifier = 6; + optional string body = 7; } diff --git a/proto/hummock.proto b/proto/hummock.proto index 1ff11077645d..9b39022f7f73 100644 --- a/proto/hummock.proto +++ b/proto/hummock.proto @@ -186,6 +186,12 @@ message HummockVersionCheckpoint { map stale_objects = 2; } +message HummockVersionArchive { + HummockVersion version = 1; + // some version_deltas since version + repeated HummockVersionDelta version_deltas = 2; +} + // We will have two epoch after decouple message HummockSnapshot { // Epoch with checkpoint, we will read durable data with it. diff --git a/proto/stream_plan.proto b/proto/stream_plan.proto index e69a712c9e3d..246b689bd4c0 100644 --- a/proto/stream_plan.proto +++ b/proto/stream_plan.proto @@ -558,6 +558,9 @@ message StreamCdcScanNode { // The external table that will be backfilled for CDC. plan_common.ExternalTableDesc cdc_table_desc = 5; + + // The rate limit for the stream cdc scan node. + optional uint32 rate_limit = 6; } // BatchPlanNode is used for mv on mv snapshot read. 
diff --git a/src/batch/benches/utils/mod.rs b/src/batch/benches/utils/mod.rs index 4abd48ffc0ff..979675f2db13 100644 --- a/src/batch/benches/utils/mod.rs +++ b/src/batch/benches/utils/mod.rs @@ -76,11 +76,7 @@ pub fn create_input( chunk_num: usize, ) -> BoxedExecutor { let mut input = MockExecutor::new(Schema { - fields: input_types - .iter() - .map(Clone::clone) - .map(Field::unnamed) - .collect(), + fields: input_types.iter().cloned().map(Field::unnamed).collect(), }); for c in gen_data(chunk_size, chunk_num, input_types) { input.add(c); diff --git a/src/batch/src/executor/project_set.rs b/src/batch/src/executor/project_set.rs index 97ac9b9d88cf..b7291609be58 100644 --- a/src/batch/src/executor/project_set.rs +++ b/src/batch/src/executor/project_set.rs @@ -230,7 +230,7 @@ mod tests { let fields = &proj_executor.schema().fields; assert_eq!(fields[0].data_type, DataType::Int32); - let expected = vec![DataChunk::from_pretty( + let expected = [DataChunk::from_pretty( "I i i i 0 1 1 2 1 1 1 2 diff --git a/src/bench/Cargo.toml b/src/bench/Cargo.toml index 9dcc493117d9..86e88b53a614 100644 --- a/src/bench/Cargo.toml +++ b/src/bench/Cargo.toml @@ -34,6 +34,7 @@ risingwave_storage = { workspace = true } risingwave_stream = { workspace = true } serde = { version = "1", features = ["derive"] } serde_yaml = "0.9" +thiserror-ext = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio", features = [ "fs", "rt", diff --git a/src/bench/sink_bench/main.rs b/src/bench/sink_bench/main.rs index 943c2dc26e1b..56e681cc3eac 100644 --- a/src/bench/sink_bench/main.rs +++ b/src/bench/sink_bench/main.rs @@ -51,6 +51,7 @@ use risingwave_pb::connector_service::SinkPayloadFormat; use risingwave_stream::executor::test_utils::prelude::ColumnDesc; use risingwave_stream::executor::{Barrier, Message, MessageStreamItem, StreamExecutorError}; use serde::{Deserialize, Deserializer}; +use thiserror_ext::AsReport; use tokio::sync::oneshot::Sender; use tokio::time::{sleep, Instant}; @@ 
-298,7 +299,7 @@ where } let log_sinker = sink.new_log_sinker(sink_writer_param).await.unwrap(); if let Err(e) = log_sinker.consume_log_and_sink(&mut log_reader).await { - return Err(e.to_string()); + return Err(e.to_report_string()); } Err("Stream closed".to_string()) } diff --git a/src/common/src/array/arrow/arrow_impl.rs b/src/common/src/array/arrow/arrow_impl.rs index 4eae49ac6ac0..87f82377f967 100644 --- a/src/common/src/array/arrow/arrow_impl.rs +++ b/src/common/src/array/arrow/arrow_impl.rs @@ -880,7 +880,7 @@ mod tests { #[test] fn int256() { - let values = vec![ + let values = [ None, Some(Int256::from(1)), Some(Int256::from(i64::MAX)), diff --git a/src/common/src/config.rs b/src/common/src/config.rs index 1358105659b7..882c780ea7f9 100644 --- a/src/common/src/config.rs +++ b/src/common/src/config.rs @@ -203,6 +203,14 @@ pub struct MetaConfig { #[serde(default = "default::meta::hummock_version_checkpoint_interval_sec")] pub hummock_version_checkpoint_interval_sec: u64, + /// If enabled, SSTable object file and version delta will be retained. + /// + /// SSTable object file need to be deleted via full GC. + /// + /// version delta need to be manually deleted. + #[serde(default = "default::meta::enable_hummock_data_archive")] + pub enable_hummock_data_archive: bool, + /// The minimum delta log number a new checkpoint should compact, otherwise the checkpoint /// attempt is rejected. 
#[serde(default = "default::meta::min_delta_log_num_for_hummock_version_checkpoint")] @@ -966,6 +974,10 @@ pub mod default { 30 } + pub fn enable_hummock_data_archive() -> bool { + false + } + pub fn min_delta_log_num_for_hummock_version_checkpoint() -> u64 { 10 } diff --git a/src/common/src/system_param/common.rs b/src/common/src/system_param/common.rs index eb660d2ac3bb..d8ff74139953 100644 --- a/src/common/src/system_param/common.rs +++ b/src/common/src/system_param/common.rs @@ -12,37 +12,28 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Mutex; - -use super::reader::{SystemParamsRead, SystemParamsReader}; +use super::diff::SystemParamsDiff; +use super::reader::SystemParamsReader; use crate::util::tracing::layer::toggle_otel_layer; /// Node-independent handler for system parameter changes. /// /// Currently, it is only used to enable or disable the distributed tracing layer. -pub struct CommonHandler { - last_params: Mutex>, -} +#[derive(Debug)] +pub struct CommonHandler; impl CommonHandler { /// Create a new handler with the initial parameters. pub fn new(initial: SystemParamsReader) -> Self { - let this = Self { - last_params: None.into(), - }; - this.handle_change(initial); + let this = Self; + this.handle_change(&SystemParamsDiff::from_initial(initial)); this } /// Handle the change of system parameters. - // TODO: directly call this method with the difference of old and new params. 
- pub fn handle_change(&self, new_params: SystemParamsReader) { - let mut last_params = self.last_params.lock().unwrap(); - - if last_params.as_ref().map(|p| p.enable_tracing()) != Some(new_params.enable_tracing()) { - toggle_otel_layer(new_params.enable_tracing()); + pub fn handle_change(&self, diff: &SystemParamsDiff) { + if let Some(enabled) = diff.enable_tracing { + toggle_otel_layer(enabled) } - - last_params.replace(new_params); } } diff --git a/src/common/src/system_param/diff.rs b/src/common/src/system_param/diff.rs new file mode 100644 index 000000000000..243b3e247bec --- /dev/null +++ b/src/common/src/system_param/diff.rs @@ -0,0 +1,67 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::reader::SystemParamsRead; +use crate::for_all_params; + +macro_rules! define_diff { + ($({ $field:ident, $type:ty, $default:expr, $is_mutable:expr, $doc:literal, $($rest:tt)* },)*) => { + /// The diff of the system params. + /// + /// Fields that are changed are set to `Some`, otherwise `None`. + #[derive(Default, Debug, Clone)] + pub struct SystemParamsDiff { + $( + #[doc = $doc] + pub $field: Option<$type>, + )* + } + } +} +for_all_params!(define_diff); + +impl SystemParamsDiff { + /// Create a diff between the given two system params. + pub fn diff(prev: impl SystemParamsRead, curr: impl SystemParamsRead) -> Self { + let mut diff = Self::default(); + + macro_rules! 
set_diff_field { + ($({ $field:ident, $($rest:tt)* },)*) => { + $( + if curr.$field() != prev.$field() { + diff.$field = Some(curr.$field().to_owned()); + } + )* + }; + } + for_all_params!(set_diff_field); + + diff + } + + /// Create a diff from the given initial system params. + /// All fields will be set to `Some`. + pub fn from_initial(initial: impl SystemParamsRead) -> Self { + macro_rules! initial_field { + ($({ $field:ident, $($rest:tt)* },)*) => { + Self { + $( + $field: Some(initial.$field().to_owned()), + )* + } + }; + } + for_all_params!(initial_field) + } +} diff --git a/src/common/src/system_param/local_manager.rs b/src/common/src/system_param/local_manager.rs index 312c5577a0f8..5040d30f811d 100644 --- a/src/common/src/system_param/local_manager.rs +++ b/src/common/src/system_param/local_manager.rs @@ -20,6 +20,7 @@ use risingwave_pb::meta::SystemParams; use tokio::sync::watch::{channel, Receiver, Sender}; use super::common::CommonHandler; +use super::diff::SystemParamsDiff; use super::reader::SystemParamsReader; use super::system_params_for_test; @@ -41,28 +42,39 @@ pub struct LocalSystemParamsManager { } impl LocalSystemParamsManager { + /// Create a new instance of `LocalSystemParamsManager` and spawn a task to run + /// the common handler. pub fn new(initial_params: SystemParamsReader) -> Self { - let params = Arc::new(ArcSwap::from_pointee(initial_params.clone())); - let (tx, _) = channel(params.clone()); + let this = Self::new_inner(initial_params.clone()); // Spawn a task to run the common handler. tokio::spawn({ - let mut rx = tx.subscribe(); + let mut rx = this.tx.subscribe(); async move { + let mut params = initial_params.clone(); let handler = CommonHandler::new(initial_params); while rx.changed().await.is_ok() { + // TODO: directly watch the changes instead of diffing ourselves. 
let new_params = (**rx.borrow_and_update().load()).clone(); - handler.handle_change(new_params); + let diff = SystemParamsDiff::diff(params.as_ref(), new_params.as_ref()); + handler.handle_change(&diff); + params = new_params; } } }); - Self { params, tx } + this } pub fn for_test() -> Self { - Self::new(system_params_for_test().into()) + Self::new_inner(system_params_for_test().into()) + } + + fn new_inner(initial_params: SystemParamsReader) -> Self { + let params = Arc::new(ArcSwap::from_pointee(initial_params)); + let (tx, _) = channel(params.clone()); + Self { params, tx } } pub fn get_params(&self) -> SystemParamsReaderRef { @@ -89,12 +101,11 @@ mod tests { #[tokio::test] async fn test_manager() { - let p = SystemParams::default().into(); - let manager = LocalSystemParamsManager::new(p); + let manager = LocalSystemParamsManager::for_test(); let shared_params = manager.get_params(); let new_params = SystemParams { - sstable_size_mb: Some(1), + sstable_size_mb: Some(114514), ..Default::default() }; diff --git a/src/common/src/system_param/mod.rs b/src/common/src/system_param/mod.rs index 86dea616519d..a55b236f4b31 100644 --- a/src/common/src/system_param/mod.rs +++ b/src/common/src/system_param/mod.rs @@ -21,6 +21,7 @@ //! - Add a new method to [`reader::SystemParamsReader`]. pub mod common; +pub mod diff; pub mod local_manager; pub mod reader; @@ -31,6 +32,8 @@ use std::str::FromStr; use paste::paste; use risingwave_pb::meta::PbSystemParams; +use self::diff::SystemParamsDiff; + pub type SystemParamsError = String; type Result = core::result::Result; @@ -300,28 +303,48 @@ macro_rules! impl_default_from_other_params { macro_rules! impl_set_system_param { ($({ $field:ident, $type:ty, $default:expr, $($rest:tt)* },)*) => { - /// Set a system parameter with the given value or default one, returns the new value. 
- pub fn set_system_param(params: &mut PbSystemParams, key: &str, value: Option) -> Result { - match key { + /// Set a system parameter with the given value or default one. + /// + /// Returns the new value if changed, or an error if the parameter is unrecognized + /// or the value is invalid. + pub fn set_system_param( + params: &mut PbSystemParams, + key: &str, + value: Option>, + ) -> Result> { + use crate::system_param::reader::{SystemParamsReader, SystemParamsRead}; + + match key { $( key_of!($field) => { let v = if let Some(v) = value { - v.parse().map_err(|_| format!("cannot parse parameter value"))? + v.as_ref().parse().map_err(|_| format!("cannot parse parameter value"))? } else { $default.ok_or_else(|| format!("{} does not have a default value", key))? }; OverrideValidateOnSet::$field(&v)?; - params.$field = Some(v.clone()); - return Ok(v.to_string()) + + let changed = SystemParamsReader::new(&*params).$field() != v; + if changed { + let new_value = v.to_string(); + let diff = SystemParamsDiff { + $field: Some(v.to_owned()), + ..Default::default() + }; + params.$field = Some(v); + Ok(Some((new_value, diff))) + } else { + Ok(None) + } }, )* _ => { - return Err(format!( + Err(format!( "unrecognized system param {:?}", key - )); + )) } - }; + } } }; } @@ -433,7 +456,7 @@ mod tests { #[test] fn test_set() { - let mut p = PbSystemParams::default(); + let mut p = system_params_for_test(); // Unrecognized param. assert!(set_system_param(&mut p, "?", Some("?".to_string())).is_err()); // Value out of range. diff --git a/src/common/src/system_param/reader.rs b/src/common/src/system_param/reader.rs index 06509c1aa191..c6b8d8c5af6a 100644 --- a/src/common/src/system_param/reader.rs +++ b/src/common/src/system_param/reader.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::borrow::Borrow; + use risingwave_pb::meta::PbSystemParams; use super::{default, system_params_to_kv, ParamValue}; @@ -40,13 +42,40 @@ for_all_params!(define_system_params_read_trait); /// /// See [`SystemParamsRead`] for more details. #[derive(Clone, Debug, PartialEq)] -pub struct SystemParamsReader { - prost: PbSystemParams, +pub struct SystemParamsReader { + inner: I, +} + +impl From for SystemParamsReader +where + I: Borrow, +{ + fn from(inner: I) -> Self { + Self { inner } + } } -impl From for SystemParamsReader { - fn from(prost: PbSystemParams) -> Self { - Self { prost } +impl SystemParamsReader +where + I: Borrow, +{ + pub fn new(inner: I) -> Self { + Self { inner } + } + + /// Return a new reader with the reference to the inner system params. + pub fn as_ref(&self) -> SystemParamsReader<&PbSystemParams> { + SystemParamsReader { + inner: self.inner(), + } + } + + pub fn to_kv(&self) -> Vec<(String, String)> { + system_params_to_kv(self.inner()).unwrap() + } + + fn inner(&self) -> &PbSystemParams { + self.inner.borrow() } } @@ -54,73 +83,70 @@ impl From for SystemParamsReader { /// For example, if a parameter is introduced before the initial public release. /// /// - Otherwise, specify the fallback logic when the field is missing. 
-impl SystemParamsRead for SystemParamsReader { +impl SystemParamsRead for SystemParamsReader +where + I: Borrow, +{ fn barrier_interval_ms(&self) -> u32 { - self.prost.barrier_interval_ms.unwrap() + self.inner().barrier_interval_ms.unwrap() } fn checkpoint_frequency(&self) -> u64 { - self.prost.checkpoint_frequency.unwrap() + self.inner().checkpoint_frequency.unwrap() } fn parallel_compact_size_mb(&self) -> u32 { - self.prost.parallel_compact_size_mb.unwrap() + self.inner().parallel_compact_size_mb.unwrap() } fn sstable_size_mb(&self) -> u32 { - self.prost.sstable_size_mb.unwrap() + self.inner().sstable_size_mb.unwrap() } fn block_size_kb(&self) -> u32 { - self.prost.block_size_kb.unwrap() + self.inner().block_size_kb.unwrap() } fn bloom_false_positive(&self) -> f64 { - self.prost.bloom_false_positive.unwrap() + self.inner().bloom_false_positive.unwrap() } fn state_store(&self) -> &str { - self.prost.state_store.as_ref().unwrap() + self.inner().state_store.as_ref().unwrap() } fn data_directory(&self) -> &str { - self.prost.data_directory.as_ref().unwrap() + self.inner().data_directory.as_ref().unwrap() } fn backup_storage_url(&self) -> &str { - self.prost.backup_storage_url.as_ref().unwrap() + self.inner().backup_storage_url.as_ref().unwrap() } fn backup_storage_directory(&self) -> &str { - self.prost.backup_storage_directory.as_ref().unwrap() + self.inner().backup_storage_directory.as_ref().unwrap() } fn max_concurrent_creating_streaming_jobs(&self) -> u32 { - self.prost.max_concurrent_creating_streaming_jobs.unwrap() + self.inner().max_concurrent_creating_streaming_jobs.unwrap() } fn pause_on_next_bootstrap(&self) -> bool { - self.prost + self.inner() .pause_on_next_bootstrap .unwrap_or_else(default::pause_on_next_bootstrap) } fn enable_tracing(&self) -> bool { - self.prost + self.inner() .enable_tracing .unwrap_or_else(default::enable_tracing) } fn wasm_storage_url(&self) -> &str { - self.prost + self.inner() .wasm_storage_url .as_ref() 
.unwrap_or(&default::WASM_STORAGE_URL) } } - -impl SystemParamsReader { - pub fn to_kv(&self) -> Vec<(String, String)> { - system_params_to_kv(&self.prost).unwrap() - } -} diff --git a/src/common/src/util/chunk_coalesce.rs b/src/common/src/util/chunk_coalesce.rs index d57ebc280047..9e2c6754c57e 100644 --- a/src/common/src/util/chunk_coalesce.rs +++ b/src/common/src/util/chunk_coalesce.rs @@ -456,13 +456,13 @@ mod tests { for v in [1, 2, 3, 4, 5] { left_array_builder.append(&Some(ScalarImpl::Int32(v))); } - let left_arrays = vec![left_array_builder.finish()]; + let left_arrays = [left_array_builder.finish()]; let mut right_array_builder = DataType::Int64.create_array_builder(5); for v in [5, 4, 3, 2, 1] { right_array_builder.append(&Some(ScalarImpl::Int64(v))); } - let right_arrays = vec![right_array_builder.finish()]; + let right_arrays = [right_array_builder.finish()]; let mut output_chunks = Vec::new(); diff --git a/src/common/src/util/sort_util.rs b/src/common/src/util/sort_util.rs index 9c2a8c05b628..42d548354581 100644 --- a/src/common/src/util/sort_util.rs +++ b/src/common/src/util/sort_util.rs @@ -510,9 +510,7 @@ pub fn partial_cmp_datum_iter( ) -> Option { let mut order_types_iter = order_types.into_iter(); lhs.into_iter().partial_cmp_by(rhs, |x, y| { - let Some(order_type) = order_types_iter.next() else { - return None; - }; + let order_type = order_types_iter.next()?; partial_cmp_datum(x, y, order_type) }) } diff --git a/src/compute/src/rpc/service/config_service.rs b/src/compute/src/rpc/service/config_service.rs index 917df842be4b..da1194923030 100644 --- a/src/compute/src/rpc/service/config_service.rs +++ b/src/compute/src/rpc/service/config_service.rs @@ -34,7 +34,7 @@ impl ConfigService for ConfigServiceImpl { ) -> Result, Status> { let batch_config = serde_json::to_string(self.batch_mgr.config()) .map_err(|e| e.to_status(Code::Internal, "compute"))?; - let stream_config = serde_json::to_string(&self.stream_mgr.config().await) + let stream_config = 
serde_json::to_string(&self.stream_mgr.context().config()) .map_err(|e| e.to_status(Code::Internal, "compute"))?; let show_config_response = ShowConfigResponse { diff --git a/src/compute/src/rpc/service/exchange_service.rs b/src/compute/src/rpc/service/exchange_service.rs index d8b74be65f69..58a39fd5b56d 100644 --- a/src/compute/src/rpc/service/exchange_service.rs +++ b/src/compute/src/rpc/service/exchange_service.rs @@ -106,8 +106,8 @@ impl ExchangeService for ExchangeServiceImpl { let receiver = self .stream_mgr - .take_receiver((up_actor_id, down_actor_id)) - .await?; + .context() + .take_receiver((up_actor_id, down_actor_id))?; // Map the remaining stream to add-permits. let add_permits_stream = request_stream.map_ok(|req| match req.value.unwrap() { diff --git a/src/compute/src/rpc/service/monitor_service.rs b/src/compute/src/rpc/service/monitor_service.rs index 51dfa6dde044..82c254e58d62 100644 --- a/src/compute/src/rpc/service/monitor_service.rs +++ b/src/compute/src/rpc/service/monitor_service.rs @@ -65,7 +65,6 @@ impl MonitorService for MonitorServiceImpl { let actor_traces = self .stream_mgr .get_actor_traces() - .await .into_iter() .map(|(k, v)| (k, v.to_string())) .collect(); diff --git a/src/compute/src/rpc/service/stream_service.rs b/src/compute/src/rpc/service/stream_service.rs index 94194b473f8a..2640b505b787 100644 --- a/src/compute/src/rpc/service/stream_service.rs +++ b/src/compute/src/rpc/service/stream_service.rs @@ -48,7 +48,7 @@ impl StreamService for StreamServiceImpl { request: Request, ) -> std::result::Result, Status> { let req = request.into_inner(); - let res = self.mgr.update_actors(&req.actors).await; + let res = self.mgr.update_actors(&req.actors); match res { Err(e) => { error!(error = %e.as_report(), "failed to update stream actor"); @@ -89,7 +89,7 @@ impl StreamService for StreamServiceImpl { ) -> std::result::Result, Status> { let req = request.into_inner(); - let res = self.mgr.update_actor_info(&req.info).await; + let res = 
self.mgr.update_actor_info(&req.info); match res { Err(e) => { error!(error = %e.as_report(), "failed to update actor info table actor"); @@ -108,7 +108,7 @@ impl StreamService for StreamServiceImpl { ) -> std::result::Result, Status> { let req = request.into_inner(); let actors = req.actor_ids; - self.mgr.drop_actors(&actors).await?; + self.mgr.drop_actors(&actors)?; Ok(Response::new(DropActorsResponse { request_id: req.request_id, status: None, @@ -143,7 +143,7 @@ impl StreamService for StreamServiceImpl { // recovery. Check it here and return an error here if some actors are not found to // avoid collection hang. We need some refine in meta side to remove this workaround since // it will cause another round of unnecessary recovery. - let actor_ids = self.mgr.all_actor_ids().await; + let actor_ids = self.mgr.all_actor_ids(); let missing_actor_ids = req .actor_ids_to_collect .iter() diff --git a/src/compute/src/server.rs b/src/compute/src/server.rs index 0e4fabe75ac9..123ae5078e71 100644 --- a/src/compute/src/server.rs +++ b/src/compute/src/server.rs @@ -272,13 +272,6 @@ pub async fn compute_node_serve( config.batch.clone(), batch_manager_metrics, )); - let stream_mgr = Arc::new(LocalStreamManager::new( - advertise_addr.clone(), - state_store.clone(), - streaming_metrics.clone(), - config.streaming.clone(), - await_tree_config.clone(), - )); // NOTE: Due to some limits, we use `compute_memory_bytes + storage_memory_bytes` as // `total_compute_memory_bytes` for memory control. This is just a workaround for some @@ -305,10 +298,14 @@ pub async fn compute_node_serve( // Run a background heap profiler heap_profiler.start(); - let watermark_epoch = memory_mgr.get_watermark_epoch(); - // Set back watermark epoch to stream mgr. Executor will read epoch from stream manager instead - // of lru manager. 
- stream_mgr.set_watermark_epoch(watermark_epoch).await; + let stream_mgr = Arc::new(LocalStreamManager::new( + advertise_addr.clone(), + state_store.clone(), + streaming_metrics.clone(), + config.streaming.clone(), + await_tree_config.clone(), + memory_mgr.get_watermark_epoch(), + )); let grpc_await_tree_reg = await_tree_config .map(|config| AwaitTreeRegistryRef::new(await_tree::Registry::new(config).into())); diff --git a/src/config/example.toml b/src/config/example.toml index 7a7ea2c7aed1..413321d6ff3e 100644 --- a/src/config/example.toml +++ b/src/config/example.toml @@ -21,6 +21,7 @@ periodic_compaction_interval_sec = 60 vacuum_interval_sec = 30 vacuum_spin_interval_ms = 10 hummock_version_checkpoint_interval_sec = 30 +enable_hummock_data_archive = false min_delta_log_num_for_hummock_version_checkpoint = 10 max_heartbeat_interval_secs = 300 disable_recovery = false diff --git a/src/connector/src/parser/plain_parser.rs b/src/connector/src/parser/plain_parser.rs index 1af396db3990..cd45a7286521 100644 --- a/src/connector/src/parser/plain_parser.rs +++ b/src/connector/src/parser/plain_parser.rs @@ -271,7 +271,7 @@ mod tests { async fn source_message_stream(transactional: bool) { let begin_msg = r#"{"schema":null,"payload":{"status":"BEGIN","id":"35352:3962948040","event_count":null,"data_collections":null,"ts_ms":1704269323180}}"#; let commit_msg = r#"{"schema":null,"payload":{"status":"END","id":"35352:3962950064","event_count":11,"data_collections":[{"data_collection":"public.orders_tx","event_count":5},{"data_collection":"public.person","event_count":6}],"ts_ms":1704269323180}}"#; - let data_batches = vec![ + let data_batches = [ vec![ r#"{ "schema": null, "payload": {"after": {"customer_name": "a1", "order_date": "2020-01-30", "order_id": 10021, "order_status": false, "price": "50.50", "product_id": 102}, "before": null, "op": "c", "source": {"connector": "postgresql", "db": "mydb", "lsn": 3963199336, "name": "RW_CDC_1001", "schema": "public", "sequence": 
"[\"3963198512\",\"3963199336\"]", "snapshot": "false", "table": "orders_tx", "ts_ms": 1704355505506, "txId": 35352, "version": "2.4.2.Final", "xmin": null}, "transaction": {"data_collection_order": 1, "id": "35392:3963199336", "total_order": 1}, "ts_ms": 1704355839905} }"#, r#"{ "schema": null, "payload": {"after": {"customer_name": "a2", "order_date": "2020-02-30", "order_id": 10022, "order_status": false, "price": "50.50", "product_id": 102}, "before": null, "op": "c", "source": {"connector": "postgresql", "db": "mydb", "lsn": 3963199336, "name": "RW_CDC_1001", "schema": "public", "sequence": "[\"3963198512\",\"3963199336\"]", "snapshot": "false", "table": "orders_tx", "ts_ms": 1704355505506, "txId": 35352, "version": "2.4.2.Final", "xmin": null}, "transaction": {"data_collection_order": 1, "id": "35392:3963199336", "total_order": 1}, "ts_ms": 1704355839905} }"#, diff --git a/src/connector/src/sink/iceberg/mod.rs b/src/connector/src/sink/iceberg/mod.rs index 4412388314cf..856c8deea0f4 100644 --- a/src/connector/src/sink/iceberg/mod.rs +++ b/src/connector/src/sink/iceberg/mod.rs @@ -832,11 +832,12 @@ impl SinkCommitCoordinator for IcebergSinkCommitter { txn.append_data_file(s.data_files); txn.append_delete_file(s.delete_files); }); - txn.commit() - .await - .map_err(|err| SinkError::Iceberg(anyhow!(err)))?; + txn.commit().await.map_err(|err| { + tracing::error!(?err, "Failed to commit iceberg table"); + SinkError::Iceberg(anyhow!(err)) + })?; - tracing::info!("Succeeded to commit ti iceberg table in epoch {epoch}."); + tracing::info!("Succeeded to commit to iceberg table in epoch {epoch}."); Ok(()) } } diff --git a/src/connector/src/source/cdc/external/mock_external_table.rs b/src/connector/src/source/cdc/external/mock_external_table.rs index cf3eda6d9210..ef482a09cedb 100644 --- a/src/connector/src/source/cdc/external/mock_external_table.rs +++ b/src/connector/src/source/cdc/external/mock_external_table.rs @@ -72,7 +72,7 @@ impl MockExternalTableReader { ]), ]; 
- let snapshots = vec![snap0, snap1]; + let snapshots = [snap0, snap1]; if snap_idx >= snapshots.len() { return Ok(()); } diff --git a/src/expr/core/Cargo.toml b/src/expr/core/Cargo.toml index ff1910e9a018..773f27b12032 100644 --- a/src/expr/core/Cargo.toml +++ b/src/expr/core/Cargo.toml @@ -19,6 +19,7 @@ normal = ["workspace-hack", "ctor"] anyhow = "1" arrow-array = { workspace = true } arrow-schema = { workspace = true } +arrow-udf-js = { workspace = true } arrow-udf-wasm = { workspace = true } async-trait = "0.1" auto_impl = "1" diff --git a/src/expr/core/src/error.rs b/src/expr/core/src/error.rs index a9646c1d7703..925d23737431 100644 --- a/src/expr/core/src/error.rs +++ b/src/expr/core/src/error.rs @@ -156,7 +156,7 @@ impl MultiExprError { impl Display for MultiExprError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { for (i, e) in self.0.iter().enumerate() { - writeln!(f, "{i}: {e}")?; + writeln!(f, "{i}: {}", e.as_report())?; } Ok(()) } diff --git a/src/expr/core/src/expr/expr_udf.rs b/src/expr/core/src/expr/expr_udf.rs index 60f04799838b..260e5bb7a998 100644 --- a/src/expr/core/src/expr/expr_udf.rs +++ b/src/expr/core/src/expr/expr_udf.rs @@ -20,6 +20,7 @@ use std::time::Duration; use anyhow::Context; use arrow_schema::{Field, Fields, Schema}; +use arrow_udf_js::{CallMode, Runtime as JsRuntime}; use arrow_udf_wasm::Runtime as WasmRuntime; use await_tree::InstrumentAwait; use cfg_or_panic::cfg_or_panic; @@ -61,6 +62,7 @@ const INITIAL_RETRY_COUNT: u8 = 16; enum UdfImpl { External(Arc), Wasm(Arc), + JavaScript(JsRuntime), } #[async_trait::async_trait] @@ -123,6 +125,7 @@ impl UserDefinedFunction { let output: arrow_array::RecordBatch = match &self.imp { UdfImpl::Wasm(runtime) => runtime.call(&self.identifier, &input)?, + UdfImpl::JavaScript(runtime) => runtime.call(&self.identifier, &input)?, UdfImpl::External(client) => { let disable_retry_count = self.disable_retry_count.load(Ordering::Relaxed); let result = if disable_retry_count != 
0 { @@ -189,16 +192,36 @@ impl Build for UserDefinedFunction { let return_type = DataType::from(prost.get_return_type().unwrap()); let udf = prost.get_rex_node().unwrap().as_udf().unwrap(); + let identifier = udf.get_identifier()?; let imp = match udf.language.as_str() { "wasm" => { + let link = udf.get_link()?; // Use `block_in_place` as an escape hatch to run async code here in sync context. // Calling `block_on` directly will panic. UdfImpl::Wasm(tokio::task::block_in_place(|| { - tokio::runtime::Handle::current() - .block_on(get_or_create_wasm_runtime(&udf.link)) + tokio::runtime::Handle::current().block_on(get_or_create_wasm_runtime(link)) })?) } - _ => UdfImpl::External(get_or_create_flight_client(&udf.link)?), + "javascript" => { + let mut rt = JsRuntime::new()?; + let body = format!( + "export function {}({}) {{ {} }}", + identifier, + udf.arg_names.join(","), + udf.get_body()? + ); + rt.add_function( + identifier, + arrow_schema::DataType::try_from(&return_type)?, + CallMode::CalledOnNullInput, + &body, + )?; + UdfImpl::JavaScript(rt) + } + _ => { + let link = udf.get_link()?; + UdfImpl::External(get_or_create_flight_client(link)?) 
+ } }; let arg_schema = Arc::new(Schema::new( @@ -222,8 +245,8 @@ impl Build for UserDefinedFunction { return_type, arg_schema, imp, - identifier: udf.identifier.clone(), - span: format!("udf_call({})", udf.identifier).into(), + identifier: identifier.clone(), + span: format!("udf_call({})", identifier).into(), disable_retry_count: AtomicU8::new(0), }) } diff --git a/src/expr/core/src/table_function/user_defined.rs b/src/expr/core/src/table_function/user_defined.rs index 83658026ed56..06383543ceb7 100644 --- a/src/expr/core/src/table_function/user_defined.rs +++ b/src/expr/core/src/table_function/user_defined.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use arrow_array::RecordBatch; use arrow_schema::{Field, Fields, Schema, SchemaRef}; +use arrow_udf_js::{CallMode, Runtime as JsRuntime}; use arrow_udf_wasm::Runtime as WasmRuntime; use cfg_or_panic::cfg_or_panic; use futures_util::stream; @@ -42,6 +43,7 @@ pub struct UserDefinedTableFunction { enum UdfImpl { External(Arc), Wasm(Arc), + JavaScript(JsRuntime), } #[async_trait::async_trait] @@ -70,6 +72,11 @@ impl UdfImpl { yield res?; } } + UdfImpl::JavaScript(runtime) => { + for res in runtime.call_table_function(identifier, &input, 1024)? { + yield res?; + } + } UdfImpl::Wasm(runtime) => { for res in runtime.call_table_function(identifier, &input)? { yield res?; @@ -177,28 +184,48 @@ pub fn new_user_defined(prost: &PbTableFunction, chunk_size: usize) -> Result()?, )); + let identifier = udtf.get_identifier()?; + let return_type = DataType::from(prost.get_return_type()?); + let client = match udtf.language.as_str() { "wasm" => { + let link = udtf.get_link()?; // Use `block_in_place` as an escape hatch to run async code here in sync context. // Calling `block_on` directly will panic. 
UdfImpl::Wasm(tokio::task::block_in_place(|| { - tokio::runtime::Handle::current().block_on( - crate::expr::expr_udf::get_or_create_wasm_runtime(&udtf.link), - ) + tokio::runtime::Handle::current() + .block_on(crate::expr::expr_udf::get_or_create_wasm_runtime(link)) })?) } + "javascript" => { + let mut rt = JsRuntime::new()?; + let body = format!( + "export function* {}({}) {{ {} }}", + identifier, + udtf.arg_names.join(","), + udtf.get_body()? + ); + rt.add_function( + identifier, + arrow_schema::DataType::try_from(&return_type)?, + CallMode::CalledOnNullInput, + &body, + )?; + UdfImpl::JavaScript(rt) + } // connect to UDF service - _ => UdfImpl::External(crate::expr::expr_udf::get_or_create_flight_client( - &udtf.link, - )?), + _ => { + let link = udtf.get_link()?; + UdfImpl::External(crate::expr::expr_udf::get_or_create_flight_client(link)?) + } }; Ok(UserDefinedTableFunction { children: prost.args.iter().map(expr_build_from_prost).try_collect()?, - return_type: prost.return_type.as_ref().expect("no return type").into(), + return_type, arg_schema, client, - identifier: udtf.identifier.clone(), + identifier: identifier.clone(), chunk_size, } .boxed()) diff --git a/src/expr/impl/src/scalar/external/iceberg.rs b/src/expr/impl/src/scalar/external/iceberg.rs index 3973efee559d..cd616aa5e475 100644 --- a/src/expr/impl/src/scalar/external/iceberg.rs +++ b/src/expr/impl/src/scalar/external/iceberg.rs @@ -29,6 +29,7 @@ use risingwave_common::row::OwnedRow; use risingwave_common::types::{DataType, Datum}; use risingwave_expr::expr::BoxedExpression; use risingwave_expr::{build_function, ExprError, Result}; +use thiserror_ext::AsReport; pub struct IcebergTransform { child: BoxedExpression, @@ -93,23 +94,29 @@ fn build(return_type: DataType, mut children: Vec) -> Result | object | | diff --git a/src/frontend/Cargo.toml b/src/frontend/Cargo.toml index 8963dab80abf..3c9a1b62f94d 100644 --- a/src/frontend/Cargo.toml +++ b/src/frontend/Cargo.toml @@ -71,7 +71,7 @@ 
rw_futures_util = { workspace = true } serde = { version = "1", features = ["derive"] } serde_json = "1" sha2 = "0.10.7" -smallvec = { version = "1.12.0", features = ["serde"] } +smallvec = { version = "1.13.1", features = ["serde"] } tempfile = "3" thiserror = "1" thiserror-ext = { workspace = true } diff --git a/src/frontend/planner_test/src/lib.rs b/src/frontend/planner_test/src/lib.rs index 893188d2b421..f3c7d9250ec4 100644 --- a/src/frontend/planner_test/src/lib.rs +++ b/src/frontend/planner_test/src/lib.rs @@ -430,6 +430,7 @@ impl TestCase { append_only, cdc_table_info, include_column_options, + wildcard_idx, .. } => { let source_schema = source_schema.map(|schema| schema.into_v2_with_warning()); @@ -438,6 +439,7 @@ impl TestCase { handler_args, name, columns, + wildcard_idx, constraints, if_not_exists, source_schema, diff --git a/src/frontend/src/binder/expr/value.rs b/src/frontend/src/binder/expr/value.rs index ed1d8ad0b8f9..fa72b7b77d5c 100644 --- a/src/frontend/src/binder/expr/value.rs +++ b/src/frontend/src/binder/expr/value.rs @@ -242,7 +242,7 @@ mod tests { "0.111111", "-0.01", ]; - let data = vec![ + let data = [ Some(ScalarImpl::Int32(1)), Some(ScalarImpl::Int64(111111111111111)), Some(ScalarImpl::Decimal( @@ -254,7 +254,7 @@ mod tests { Some(ScalarImpl::Decimal(Decimal::from_str("0.111111").unwrap())), Some(ScalarImpl::Decimal(Decimal::from_str("-0.01").unwrap())), ]; - let data_type = vec![ + let data_type = [ DataType::Int32, DataType::Int64, DataType::Decimal, @@ -307,7 +307,7 @@ mod tests { ("1.25e-2"), ("1e15"), ]; - let data = vec![ + let data = [ Some(ScalarImpl::Decimal(Decimal::from_str("1000000").unwrap())), Some(ScalarImpl::Decimal(Decimal::from_str("1250000").unwrap())), Some(ScalarImpl::Decimal(Decimal::from_str("12.5").unwrap())), @@ -317,7 +317,7 @@ mod tests { Decimal::from_str("1000000000000000").unwrap(), )), ]; - let data_type = vec![ + let data_type = [ DataType::Decimal, DataType::Decimal, DataType::Decimal, diff --git 
a/src/frontend/src/catalog/function_catalog.rs b/src/frontend/src/catalog/function_catalog.rs index d0f037bcb47b..96dbbe77c2a1 100644 --- a/src/frontend/src/catalog/function_catalog.rs +++ b/src/frontend/src/catalog/function_catalog.rs @@ -26,12 +26,13 @@ pub struct FunctionCatalog { pub name: String, pub owner: u32, pub kind: FunctionKind, + pub arg_names: Vec, pub arg_types: Vec, pub return_type: DataType, pub language: String, - pub identifier: String, + pub identifier: Option, pub body: Option, - pub link: String, + pub link: Option, } #[derive(Clone, Display, PartialEq, Eq, Hash, Debug)] @@ -60,6 +61,7 @@ impl From<&PbFunction> for FunctionCatalog { name: prost.name.clone(), owner: prost.owner, kind: prost.kind.as_ref().unwrap().into(), + arg_names: prost.arg_names.clone(), arg_types: prost.arg_types.iter().map(|arg| arg.into()).collect(), return_type: prost.return_type.as_ref().expect("no return type").into(), language: prost.language.clone(), diff --git a/src/frontend/src/catalog/system_catalog/mod.rs b/src/frontend/src/catalog/system_catalog/mod.rs index c5bed6c98796..44b326b144c2 100644 --- a/src/frontend/src/catalog/system_catalog/mod.rs +++ b/src/frontend/src/catalog/system_catalog/mod.rs @@ -288,17 +288,11 @@ pub struct SystemCatalog { } pub fn get_sys_tables_in_schema(schema_name: &str) -> Option>> { - SYS_CATALOGS - .table_by_schema_name - .get(schema_name) - .map(Clone::clone) + SYS_CATALOGS.table_by_schema_name.get(schema_name).cloned() } pub fn get_sys_views_in_schema(schema_name: &str) -> Option>> { - SYS_CATALOGS - .view_by_schema_name - .get(schema_name) - .map(Clone::clone) + SYS_CATALOGS.view_by_schema_name.get(schema_name).cloned() } /// The global registry of all builtin catalogs. 
diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_functions.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_functions.rs index 7e618b30ec62..da4f7de4f643 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_functions.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_functions.rs @@ -70,7 +70,7 @@ impl SysCatalogReaderImpl { ))), Some(ScalarImpl::Int32(function.return_type.to_oid())), Some(ScalarImpl::Utf8(function.language.clone().into())), - Some(ScalarImpl::Utf8(function.link.clone().into())), + function.link.clone().map(|s| ScalarImpl::Utf8(s.into())), Some(ScalarImpl::Utf8( get_acl_items( &Object::FunctionId(function.id.function_id()), diff --git a/src/frontend/src/catalog/table_catalog.rs b/src/frontend/src/catalog/table_catalog.rs index 50f74f860e4d..c632cca5c745 100644 --- a/src/frontend/src/catalog/table_catalog.rs +++ b/src/frontend/src/catalog/table_catalog.rs @@ -563,7 +563,7 @@ impl From for TableCatalog { .unwrap_or_else(Cardinality::unknown), created_at_epoch: tb.created_at_epoch.map(Epoch::from), initialized_at_epoch: tb.initialized_at_epoch.map(Epoch::from), - cleaned_by_watermark: matches!(tb.cleaned_by_watermark, true), + cleaned_by_watermark: tb.cleaned_by_watermark, create_type: CreateType::from_prost(create_type), description: tb.description, incoming_sinks: tb.incoming_sinks.clone(), diff --git a/src/frontend/src/expr/table_function.rs b/src/frontend/src/expr/table_function.rs index 7fa8232736ba..e3000d0c245a 100644 --- a/src/frontend/src/expr/table_function.rs +++ b/src/frontend/src/expr/table_function.rs @@ -73,10 +73,12 @@ impl TableFunction { .udtf_catalog .as_ref() .map(|c| UserDefinedTableFunctionPb { + arg_names: c.arg_names.clone(), arg_types: c.arg_types.iter().map(|t| t.to_protobuf()).collect(), language: c.language.clone(), link: c.link.clone(), identifier: c.identifier.clone(), + body: c.body.clone(), }), } } diff --git a/src/frontend/src/expr/user_defined_function.rs 
b/src/frontend/src/expr/user_defined_function.rs index 165774d1acb4..0724b5525461 100644 --- a/src/frontend/src/expr/user_defined_function.rs +++ b/src/frontend/src/expr/user_defined_function.rs @@ -51,13 +51,13 @@ impl UserDefinedFunction { // FIXME(yuhao): owner is not in udf proto. owner: u32::MAX - 1, kind: FunctionKind::Scalar, + arg_names: udf.arg_names.clone(), arg_types, return_type, language: udf.get_language().clone(), - identifier: udf.get_identifier().clone(), - // TODO: Ensure if we need `body` here - body: None, - link: udf.get_link().clone(), + identifier: udf.identifier.clone(), + body: udf.body.clone(), + link: udf.link.clone(), }; Ok(Self { @@ -81,6 +81,7 @@ impl Expr for UserDefinedFunction { rex_node: Some(RexNode::Udf(UserDefinedFunction { children: self.args.iter().map(Expr::to_expr_proto).collect(), name: self.catalog.name.clone(), + arg_names: self.catalog.arg_names.clone(), arg_types: self .catalog .arg_types @@ -90,6 +91,7 @@ impl Expr for UserDefinedFunction { language: self.catalog.language.clone(), identifier: self.catalog.identifier.clone(), link: self.catalog.link.clone(), + body: self.catalog.body.clone(), })), } } diff --git a/src/frontend/src/handler/alter_parallelism.rs b/src/frontend/src/handler/alter_parallelism.rs index c307c83bb077..50bbb1792ff9 100644 --- a/src/frontend/src/handler/alter_parallelism.rs +++ b/src/frontend/src/handler/alter_parallelism.rs @@ -19,6 +19,7 @@ use risingwave_pb::meta::table_parallelism::{AutoParallelism, FixedParallelism, use risingwave_pb::meta::{PbTableParallelism, TableParallelism}; use risingwave_sqlparser::ast::{ObjectName, SetVariableValue, SetVariableValueSingle, Value}; use risingwave_sqlparser::keywords::Keyword; +use thiserror_ext::AsReport; use super::{HandlerArgs, RwPgResponse}; use crate::catalog::root_catalog::SchemaPath; @@ -115,10 +116,10 @@ fn extract_table_parallelism(parallelism: SetVariableValue) -> Result auto_parallelism, 
SetVariableValue::Single(SetVariableValueSingle::Literal(Value::Number(v))) => { - let fixed_parallelism = v.parse().map_err(|e| { + let fixed_parallelism = v.parse::().map_err(|e| { ErrorCode::InvalidInputSyntax(format!( "target parallelism must be a valid number or auto: {}", - e + e.as_report() )) })?; diff --git a/src/frontend/src/handler/alter_table_column.rs b/src/frontend/src/handler/alter_table_column.rs index cceeaf789f74..b765c84b6e4f 100644 --- a/src/frontend/src/handler/alter_table_column.rs +++ b/src/frontend/src/handler/alter_table_column.rs @@ -153,6 +153,7 @@ pub async fn handle_alter_table_column( constraints, source_watermarks, append_only, + wildcard_idx, .. } = definition else { @@ -167,6 +168,7 @@ pub async fn handle_alter_table_column( handler_args, col_id_gen, columns, + wildcard_idx, constraints, source_watermarks, append_only, diff --git a/src/frontend/src/handler/create_function.rs b/src/frontend/src/handler/create_function.rs index 64d5d615dce5..10a7fab06267 100644 --- a/src/frontend/src/handler/create_function.rs +++ b/src/frontend/src/handler/create_function.rs @@ -54,7 +54,7 @@ pub async fn handle_create_function( Some(lang) => { let lang = lang.real_value().to_lowercase(); match &*lang { - "python" | "java" | "wasm" => lang, + "python" | "java" | "wasm" | "javascript" => lang, _ => { return Err(ErrorCode::InvalidParameterValue(format!( "language {} is not supported", @@ -96,12 +96,10 @@ pub async fn handle_create_function( } }; - let Some(using) = params.using else { - return Err(ErrorCode::InvalidParameterValue("USING must be specified".to_string()).into()); - }; - + let mut arg_names = vec![]; let mut arg_types = vec![]; for arg in args.unwrap_or_default() { + arg_names.push(arg.name.map_or("".to_string(), |n| n.real_value())); arg_types.push(bind_data_type(&arg.data_type)?); } @@ -124,12 +122,13 @@ pub async fn handle_create_function( return Err(CatalogError::Duplicated("function", name).into()); } - let link; let identifier; + let 
mut link = None; + let mut body = None; match language.as_str() { "python" | "java" | "" => { - let CreateFunctionUsing::Link(l) = using else { + let Some(CreateFunctionUsing::Link(l)) = params.using else { return Err(ErrorCode::InvalidParameterValue( "USING LINK must be specified".to_string(), ) @@ -141,11 +140,10 @@ pub async fn handle_create_function( ); }; identifier = id; - link = l; // check UDF server { - let client = ArrowFlightUdfClient::connect(&link) + let client = ArrowFlightUdfClient::connect(&l) .await .map_err(|e| anyhow!(e))?; /// A helper function to create a unnamed field from data type. @@ -171,6 +169,20 @@ pub async fn handle_create_function( .await .context("failed to check UDF signature")?; } + link = Some(l); + } + "javascript" => { + identifier = function_name.to_string(); + body = Some(match params.as_ { + Some(FunctionDefinition::SingleQuotedDef(s)) => s, + Some(FunctionDefinition::DoubleDollarDef(s)) => s, + _ => { + return Err(ErrorCode::InvalidParameterValue( + "AS must be specified".to_string(), + ) + .into()) + } + }); } "wasm" => { identifier = wasm_identifier( @@ -179,12 +191,17 @@ pub async fn handle_create_function( &return_type, matches!(kind, Kind::Table(_)), ); - + let Some(using) = params.using else { + return Err(ErrorCode::InvalidParameterValue( + "USING must be specified".to_string(), + ) + .into()); + }; link = match using { CreateFunctionUsing::Link(link) => { let runtime = get_or_create_wasm_runtime(&link).await?; check_wasm_function(&runtime, &identifier)?; - link + Some(link) } CreateFunctionUsing::Base64(encoded) => { // decode wasm binary from base64 @@ -205,7 +222,11 @@ pub async fn handle_create_function( ) .await?; - format!("{}/{}", system_params.wasm_storage_url(), object_name) + Some(format!( + "{}/{}", + system_params.wasm_storage_url(), + object_name + )) } }; } @@ -218,12 +239,13 @@ pub async fn handle_create_function( database_id, name: function_name, kind: Some(kind), + arg_names, arg_types: 
arg_types.into_iter().map(|t| t.into()).collect(), return_type: Some(return_type.into()), language, - identifier, - body: None, + identifier: Some(identifier), link, + body, owner: session.user_id(), }; diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index 767e4318ab26..f3d905357155 100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -471,6 +471,7 @@ pub(crate) async fn reparse_table_for_sink( let col_id_gen = ColumnIdGenerator::new_alter(table_catalog); let Statement::CreateTable { columns, + wildcard_idx, constraints, source_watermarks, append_only, @@ -488,6 +489,7 @@ pub(crate) async fn reparse_table_for_sink( handler_args, col_id_gen, columns, + wildcard_idx, constraints, source_watermarks, append_only, diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index f66bc33e3294..d677d123812d 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -16,6 +16,7 @@ use std::collections::{BTreeMap, HashMap}; use std::rc::Rc; use std::sync::LazyLock; +use anyhow::Context; use either::Either; use itertools::Itertools; use maplit::{convert_args, hashmap}; @@ -24,7 +25,7 @@ use risingwave_common::catalog::{ is_column_ids_dedup, ColumnCatalog, ColumnDesc, TableId, INITIAL_SOURCE_VERSION_ID, KAFKA_TIMESTAMP_COLUMN_NAME, }; -use risingwave_common::error::ErrorCode::{self, InvalidInputSyntax, ProtocolError}; +use risingwave_common::error::ErrorCode::{self, InvalidInputSyntax, NotSupported, ProtocolError}; use risingwave_common::error::{Result, RwError}; use risingwave_common::types::DataType; use risingwave_connector::parser::{ @@ -70,6 +71,7 @@ use crate::handler::util::{ get_connector, is_cdc_connector, is_kafka_connector, SourceSchemaCompatExt, }; use crate::handler::HandlerArgs; +use crate::optimizer::plan_node::generic::SourceNodeKind; use 
crate::optimizer::plan_node::{LogicalSource, ToStream, ToStreamContext}; use crate::session::SessionImpl; use crate::utils::resolve_privatelink_in_with_option; @@ -385,8 +387,7 @@ pub(crate) async fn bind_columns_from_source( (Format::Plain, Encode::Csv) => { let chars = consume_string_from_options(&mut format_encode_options_to_consume, "delimiter")?.0; - let delimiter = - get_delimiter(chars.as_str()).map_err(|e| RwError::from(e.to_string()))?; + let delimiter = get_delimiter(chars.as_str()).context("failed to parse delimiter")?; let has_header = try_consume_string_from_options( &mut format_encode_options_to_consume, "without_header", @@ -556,17 +557,40 @@ pub(crate) fn bind_all_columns( cols_from_source: Option>, cols_from_sql: Vec, col_defs_from_sql: &[ColumnDef], + wildcard_idx: Option, ) -> Result> { if let Some(cols_from_source) = cols_from_source { if cols_from_sql.is_empty() { Ok(cols_from_source) + } else if let Some(wildcard_idx) = wildcard_idx { + if col_defs_from_sql.iter().any(|c| !c.is_generated()) { + Err(RwError::from(NotSupported( + "Only generated columns are allowed in user-defined schema from SQL" + .to_string(), + "Remove the non-generated columns".to_string(), + ))) + } else { + // Replace `*` with `cols_from_source` + let mut cols_from_sql = cols_from_sql; + let mut cols_from_source = cols_from_source; + let mut cols_from_sql_r = cols_from_sql.split_off(wildcard_idx); + cols_from_sql.append(&mut cols_from_source); + cols_from_sql.append(&mut cols_from_sql_r); + Ok(cols_from_sql) + } } else { // TODO(yuhao): https://github.com/risingwavelabs/risingwave/issues/12209 Err(RwError::from(ProtocolError( - format!("User-defined schema from SQL is not allowed with FORMAT {} ENCODE {}. \ - Please refer to https://www.risingwave.dev/docs/current/sql-create-source/ for more information.", source_schema.format, source_schema.row_encode)))) + format!("User-defined schema from SQL is not allowed with FORMAT {} ENCODE {}. 
\ + Please refer to https://www.risingwave.dev/docs/current/sql-create-source/ for more information.", source_schema.format, source_schema.row_encode)))) } } else { + if wildcard_idx.is_some() { + return Err(RwError::from(NotSupported( + "Wildcard in user-defined schema is only allowed when there exists columns from external schema".to_string(), + "Remove the wildcard or use a source with external schema".to_string(), + ))); + } // FIXME(yuhao): cols_from_sql should be None is no `()` is given. if cols_from_sql.is_empty() { return Err(RwError::from(ProtocolError( @@ -1147,6 +1171,7 @@ pub async fn handle_create_source( columns_from_resolve_source, columns_from_sql, &stmt.columns, + stmt.wildcard_idx, )?; // add additional columns before bind pk, because `format upsert` requires the key column handle_addition_columns(&with_properties, stmt.include_column_options, &mut columns)?; @@ -1238,12 +1263,9 @@ pub async fn handle_create_source( let graph = { let context = OptimizerContext::from_handler_args(handler_args); // cdc source is an append-only source in plain json format - let source_node = LogicalSource::new( - Some(Rc::new(SourceCatalog::from(&source))), - columns.clone(), - row_id_index, - false, - false, + let source_node = LogicalSource::with_catalog( + Rc::new(SourceCatalog::from(&source)), + SourceNodeKind::CreateSourceWithStreamjob, context.into(), )?; diff --git a/src/frontend/src/handler/create_sql_function.rs b/src/frontend/src/handler/create_sql_function.rs index 21798b17b2ac..2af3f5d9291b 100644 --- a/src/frontend/src/handler/create_sql_function.rs +++ b/src/frontend/src/handler/create_sql_function.rs @@ -168,8 +168,10 @@ pub async fn handle_create_sql_function( } }; + let mut arg_names = vec![]; let mut arg_types = vec![]; for arg in args.unwrap_or_default() { + arg_names.push(arg.name.map_or("".to_string(), |n| n.real_value())); arg_types.push(bind_data_type(&arg.data_type)?); } @@ -235,12 +237,13 @@ pub async fn handle_create_sql_function( 
database_id, name: function_name, kind: Some(kind), + arg_names, arg_types: arg_types.into_iter().map(|t| t.into()).collect(), return_type: Some(return_type.into()), language, - identifier: "".to_string(), + identifier: None, body: Some(body), - link: "".to_string(), + link: None, owner: session.user_id(), }; diff --git a/src/frontend/src/handler/create_table.rs b/src/frontend/src/handler/create_table.rs index 40d0268d63a7..d0aaa4c5c21a 100644 --- a/src/frontend/src/handler/create_table.rs +++ b/src/frontend/src/handler/create_table.rs @@ -59,6 +59,7 @@ use crate::handler::create_source::{ check_source_schema, handle_addition_columns, validate_compatibility, UPSTREAM_SOURCE_KEY, }; use crate::handler::HandlerArgs; +use crate::optimizer::plan_node::generic::SourceNodeKind; use crate::optimizer::plan_node::{LogicalCdcScan, LogicalSource}; use crate::optimizer::property::{Order, RequiredDist}; use crate::optimizer::{OptimizerContext, OptimizerContextRef, PlanRef, PlanRoot}; @@ -452,6 +453,7 @@ pub(crate) async fn gen_create_table_plan_with_source( context: OptimizerContext, table_name: ObjectName, column_defs: Vec, + wildcard_idx: Option, constraints: Vec, source_schema: ConnectorSchema, source_watermarks: Vec, @@ -487,6 +489,7 @@ pub(crate) async fn gen_create_table_plan_with_source( columns_from_resolve_source, columns_from_sql, &column_defs, + wildcard_idx, )?; // add additional columns before bind pk, because `format upsert` requires the key column @@ -680,8 +683,7 @@ fn gen_table_plan_inner( source_catalog.clone(), columns.clone(), row_id_index, - false, - true, + SourceNodeKind::CreateTable, context.clone(), )? 
.into(); @@ -882,6 +884,7 @@ fn derive_connect_properties( Ok(connect_properties.into_iter().collect()) } +#[allow(clippy::too_many_arguments)] pub(super) async fn handle_create_table_plan( context: OptimizerContext, col_id_gen: ColumnIdGenerator, @@ -889,6 +892,7 @@ pub(super) async fn handle_create_table_plan( cdc_table_info: Option, table_name: ObjectName, column_defs: Vec, + wildcard_idx: Option, constraints: Vec, source_watermarks: Vec, append_only: bool, @@ -907,6 +911,7 @@ pub(super) async fn handle_create_table_plan( context, table_name.clone(), column_defs, + wildcard_idx, constraints, source_schema, source_watermarks, @@ -958,6 +963,7 @@ pub async fn handle_create_table( handler_args: HandlerArgs, table_name: ObjectName, column_defs: Vec, + wildcard_idx: Option, constraints: Vec, if_not_exists: bool, source_schema: Option, @@ -990,6 +996,7 @@ pub async fn handle_create_table( cdc_table_info, table_name.clone(), column_defs, + wildcard_idx, constraints, source_watermarks, append_only, @@ -1051,6 +1058,7 @@ pub async fn generate_stream_graph_for_table( handler_args: HandlerArgs, col_id_gen: ColumnIdGenerator, columns: Vec, + wildcard_idx: Option, constraints: Vec, source_watermarks: Vec, append_only: bool, @@ -1064,6 +1072,7 @@ pub async fn generate_stream_graph_for_table( context, table_name, columns, + wildcard_idx, constraints, source_schema, source_watermarks, diff --git a/src/frontend/src/handler/explain.rs b/src/frontend/src/handler/explain.rs index ca76abe8e8d8..b7981cf7aec6 100644 --- a/src/frontend/src/handler/explain.rs +++ b/src/frontend/src/handler/explain.rs @@ -64,6 +64,7 @@ async fn do_handle_explain( append_only, cdc_table_info, include_column_options, + wildcard_idx, .. 
} => { let col_id_gen = ColumnIdGenerator::new_initial(); @@ -77,6 +78,7 @@ async fn do_handle_explain( cdc_table_info, name.clone(), columns, + wildcard_idx, constraints, source_watermarks, append_only, diff --git a/src/frontend/src/handler/mod.rs b/src/frontend/src/handler/mod.rs index b4227a765e85..806daa89ce02 100644 --- a/src/frontend/src/handler/mod.rs +++ b/src/frontend/src/handler/mod.rs @@ -244,6 +244,7 @@ pub async fn handle( Statement::CreateTable { name, columns, + wildcard_idx, constraints, query, with_options: _, // It is put in OptimizerContext @@ -279,6 +280,7 @@ pub async fn handle( handler_args, name, columns, + wildcard_idx, constraints, if_not_exists, source_schema, diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs index 704a8ceb1bc0..d5addc3094e4 100644 --- a/src/frontend/src/handler/show.rs +++ b/src/frontend/src/handler/show.rs @@ -252,7 +252,7 @@ pub async fn handle_show_object( Some(t.arg_types.iter().map(|t| t.to_string()).join(", ").into()), Some(t.return_type.to_string().into()), Some(t.language.clone().into()), - Some(t.link.clone().into()), + t.link.clone().map(Into::into), ]) }) .collect_vec(); diff --git a/src/frontend/src/optimizer/mod.rs b/src/frontend/src/optimizer/mod.rs index cc66d1341b4f..4e8caae1cc40 100644 --- a/src/frontend/src/optimizer/mod.rs +++ b/src/frontend/src/optimizer/mod.rs @@ -70,7 +70,7 @@ use self::property::{Cardinality, RequiredDist}; use self::rule::*; use crate::catalog::table_catalog::{TableType, TableVersion}; use crate::expr::TimestamptzExprFinder; -use crate::optimizer::plan_node::generic::Union; +use crate::optimizer::plan_node::generic::{SourceNodeKind, Union}; use crate::optimizer::plan_node::{ BatchExchange, PlanNodeType, PlanTreeNode, RewriteExprsRecursive, StreamExchange, StreamUnion, ToStream, VisitExprsRecursive, @@ -622,8 +622,7 @@ impl PlanRoot { None, columns.clone(), row_id_index, - false, - true, + SourceNodeKind::CreateTable, context.clone(), ) .and_then(|s| 
s.to_stream(&mut ToStreamContext::new(false)))?; diff --git a/src/frontend/src/optimizer/plan_node/generic/source.rs b/src/frontend/src/optimizer/plan_node/generic/source.rs index bbb1b1f48673..803b74cd6fa1 100644 --- a/src/frontend/src/optimizer/plan_node/generic/source.rs +++ b/src/frontend/src/optimizer/plan_node/generic/source.rs @@ -20,6 +20,7 @@ use educe::Educe; use risingwave_common::catalog::{ColumnCatalog, Field, Schema}; use risingwave_common::types::DataType; use risingwave_common::util::sort_util::OrderType; +use risingwave_connector::source::ConnectorProperties; use super::super::utils::TableCatalogBuilder; use super::GenericPlanNode; @@ -28,20 +29,37 @@ use crate::optimizer::optimizer_context::OptimizerContextRef; use crate::optimizer::property::FunctionalDependencySet; use crate::{TableCatalog, WithOptions}; +/// In which scnario the source node is created +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[expect(clippy::enum_variant_names)] +pub enum SourceNodeKind { + /// `CREATE TABLE` with a connector. + CreateTable, + /// `CREATE SOURCE` with a streaming job (backfill-able source). + CreateSourceWithStreamjob, + /// `CREATE MATERIALIZED VIEW` which selects from a source. + /// + /// Note: + /// - For non backfill-able source, `CREATE SOURCE` will not create a source node, and `CREATE MATERIALIZE VIEW` will create a `LogicalSource`. + /// - For backfill-able source, `CREATE MATERIALIZE VIEW` will create `LogicalSourceBackfill` instead of `LogicalSource`. + CreateMViewOrBatch, +} + /// [`Source`] returns contents of a table or other equivalent object #[derive(Debug, Clone, Educe)] #[educe(PartialEq, Eq, Hash)] pub struct Source { /// If there is an external stream source, `catalog` will be `Some`. Otherwise, it is `None`. pub catalog: Option>, - /// NOTE(Yuanxin): Here we store column descriptions, pk column ids, and row id index for plan - /// generating, even if there is no external stream source. 
+ + // NOTE: Here we store `column_catalog` and `row_id_index` + // because they are needed when `catalog` is None. + // When `catalog` is Some, they are the same as these fields in `catalog`. pub column_catalog: Vec, pub row_id_index: Option, - /// Whether the "SourceNode" should generate the row id column for append only source - pub gen_row_id: bool, - /// True if it is a source created when creating table with a source. - pub for_table: bool, + + pub kind: SourceNodeKind, + #[educe(PartialEq(ignore))] #[educe(Hash(ignore))] pub ctx: OptimizerContextRef, @@ -80,6 +98,30 @@ impl GenericPlanNode for Source { } impl Source { + pub fn is_new_fs_connector(&self) -> bool { + self.catalog.as_ref().is_some_and(|catalog| { + ConnectorProperties::is_new_fs_connector_b_tree_map(&catalog.with_properties) + }) + } + + /// The columns in stream/batch source node indicate the actual columns it will produce, + /// instead of the columns defined in source catalog. The difference is generated columns. + pub fn exclude_generated_columns(mut self) -> (Self, Option) { + let original_row_id_index = self.row_id_index; + // minus the number of generated columns before row_id_index. 
+ self.row_id_index = original_row_id_index.map(|idx| { + let mut cnt = 0; + for col in self.column_catalog.iter().take(idx + 1) { + if col.is_generated() { + cnt += 1; + } + } + idx - cnt + }); + self.column_catalog.retain(|c| !c.is_generated()); + (self, original_row_id_index) + } + pub fn kafka_timestamp_range_value(&self) -> (Option, Option) { let (lower_bound, upper_bound) = &self.kafka_timestamp_range; let lower_bound = match lower_bound { diff --git a/src/frontend/src/optimizer/plan_node/logical_source.rs b/src/frontend/src/optimizer/plan_node/logical_source.rs index f86f31c1e076..fbc9fe7a40c8 100644 --- a/src/frontend/src/optimizer/plan_node/logical_source.rs +++ b/src/frontend/src/optimizer/plan_node/logical_source.rs @@ -18,18 +18,17 @@ use std::ops::Bound::{Excluded, Included, Unbounded}; use std::rc::Rc; use fixedbitset::FixedBitSet; -use itertools::Itertools; use pretty_xmlish::{Pretty, XmlNode}; use risingwave_common::bail_not_implemented; use risingwave_common::catalog::{ ColumnCatalog, ColumnDesc, Field, Schema, KAFKA_TIMESTAMP_COLUMN_NAME, }; use risingwave_common::error::Result; -use risingwave_connector::source::{ConnectorProperties, DataType}; +use risingwave_connector::source::DataType; use risingwave_pb::plan_common::column_desc::GeneratedOrDefaultColumn; use risingwave_pb::plan_common::GeneratedColumnDesc; -use super::generic::GenericPlanRef; +use super::generic::{GenericPlanRef, SourceNodeKind}; use super::stream_watermark_filter::StreamWatermarkFilter; use super::utils::{childless_record, Distill}; use super::{ @@ -60,6 +59,9 @@ pub struct LogicalSource { /// Expressions to output. This field presents and will be turned to a `Project` when /// converting to a physical plan, only if there are generated columns. output_exprs: Option>, + /// When there are generated columns, the `StreamRowIdGen`'s row_id_index is different from + /// the one in `core`. So we store the one in `output_exprs` here. 
+ output_row_id_index: Option, } impl LogicalSource { @@ -67,8 +69,7 @@ impl LogicalSource { source_catalog: Option>, column_catalog: Vec, row_id_index: Option, - gen_row_id: bool, - for_table: bool, + kind: SourceNodeKind, ctx: OptimizerContextRef, ) -> Result { let kafka_timestamp_range = (Bound::Unbounded, Bound::Unbounded); @@ -76,8 +77,7 @@ impl LogicalSource { catalog: source_catalog, column_catalog, row_id_index, - gen_row_id, - for_table, + kind, ctx, kafka_timestamp_range, }; @@ -85,33 +85,40 @@ impl LogicalSource { let base = PlanBase::new_logical_with_core(&core); let output_exprs = Self::derive_output_exprs_from_generated_columns(&core.column_catalog)?; + let (core, output_row_id_index) = core.exclude_generated_columns(); Ok(LogicalSource { base, core, output_exprs, + output_row_id_index, }) } pub fn with_catalog( source_catalog: Rc, - for_table: bool, + kind: SourceNodeKind, ctx: OptimizerContextRef, ) -> Result { let column_catalogs = source_catalog.columns.clone(); let row_id_index = source_catalog.row_id_index; - let gen_row_id = source_catalog.append_only; + if !source_catalog.append_only { + assert!(row_id_index.is_none()); + } Self::new( Some(source_catalog), column_catalogs, row_id_index, - gen_row_id, - for_table, + kind, ctx, ) } + /// If there are no generated columns, returns `None`. + /// + /// Otherwise, the returned expressions correspond to all columns. + /// Non-generated columns are represented by `InputRef`. 
pub fn derive_output_exprs_from_generated_columns( columns: &[ColumnCatalog], ) -> Result>> { @@ -162,9 +169,9 @@ impl LogicalSource { Ok(Some(exprs)) } - fn rewrite_new_s3_plan(&self) -> Result { + /// `StreamSource` (list) -> shuffle -> `StreamDedup` + fn create_fs_list_plan(core: generic::Source) -> Result { let logical_source = generic::Source { - catalog: self.core.catalog.clone(), column_catalog: vec![ ColumnCatalog { column_desc: ColumnDesc::from_field_with_column_id( @@ -204,8 +211,7 @@ impl LogicalSource { }, ], row_id_index: None, - gen_row_id: false, - ..self.core.clone() + ..core }; let mut new_s3_plan: PlanRef = StreamSource { base: PlanBase::new_stream_with_core( @@ -229,20 +235,6 @@ impl LogicalSource { Ok(new_s3_plan) } - /// `row_id_index` in source node should rule out generated column - #[must_use] - fn rewrite_row_id_idx(columns: &[ColumnCatalog], row_id_index: Option) -> Option { - row_id_index.map(|idx| { - let mut cnt = 0; - for col in columns.iter().take(idx + 1) { - if col.is_generated() { - cnt += 1; - } - } - idx - cnt - }) - } - pub fn source_catalog(&self) -> Option> { self.core.catalog.clone() } @@ -254,52 +246,7 @@ impl LogicalSource { base: self.base.clone(), core, output_exprs: self.output_exprs.clone(), - } - } - - /// The columns in stream/batch source node indicate the actual columns it will produce, - /// instead of the columns defined in source catalog. The difference is generated columns. - #[must_use] - fn rewrite_to_stream_batch_source(&self) -> generic::Source { - let column_catalog = self.core.column_catalog.clone(); - // Filter out the generated columns. 
- let row_id_index = Self::rewrite_row_id_idx(&column_catalog, self.core.row_id_index); - let source_column_catalogs = column_catalog - .into_iter() - .filter(|c| !c.is_generated()) - .collect_vec(); - generic::Source { - catalog: self.core.catalog.clone(), - column_catalog: source_column_catalogs, - row_id_index, - ctx: self.core.ctx.clone(), - ..self.core - } - } - - fn wrap_with_optional_generated_columns_stream_proj( - &self, - input: Option, - ) -> Result { - if let Some(exprs) = &self.output_exprs { - let source: PlanRef = - dispatch_new_s3_plan(self.rewrite_to_stream_batch_source(), input); - let logical_project = generic::Project::new(exprs.to_vec(), source); - Ok(StreamProject::new(logical_project).into()) - } else { - let source = dispatch_new_s3_plan(self.core.clone(), input); - Ok(source) - } - } - - fn wrap_with_optional_generated_columns_batch_proj(&self) -> Result { - if let Some(exprs) = &self.output_exprs { - let source = BatchSource::new(self.rewrite_to_stream_batch_source()); - let logical_project = generic::Project::new(exprs.to_vec(), source.into()); - Ok(BatchProject::new(logical_project).into()) - } else { - let source = BatchSource::new(self.core.clone()); - Ok(source.into()) + output_row_id_index: self.output_row_id_index, } } } @@ -542,49 +489,62 @@ impl PredicatePushdown for LogicalSource { impl ToBatch for LogicalSource { fn to_batch(&self) -> Result { - if self.core.catalog.is_some() - && ConnectorProperties::is_new_fs_connector_b_tree_map( - &self.core.catalog.as_ref().unwrap().with_properties, - ) - { - bail_not_implemented!("New S3 connector for batch"); + if self.core.is_new_fs_connector() { + bail_not_implemented!("New fs connector for batch"); } - let source = self.wrap_with_optional_generated_columns_batch_proj()?; - Ok(source) + let mut plan: PlanRef = BatchSource::new(self.core.clone()).into(); + + if let Some(exprs) = &self.output_exprs { + let logical_project = generic::Project::new(exprs.to_vec(), plan); + plan = 
BatchProject::new(logical_project).into(); + } + + Ok(plan) } } impl ToStream for LogicalSource { fn to_stream(&self, _ctx: &mut ToStreamContext) -> Result { - let mut plan_prefix: Option = None; let mut plan: PlanRef; - if self.core.catalog.is_some() - && ConnectorProperties::is_new_fs_connector_b_tree_map( - &self.core.catalog.as_ref().unwrap().with_properties, - ) - { - plan_prefix = Some(self.rewrite_new_s3_plan()?); - } - plan = if self.core.for_table { - dispatch_new_s3_plan(self.rewrite_to_stream_batch_source(), plan_prefix) - } else { - // Create MV on source. - self.wrap_with_optional_generated_columns_stream_proj(plan_prefix)? - }; + match self.core.kind { + SourceNodeKind::CreateTable | SourceNodeKind::CreateSourceWithStreamjob => { + // Note: for create table, row_id and generated columns is created in plan_root.gen_table_plan + if self.core.is_new_fs_connector() { + plan = Self::create_fs_list_plan(self.core.clone())?; + plan = StreamFsFetch::new(plan, self.core.clone()).into(); + } else { + plan = StreamSource::new(self.core.clone()).into() + } + } + SourceNodeKind::CreateMViewOrBatch => { + // Create MV on source. 
+ if self.core.is_new_fs_connector() { + plan = Self::create_fs_list_plan(self.core.clone())?; + plan = StreamFsFetch::new(plan, self.core.clone()).into(); + } else { + plan = StreamSource::new(self.core.clone()).into() + } - if let Some(catalog) = self.source_catalog() - && !catalog.watermark_descs.is_empty() - && !self.core.for_table - { - plan = StreamWatermarkFilter::new(plan, catalog.watermark_descs.clone()).into(); - } + if let Some(exprs) = &self.output_exprs { + let logical_project = generic::Project::new(exprs.to_vec(), plan); + plan = StreamProject::new(logical_project).into(); + } + + if let Some(catalog) = self.source_catalog() + && !catalog.watermark_descs.is_empty() + { + plan = StreamWatermarkFilter::new(plan, catalog.watermark_descs.clone()).into(); + } - assert!(!(self.core.gen_row_id && self.core.for_table)); - if let Some(row_id_index) = self.core.row_id_index - && self.core.gen_row_id - { - plan = StreamRowIdGen::new_with_dist(plan, row_id_index, HashShard(vec![row_id_index])) - .into(); + if let Some(row_id_index) = self.output_row_id_index { + plan = StreamRowIdGen::new_with_dist( + plan, + row_id_index, + HashShard(vec![row_id_index]), + ) + .into(); + } + } } Ok(plan) } @@ -599,12 +559,3 @@ impl ToStream for LogicalSource { )) } } - -#[inline] -fn dispatch_new_s3_plan(source: generic::Source, input: Option) -> PlanRef { - if let Some(input) = input { - StreamFsFetch::new(input, source).into() - } else { - StreamSource::new(source).into() - } -} diff --git a/src/frontend/src/optimizer/plan_node/stream_cdc_table_scan.rs b/src/frontend/src/optimizer/plan_node/stream_cdc_table_scan.rs index 49b39ef627e7..bbce650a7cd4 100644 --- a/src/frontend/src/optimizer/plan_node/stream_cdc_table_scan.rs +++ b/src/frontend/src/optimizer/plan_node/stream_cdc_table_scan.rs @@ -255,6 +255,7 @@ impl StreamCdcTableScan { // The table desc used by backfill executor state_table: Some(catalog), cdc_table_desc: Some(self.core.cdc_table_desc.to_protobuf()), + 
rate_limit: self.base.ctx().overwrite_options().streaming_rate_limit, }); // plan: merge -> filter -> exchange(simple) -> stream_scan diff --git a/src/frontend/src/optimizer/rule/agg_call_merge_rule.rs b/src/frontend/src/optimizer/rule/agg_call_merge_rule.rs index 76f947830c18..6f2045b22066 100644 --- a/src/frontend/src/optimizer/rule/agg_call_merge_rule.rs +++ b/src/frontend/src/optimizer/rule/agg_call_merge_rule.rs @@ -22,9 +22,7 @@ pub struct AggCallMergeRule {} impl Rule for AggCallMergeRule { fn apply(&self, plan: PlanRef) -> Option { - let Some(agg) = plan.as_logical_agg() else { - return None; - }; + let agg = plan.as_logical_agg()?; let calls = agg.agg_calls(); let mut new_calls = Vec::with_capacity(calls.len()); diff --git a/src/frontend/src/planner/relation.rs b/src/frontend/src/planner/relation.rs index 42fdc83a3f93..b411277761e7 100644 --- a/src/frontend/src/planner/relation.rs +++ b/src/frontend/src/planner/relation.rs @@ -25,6 +25,7 @@ use crate::binder::{ BoundWindowTableFunction, Relation, WindowTableFunctionKind, }; use crate::expr::{Expr, ExprImpl, ExprType, FunctionCall, InputRef}; +use crate::optimizer::plan_node::generic::SourceNodeKind; use crate::optimizer::plan_node::{ LogicalApply, LogicalHopWindow, LogicalJoin, LogicalProject, LogicalScan, LogicalShare, LogicalSource, LogicalSysScan, LogicalTableFunction, LogicalValues, PlanRef, @@ -85,7 +86,12 @@ impl Planner { } pub(super) fn plan_source(&mut self, source: BoundSource) -> Result { - Ok(LogicalSource::with_catalog(Rc::new(source.catalog), false, self.ctx())?.into()) + Ok(LogicalSource::with_catalog( + Rc::new(source.catalog), + SourceNodeKind::CreateMViewOrBatch, + self.ctx(), + )? 
+ .into()) } pub(super) fn plan_join(&mut self, join: BoundJoin) -> Result { diff --git a/src/jni_core/src/jvm_runtime.rs b/src/jni_core/src/jvm_runtime.rs index 8c3d3dcf17f3..d0193a7717d2 100644 --- a/src/jni_core/src/jvm_runtime.rs +++ b/src/jni_core/src/jvm_runtime.rs @@ -219,14 +219,14 @@ pub fn execute_with_jni_env( Ok(true) => env .exception_clear() .inspect_err(|e| { - tracing::warn!("Exception occurred but failed to clear: {:?}", e); + tracing::warn!(error = %e.as_report(), "Exception occurred but failed to clear"); }) .unwrap(), Ok(false) => { // No exception, do nothing } Err(e) => { - tracing::warn!("Failed to check exception: {:?}", e); + tracing::warn!(error = %e.as_report(), "Failed to check exception"); } } diff --git a/src/meta/model_v2/migration/src/m20230908_072257_init.rs b/src/meta/model_v2/migration/src/m20230908_072257_init.rs index bc9ce2b08c32..bf8cb8c0fc1e 100644 --- a/src/meta/model_v2/migration/src/m20230908_072257_init.rs +++ b/src/meta/model_v2/migration/src/m20230908_072257_init.rs @@ -703,11 +703,12 @@ impl MigrationTrait for Migration { .table(Function::Table) .col(ColumnDef::new(Function::FunctionId).integer().primary_key()) .col(ColumnDef::new(Function::Name).string().not_null()) + .col(ColumnDef::new(Function::ArgNames).json().not_null()) .col(ColumnDef::new(Function::ArgTypes).json().not_null()) .col(ColumnDef::new(Function::ReturnType).json().not_null()) .col(ColumnDef::new(Function::Language).string().not_null()) - .col(ColumnDef::new(Function::Link).string().not_null()) - .col(ColumnDef::new(Function::Identifier).string().not_null()) + .col(ColumnDef::new(Function::Link).string()) + .col(ColumnDef::new(Function::Identifier).string()) .col(ColumnDef::new(Function::Body).string()) .col(ColumnDef::new(Function::Kind).string().not_null()) .foreign_key( @@ -1095,6 +1096,7 @@ enum Function { Table, FunctionId, Name, + ArgNames, ArgTypes, ReturnType, Language, diff --git a/src/meta/model_v2/src/function.rs 
b/src/meta/model_v2/src/function.rs index 5976685893af..ae68782a50fd 100644 --- a/src/meta/model_v2/src/function.rs +++ b/src/meta/model_v2/src/function.rs @@ -36,11 +36,13 @@ pub struct Model { #[sea_orm(primary_key, auto_increment = false)] pub function_id: FunctionId, pub name: String, + // encode Vec as comma separated string + pub arg_names: String, pub arg_types: DataTypeArray, pub return_type: DataType, pub language: String, - pub link: String, - pub identifier: String, + pub link: Option, + pub identifier: Option, pub body: Option, pub kind: FunctionKind, } @@ -90,6 +92,7 @@ impl From for ActiveModel { Self { function_id: Set(function.id as _), name: Set(function.name), + arg_names: Set(function.arg_names.join(",")), arg_types: Set(DataTypeArray(function.arg_types)), return_type: Set(DataType(function.return_type.unwrap())), language: Set(function.language), diff --git a/src/meta/node/Cargo.toml b/src/meta/node/Cargo.toml index 4c1237dc16d2..a799c99b98c8 100644 --- a/src/meta/node/Cargo.toml +++ b/src/meta/node/Cargo.toml @@ -41,6 +41,7 @@ sea-orm = { version = "0.12.0", features = [ ] } serde = { version = "1", features = ["derive"] } serde_json = "1" +thiserror-ext = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio", features = [ "rt", "rt-multi-thread", diff --git a/src/meta/node/src/lib.rs b/src/meta/node/src/lib.rs index 46a9e94ed861..2989ceaa5f5c 100644 --- a/src/meta/node/src/lib.rs +++ b/src/meta/node/src/lib.rs @@ -291,6 +291,7 @@ pub fn start(opts: MetaNodeOpts) -> Pin + Send>> { hummock_version_checkpoint_interval_sec: config .meta .hummock_version_checkpoint_interval_sec, + enable_hummock_data_archive: config.meta.enable_hummock_data_archive, min_delta_log_num_for_hummock_version_checkpoint: config .meta .min_delta_log_num_for_hummock_version_checkpoint, diff --git a/src/meta/node/src/server.rs b/src/meta/node/src/server.rs index acad2cfe73c6..71486dbece5c 100644 --- a/src/meta/node/src/server.rs +++ 
b/src/meta/node/src/server.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use std::time::Duration; +use anyhow::Context; use either::Either; use etcd_client::ConnectOptions; use futures::future::join_all; @@ -68,6 +69,7 @@ use risingwave_pb::meta::SystemParams; use risingwave_pb::user::user_service_server::UserServiceServer; use risingwave_rpc_client::ComputeClientPool; use sea_orm::{ConnectionTrait, DbBackend}; +use thiserror_ext::AsReport; use tokio::sync::oneshot::{channel as OneChannel, Receiver as OneReceiver}; use tokio::sync::watch; use tokio::sync::watch::{Receiver as WatchReceiver, Sender as WatchSender}; @@ -165,7 +167,7 @@ pub async fn rpc_serve( let client = EtcdClient::connect(endpoints.clone(), Some(options.clone()), auth_enabled) .await - .map_err(|e| anyhow::anyhow!("failed to connect etcd {}", e))?; + .context("failed to connect etcd")?; let meta_store = EtcdMetaStore::new(client).into_ref(); if election_client.is_none() { @@ -234,7 +236,7 @@ pub fn rpc_serve_with_store( .run_once(lease_interval_secs as i64, stop_rx.clone()) .await { - tracing::error!("election error happened, {}", e.to_string()); + tracing::error!(error = %e.as_report(), "election error happened"); } }); @@ -252,8 +254,8 @@ pub fn rpc_serve_with_store( tokio::select! 
{ _ = svc_shutdown_rx_clone.changed() => return, res = is_leader_watcher.changed() => { - if let Err(err) = res { - tracing::error!("leader watcher recv failed {}", err.to_string()); + if res.is_err() { + tracing::error!("leader watcher recv failed"); } } } @@ -284,8 +286,8 @@ pub fn rpc_serve_with_store( return; } res = is_leader_watcher.changed() => { - if let Err(err) = res { - tracing::error!("leader watcher recv failed {}", err.to_string()); + if res.is_err() { + tracing::error!("leader watcher recv failed"); } } } @@ -771,13 +773,13 @@ pub async fn start_service_as_election_leader( match tokio::time::timeout(Duration::from_secs(1), join_all(handles)).await { Ok(results) => { for result in results { - if let Err(err) = result { - tracing::warn!("Failed to join shutdown: {:?}", err); + if result.is_err() { + tracing::warn!("Failed to join shutdown"); } } } - Err(e) => { - tracing::warn!("Join shutdown timeout: {:?}", e); + Err(_e) => { + tracing::warn!("Join shutdown timeout"); } } }; diff --git a/src/meta/service/Cargo.toml b/src/meta/service/Cargo.toml index c9b6619565cd..2ba993d7eab5 100644 --- a/src/meta/service/Cargo.toml +++ b/src/meta/service/Cargo.toml @@ -35,6 +35,7 @@ sea-orm = { version = "0.12.0", features = [ "macros", ] } sync-point = { path = "../../utils/sync-point" } +thiserror-ext = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio", features = [ "rt", "rt-multi-thread", diff --git a/src/meta/service/src/cloud_service.rs b/src/meta/service/src/cloud_service.rs index 3b490ef7b37d..2ee28b1427ed 100644 --- a/src/meta/service/src/cloud_service.rs +++ b/src/meta/service/src/cloud_service.rs @@ -22,13 +22,14 @@ use risingwave_connector::source::kafka::private_link::insert_privatelink_broker use risingwave_connector::source::{ ConnectorProperties, SourceEnumeratorContext, SourceProperties, SplitEnumerator, }; -use risingwave_meta::manager::MetadataManager; +use risingwave_meta::manager::{ConnectionId, MetadataManager}; use 
risingwave_pb::catalog::connection::Info::PrivateLinkService; use risingwave_pb::cloud_service::cloud_service_server::CloudService; use risingwave_pb::cloud_service::rw_cloud_validate_source_response::{Error, ErrorType}; use risingwave_pb::cloud_service::{ RwCloudValidateSourceRequest, RwCloudValidateSourceResponse, SourceType, }; +use thiserror_ext::AsReport; use tonic::{Request, Response, Status}; use crate::rpc::cloud_provider::AwsEc2Client; @@ -77,8 +78,11 @@ impl CloudService for CloudServiceImpl { // if connection_id provided, check whether endpoint service is available and resolve // broker rewrite map currently only support aws privatelink connection if let Some(connection_id_str) = source_cfg.get("connection.id") { - let connection_id = connection_id_str.parse().map_err(|e| { - Status::invalid_argument(format!("connection.id is not an integer: {}", e)) + let connection_id = connection_id_str.parse::().map_err(|e| { + Status::invalid_argument(format!( + "connection.id is not an integer: {}", + e.as_report() + )) })?; let connection = match &self.metadata_manager { @@ -97,7 +101,7 @@ impl CloudService for CloudServiceImpl { if let Err(e) = connection { return Ok(new_rwc_validate_fail_response( ErrorType::PrivatelinkConnectionNotFound, - e.to_string(), + e.to_report_string(), )); } if let Some(PrivateLinkService(service)) = connection.unwrap().info { @@ -115,7 +119,7 @@ impl CloudService for CloudServiceImpl { Err(e) => { return Ok(new_rwc_validate_fail_response( ErrorType::PrivatelinkUnavailable, - e.to_string(), + e.to_report_string(), )); } Ok(false) => { diff --git a/src/meta/service/src/cluster_service.rs b/src/meta/service/src/cluster_service.rs index e94c40fe5271..2d2c9751689a 100644 --- a/src/meta/service/src/cluster_service.rs +++ b/src/meta/service/src/cluster_service.rs @@ -22,6 +22,7 @@ use risingwave_pb::meta::{ ListAllNodesResponse, UpdateWorkerNodeSchedulabilityRequest, UpdateWorkerNodeSchedulabilityResponse, }; +use thiserror_ext::AsReport; 
use tonic::{Request, Response, Status}; use crate::MetaError; @@ -64,7 +65,7 @@ impl ClusterService for ClusterServiceImpl { return Ok(Response::new(AddWorkerNodeResponse { status: Some(risingwave_pb::common::Status { code: risingwave_pb::common::status::Code::UnknownWorker as i32, - message: format!("{}", e), + message: e.to_report_string(), }), node_id: None, })); diff --git a/src/meta/service/src/heartbeat_service.rs b/src/meta/service/src/heartbeat_service.rs index 7770acc06db5..d2c40b7b4ddb 100644 --- a/src/meta/service/src/heartbeat_service.rs +++ b/src/meta/service/src/heartbeat_service.rs @@ -16,6 +16,7 @@ use itertools::Itertools; use risingwave_meta::manager::MetadataManager; use risingwave_pb::meta::heartbeat_service_server::HeartbeatService; use risingwave_pb::meta::{HeartbeatRequest, HeartbeatResponse}; +use thiserror_ext::AsReport; use tonic::{Request, Response, Status}; #[derive(Clone)] @@ -58,7 +59,7 @@ impl HeartbeatService for HeartbeatServiceImpl { return Ok(Response::new(HeartbeatResponse { status: Some(risingwave_pb::common::Status { code: risingwave_pb::common::status::Code::UnknownWorker as i32, - message: format!("{}", e), + message: e.to_report_string(), }), })); } diff --git a/src/meta/service/src/hummock_service.rs b/src/meta/service/src/hummock_service.rs index a082b723dd12..953bd93b1e75 100644 --- a/src/meta/service/src/hummock_service.rs +++ b/src/meta/service/src/hummock_service.rs @@ -24,6 +24,7 @@ use risingwave_pb::hummock::get_compaction_score_response::PickerInfo; use risingwave_pb::hummock::hummock_manager_service_server::HummockManagerService; use risingwave_pb::hummock::subscribe_compaction_event_request::Event as RequestEvent; use risingwave_pb::hummock::*; +use thiserror_ext::AsReport; use tonic::{Request, Response, Status, Streaming}; use crate::hummock::compaction::selector::ManualCompactionOption; @@ -308,7 +309,7 @@ impl HummockManagerService for HummockServiceImpl { tracing::info!("Full GC results {} SSTs to delete", 
number); } Err(e) => { - tracing::warn!("Full GC SST failed: {:#?}", e); + tracing::warn!(error = %e.as_report(), "Full GC SST failed"); } } }); diff --git a/src/meta/src/backup_restore/backup_manager.rs b/src/meta/src/backup_restore/backup_manager.rs index 5392e5ef8699..d6e90f0b5c7d 100644 --- a/src/meta/src/backup_restore/backup_manager.rs +++ b/src/meta/src/backup_restore/backup_manager.rs @@ -28,6 +28,7 @@ use risingwave_object_store::object::build_remote_object_store; use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; use risingwave_pb::backup_service::{BackupJobStatus, MetaBackupManifestId}; use risingwave_pb::meta::subscribe_response::{Info, Operation}; +use thiserror_ext::AsReport; use tokio::task::JoinHandle; use crate::backup_restore::meta_snapshot_builder; @@ -134,10 +135,10 @@ impl BackupManager { if let Err(e) = self.set_store(new_config.clone()).await { // Retry is driven by periodic system params notification. tracing::warn!( - "failed to apply new backup config: url={}, dir={}, {:#?}", - new_config.0, - new_config.1, - e + url = &new_config.0, + dir = &new_config.1, + error = %e.as_report(), + "failed to apply new backup config", ); } } @@ -269,7 +270,7 @@ impl BackupManager { } BackupJobResult::Failed(e) => { self.metrics.job_latency_failure.observe(job_latency); - let message = format!("failed backup job {}: {}", job_id, e); + let message = format!("failed backup job {}: {}", job_id, e.as_report()); tracing::warn!(message); self.latest_job_info .store(Arc::new((job_id, BackupJobStatus::Failed, message))); diff --git a/src/meta/src/backup_restore/restore.rs b/src/meta/src/backup_restore/restore.rs index 084aa8ad7192..250477ee3041 100644 --- a/src/meta/src/backup_restore/restore.rs +++ b/src/meta/src/backup_restore/restore.rs @@ -24,6 +24,7 @@ use risingwave_hummock_sdk::version_checkpoint_path; use risingwave_object_store::object::build_remote_object_store; use 
risingwave_object_store::object::object_metrics::ObjectStoreMetrics; use risingwave_pb::hummock::PbHummockVersionCheckpoint; +use thiserror_ext::AsReport; use crate::backup_restore::restore_impl::v1::{LoaderV1, WriterModelV1ToMetaStoreV1}; use crate::backup_restore::restore_impl::v2::{LoaderV2, WriterModelV2ToMetaStoreV2}; @@ -193,7 +194,7 @@ pub async fn restore(opts: RestoreOpts) -> BackupResult<()> { tracing::info!("command succeeded"); } Err(e) => { - tracing::warn!("command failed: {}", e); + tracing::warn!(error = %e.as_report(), "command failed"); } } result diff --git a/src/meta/src/backup_restore/utils.rs b/src/meta/src/backup_restore/utils.rs index 3e70d4d9aaae..0e40085a2b97 100644 --- a/src/meta/src/backup_restore/utils.rs +++ b/src/meta/src/backup_restore/utils.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use std::time::Duration; +use anyhow::Context; use etcd_client::ConnectOptions; use risingwave_backup::error::BackupResult; use risingwave_backup::storage::{MetaSnapshotStorageRef, ObjectStoreMetaSnapshotStorage}; @@ -74,7 +75,7 @@ pub async fn get_meta_store(opts: RestoreOpts) -> BackupResult Ok(MetaStoreBackendImpl::Mem(MemStore::new())), diff --git a/src/meta/src/barrier/command.rs b/src/meta/src/barrier/command.rs index ae75453d8015..de88c1ae1760 100644 --- a/src/meta/src/barrier/command.rs +++ b/src/meta/src/barrier/command.rs @@ -35,6 +35,7 @@ use risingwave_pb::stream_plan::{ UpdateMutation, }; use risingwave_pb::stream_service::{DropActorsRequest, WaitEpochCommitRequest}; +use thiserror_ext::AsReport; use uuid::Uuid; use super::info::{ActorDesc, CommandActorChanges, InflightActorInfo}; @@ -857,7 +858,7 @@ impl CommandContext { let table_id = table_fragments.table_id().table_id; tracing::warn!( table_id, - reason=?e, + error = %e.as_report(), "cancel_create_table_procedure failed for CancelStreamingJob", ); // If failed, check that table is not in meta store. 
diff --git a/src/meta/src/barrier/mod.rs b/src/meta/src/barrier/mod.rs index f91262117be1..088456004539 100644 --- a/src/meta/src/barrier/mod.rs +++ b/src/meta/src/barrier/mod.rs @@ -37,6 +37,7 @@ use risingwave_pb::meta::subscribe_response::{Info, Operation}; use risingwave_pb::meta::PausedReason; use risingwave_pb::stream_plan::barrier::BarrierKind; use risingwave_pb::stream_service::BarrierCompleteResponse; +use thiserror_ext::AsReport; use tokio::sync::oneshot::{Receiver, Sender}; use tokio::sync::Mutex; use tokio::task::JoinHandle; @@ -668,7 +669,7 @@ impl GlobalBarrierManager { // back to frontend fail_point!("inject_barrier_err_success"); let fail_node = self.checkpoint_control.barrier_failed(); - tracing::warn!("Failed to complete epoch {}: {:?}", prev_epoch, err); + tracing::warn!(%prev_epoch, error = %err.as_report(), "Failed to complete epoch"); self.failure_recovery(err, fail_node).await; return; } @@ -693,7 +694,7 @@ impl GlobalBarrierManager { .drain(index..) .chain(self.checkpoint_control.barrier_failed().into_iter()) .collect_vec(); - tracing::warn!("Failed to commit epoch {}: {:?}", prev_epoch, err); + tracing::warn!(%prev_epoch, error = %err.as_report(), "Failed to commit epoch"); self.failure_recovery(err, fail_nodes).await; } } @@ -728,7 +729,7 @@ impl GlobalBarrierManager { let prev_epoch = TracedEpoch::new(latest_snapshot.committed_epoch.into()); // we can only recovery from the committed epoch let span = tracing::info_span!( "failure_recovery", - %err, + error = %err.as_report(), prev_epoch = prev_epoch.value().0 ); @@ -741,7 +742,7 @@ impl GlobalBarrierManager { .await; self.context.set_status(BarrierManagerStatus::Running).await; } else { - panic!("failed to execute barrier: {:?}", err); + panic!("failed to execute barrier: {}", err.as_report()); } } diff --git a/src/meta/src/barrier/recovery.rs b/src/meta/src/barrier/recovery.rs index d9a8b5822645..e0ace5f9678a 100644 --- a/src/meta/src/barrier/recovery.rs +++ 
b/src/meta/src/barrier/recovery.rs @@ -16,7 +16,7 @@ use std::collections::{BTreeSet, HashMap, HashSet}; use std::sync::Arc; use std::time::{Duration, Instant}; -use anyhow::anyhow; +use anyhow::{anyhow, Context}; use futures::future::try_join_all; use futures::stream::FuturesUnordered; use futures::TryStreamExt; @@ -31,6 +31,7 @@ use risingwave_pb::stream_plan::AddMutation; use risingwave_pb::stream_service::{ BroadcastActorInfoTableRequest, BuildActorsRequest, ForceStopActorsRequest, UpdateActorsRequest, }; +use thiserror_ext::AsReport; use tokio::sync::oneshot; use tokio_retry::strategy::{jitter, ExponentialBackoff}; use tracing::{debug, warn, Instrument}; @@ -197,9 +198,7 @@ impl GlobalBarrierManagerContext { tokio::spawn(async move { let res: MetaResult<()> = try { tracing::debug!("recovering stream job {}", table.id); - finished - .await - .map_err(|e| anyhow!("failed to finish command: {}", e))?; + finished.await.context("failed to finish command")?; tracing::debug!("finished stream job {}", table.id); // Once notified that job is finished we need to notify frontend. 
@@ -212,8 +211,9 @@ impl GlobalBarrierManagerContext { }; if let Err(e) = res.as_ref() { tracing::error!( - "stream job {} interrupted, will retry after recovery: {e:?}", - table.id + id = table.id, + error = %e.as_report(), + "stream job interrupted, will retry after recovery", ); // NOTE(kwannoel): We should not cleanup stream jobs, // we don't know if it's just due to CN killed, @@ -283,16 +283,15 @@ impl GlobalBarrierManagerContext { tokio::spawn(async move { let res: MetaResult<()> = try { tracing::debug!("recovering stream job {}", id); - finished - .await - .map_err(|e| anyhow!("failed to finish command: {}", e))?; - tracing::debug!("finished stream job {}", id); + finished.await.ok().context("failed to finish command")?; + tracing::debug!(id, "finished stream job"); catalog_controller.finish_streaming_job(id).await?; }; if let Err(e) = &res { tracing::error!( - "stream job {} interrupted, will retry after recovery: {e:?}", - id + id, + error = %e.as_report(), + "stream job interrupted, will retry after recovery", ); // NOTE(kwannoel): We should not cleanup stream jobs, // we don't know if it's just due to CN killed, @@ -354,7 +353,7 @@ impl GlobalBarrierManagerContext { let mut info = if self.env.opts.enable_scale_in_when_recovery { let info = self.resolve_actor_info().await; let scaled = self.scale_actors(&info).await.inspect_err(|err| { - warn!(err = ?err, "scale actors failed"); + warn!(error = %err.as_report(), "scale actors failed"); })?; if scaled { self.resolve_actor_info().await @@ -364,13 +363,13 @@ impl GlobalBarrierManagerContext { } else { // Migrate actors in expired CN to newly joined one. self.migrate_actors().await.inspect_err(|err| { - warn!(err = ?err, "migrate actors failed"); + warn!(error = %err.as_report(), "migrate actors failed"); })? }; // Reset all compute nodes, stop and drop existing actors. 
self.reset_compute_nodes(&info).await.inspect_err(|err| { - warn!(err = ?err, "reset compute nodes failed"); + warn!(error = %err.as_report(), "reset compute nodes failed"); })?; if scheduled_barriers.pre_apply_drop_scheduled().await { @@ -379,10 +378,10 @@ impl GlobalBarrierManagerContext { // update and build all actors. self.update_actors(&info).await.inspect_err(|err| { - warn!(err = ?err, "update actors failed"); + warn!(error = %err.as_report(), "update actors failed"); })?; self.build_actors(&info).await.inspect_err(|err| { - warn!(err = ?err, "build_actors failed"); + warn!(error = %err.as_report(), "build_actors failed"); })?; // get split assignments for all actors @@ -424,14 +423,14 @@ impl GlobalBarrierManagerContext { let res = match await_barrier_complete.await.result { Ok(response) => { if let Err(err) = command_ctx.post_collect().await { - warn!(err = ?err, "post_collect failed"); + warn!(error = %err.as_report(), "post_collect failed"); Err(err) } else { Ok((new_epoch.clone(), response)) } } Err(err) => { - warn!(err = ?err, "inject_barrier failed"); + warn!(error = %err.as_report(), "inject_barrier failed"); Err(err) } }; @@ -674,8 +673,8 @@ impl GlobalBarrierManagerContext { .await { tracing::error!( - "failed to apply reschedule for offline scaling in recovery: {}", - e.to_string() + error = %e.as_report(), + "failed to apply reschedule for offline scaling in recovery", ); mgr.fragment_manager diff --git a/src/meta/src/barrier/rpc.rs b/src/meta/src/barrier/rpc.rs index b9661a37d8e8..55c9fce4c408 100644 --- a/src/meta/src/barrier/rpc.rs +++ b/src/meta/src/barrier/rpc.rs @@ -98,9 +98,9 @@ impl GlobalBarrierManagerContext { } rx.map(move |result| match result { Ok(completion) => completion, - Err(e) => BarrierCompletion { + Err(_e) => BarrierCompletion { prev_epoch, - result: Err(anyhow!("failed to receive barrier completion result: {:?}", e).into()), + result: Err(anyhow!("failed to receive barrier completion result").into()), }, }) } diff --git 
a/src/meta/src/barrier/schedule.rs b/src/meta/src/barrier/schedule.rs index aab3234d620c..26fd3ea8143e 100644 --- a/src/meta/src/barrier/schedule.rs +++ b/src/meta/src/barrier/schedule.rs @@ -18,7 +18,7 @@ use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; use std::time::Instant; -use anyhow::anyhow; +use anyhow::{anyhow, Context}; use assert_matches::assert_matches; use risingwave_common::catalog::TableId; use risingwave_pb::hummock::HummockSnapshot; @@ -279,20 +279,17 @@ impl BarrierScheduler { for (injected_rx, collect_rx, finish_rx) in contexts { // Wait for this command to be injected, and record the result. - let info = injected_rx - .await - .map_err(|e| anyhow!("failed to inject barrier: {}", e))?; + let info = injected_rx.await.ok().context("failed to inject barrier")?; infos.push(info); // Throw the error if it occurs when collecting this barrier. collect_rx .await - .map_err(|e| anyhow!("failed to collect barrier: {}", e))??; + .ok() + .context("failed to collect barrier")??; // Wait for this command to be finished. 
- finish_rx - .await - .map_err(|e| anyhow!("failed to finish command: {}", e))?; + finish_rx.await.ok().context("failed to finish command")?; } Ok(infos) diff --git a/src/meta/src/controller/cluster.rs b/src/meta/src/controller/cluster.rs index f8b1490e8006..042993bd5bd4 100644 --- a/src/meta/src/controller/cluster.rs +++ b/src/meta/src/controller/cluster.rs @@ -42,6 +42,7 @@ use sea_orm::{ ActiveModelTrait, ColumnTrait, DatabaseConnection, EntityTrait, QueryFilter, QuerySelect, TransactionTrait, }; +use thiserror_ext::AsReport; use tokio::sync::oneshot::Sender; use tokio::sync::{RwLock, RwLockReadGuard}; use tokio::task::JoinHandle; @@ -268,7 +269,7 @@ impl ClusterController { { Ok(keys) => keys, Err(err) => { - tracing::warn!("Failed to load expire worker info from db: {}", err); + tracing::warn!(error = %err.as_report(), "Failed to load expire worker info from db"); continue; } }; @@ -278,7 +279,7 @@ impl ClusterController { .exec(&inner.db) .await { - tracing::warn!("Failed to delete expire workers from db: {}", err); + tracing::warn!(error = %err.as_report(), "Failed to delete expire workers from db"); continue; } diff --git a/src/meta/src/controller/mod.rs b/src/meta/src/controller/mod.rs index 8128981b1228..562dd1845376 100644 --- a/src/meta/src/controller/mod.rs +++ b/src/meta/src/controller/mod.rs @@ -274,6 +274,12 @@ impl From> for PbFunction { database_id: value.1.database_id.unwrap() as _, name: value.0.name, owner: value.1.owner_id as _, + arg_names: value + .0 + .arg_names + .split(',') + .map(|s| s.to_string()) + .collect(), arg_types: value.0.arg_types.into_inner(), return_type: Some(value.0.return_type.into_inner()), language: value.0.language, diff --git a/src/meta/src/controller/system_param.rs b/src/meta/src/controller/system_param.rs index e483645ead93..bb8f51479856 100644 --- a/src/meta/src/controller/system_param.rs +++ b/src/meta/src/controller/system_param.rs @@ -195,15 +195,21 @@ impl SystemParamsController { }; let mut params = 
params_guard.clone(); let mut param: system_parameter::ActiveModel = param.into(); - param.value = - Set(set_system_param(&mut params, name, value).map_err(MetaError::system_params)?); + let Some((new_value, diff)) = + set_system_param(&mut params, name, value).map_err(MetaError::system_params)? + else { + // No changes on the parameter. + return Ok(params); + }; + + param.value = Set(new_value); param.update(&self.db).await?; *params_guard = params.clone(); - // TODO: check if the parameter is actually changed. - // Run common handler. - self.common_handler.handle_change(params.clone().into()); + self.common_handler.handle_change(&diff); + + // TODO: notify the diff instead of the snapshot. // Sync params to other managers on the meta node only once, since it's infallible. self.notification_manager diff --git a/src/meta/src/hummock/manager/checkpoint.rs b/src/meta/src/hummock/manager/checkpoint.rs index 25854428c695..81fc47330ef2 100644 --- a/src/meta/src/hummock/manager/checkpoint.rs +++ b/src/meta/src/hummock/manager/checkpoint.rs @@ -22,9 +22,9 @@ use risingwave_hummock_sdk::compaction_group::hummock_version_ext::{ object_size_map, summarize_group_deltas, }; use risingwave_hummock_sdk::version::HummockVersion; -use risingwave_hummock_sdk::HummockSstableObjectId; +use risingwave_hummock_sdk::HummockVersionId; use risingwave_pb::hummock::hummock_version_checkpoint::{PbStaleObjects, StaleObjects}; -use risingwave_pb::hummock::PbHummockVersionCheckpoint; +use risingwave_pb::hummock::{PbHummockVersionArchive, PbHummockVersionCheckpoint}; use crate::hummock::error::Result; use crate::hummock::manager::{read_lock, write_lock}; @@ -34,7 +34,7 @@ use crate::hummock::HummockManager; #[derive(Default)] pub struct HummockVersionCheckpoint { pub version: HummockVersion, - pub stale_objects: HashMap, + pub stale_objects: HashMap, } impl HummockVersionCheckpoint { @@ -44,7 +44,7 @@ impl HummockVersionCheckpoint { stale_objects: checkpoint .stale_objects .iter() - 
.map(|(object_id, objects)| (*object_id as HummockSstableObjectId, objects.clone())) + .map(|(version_id, objects)| (*version_id as HummockVersionId, objects.clone())) .collect(), } } @@ -92,6 +92,21 @@ impl HummockManager { Ok(()) } + pub(super) async fn write_version_archive( + &self, + archive: &PbHummockVersionArchive, + ) -> Result<()> { + use prost::Message; + let buf = archive.encode_to_vec(); + let archive_path = format!( + "{}/{}", + self.version_archive_dir, + archive.version.as_ref().unwrap().id + ); + self.object_store.upload(&archive_path, buf.into()).await?; + Ok(()) + } + /// Creates a hummock version checkpoint. /// Returns the diff between new and old checkpoint id. /// Note that this method must not be called concurrently, because internally it doesn't hold @@ -109,37 +124,49 @@ impl HummockManager { if new_checkpoint_id < old_checkpoint_id + min_delta_log_num { return Ok(0); } + let mut archive: Option = None; let mut stale_objects = old_checkpoint.stale_objects.clone(); - // `object_sizes` is used to calculate size of stale objects. - let mut object_sizes = object_size_map(&old_checkpoint.version); - for (_, version_delta) in versioning - .hummock_version_deltas - .range((Excluded(old_checkpoint_id), Included(new_checkpoint_id))) - { - for group_deltas in version_delta.group_deltas.values() { - let summary = summarize_group_deltas(group_deltas); - object_sizes.extend( - summary - .insert_table_infos - .iter() - .map(|t| (t.object_id, t.file_size)), + if !self.env.opts.enable_hummock_data_archive { + // `object_sizes` is used to calculate size of stale objects. 
+ let mut object_sizes = object_size_map(&old_checkpoint.version); + for (_, version_delta) in versioning + .hummock_version_deltas + .range((Excluded(old_checkpoint_id), Included(new_checkpoint_id))) + { + for group_deltas in version_delta.group_deltas.values() { + let summary = summarize_group_deltas(group_deltas); + object_sizes.extend( + summary + .insert_table_infos + .iter() + .map(|t| (t.object_id, t.file_size)), + ); + } + let removed_object_ids = version_delta.gc_object_ids.clone(); + if removed_object_ids.is_empty() { + continue; + } + let total_file_size = removed_object_ids + .iter() + .map(|t| object_sizes.get(t).copied().unwrap()) + .sum::(); + stale_objects.insert( + version_delta.id, + StaleObjects { + id: removed_object_ids, + total_file_size, + }, ); } - let removed_object_ids = version_delta.gc_object_ids.clone(); - if removed_object_ids.is_empty() { - continue; - } - let total_file_size = removed_object_ids - .iter() - .map(|t| object_sizes.get(t).copied().unwrap()) - .sum::(); - stale_objects.insert( - version_delta.id, - StaleObjects { - id: removed_object_ids, - total_file_size, - }, - ); + } else { + archive = Some(PbHummockVersionArchive { + version: Some(old_checkpoint.version.to_protobuf()), + version_deltas: versioning + .hummock_version_deltas + .range((Excluded(old_checkpoint_id), Included(new_checkpoint_id))) + .map(|(_, version_delta)| version_delta.to_protobuf()) + .collect(), + }); } let new_checkpoint = HummockVersionCheckpoint { version: current_version.clone(), @@ -148,6 +175,14 @@ impl HummockManager { drop(versioning_guard); // 2. persist the new checkpoint without holding lock self.write_checkpoint(&new_checkpoint).await?; + if let Some(archive) = archive { + if let Err(e) = self.write_version_archive(&archive).await { + tracing::warn!( + "failed to write version archive {}, {e}", + archive.version.as_ref().unwrap().id + ); + } + } // 3. 
hold write lock and update in memory state let mut versioning_guard = write_lock!(self, versioning).await; let versioning = versioning_guard.deref_mut(); diff --git a/src/meta/src/hummock/manager/compaction_group_manager.rs b/src/meta/src/hummock/manager/compaction_group_manager.rs index ab569f5ed468..51f8e63dd6eb 100644 --- a/src/meta/src/hummock/manager/compaction_group_manager.rs +++ b/src/meta/src/hummock/manager/compaction_group_manager.rs @@ -34,6 +34,7 @@ use risingwave_pb::hummock::{ compact_task, CompactionConfig, CompactionGroupInfo, CompatibilityVersion, GroupConstruct, GroupDelta, GroupDestroy, GroupMetaChange, GroupTableChange, }; +use thiserror_ext::AsReport; use tokio::sync::{OnceCell, RwLock}; use tracing::warn; @@ -397,7 +398,9 @@ impl HummockManager { pub async fn unregister_table_ids_fail_fast(&self, table_ids: &[StateTableId]) { self.unregister_table_ids(table_ids) .await - .unwrap_or_else(|e| panic!("unregister table ids fail: {table_ids:?} {e}")); + .unwrap_or_else(|e| { + panic!("unregister table ids fail: {table_ids:?} {}", e.as_report()) + }); } pub async fn update_compaction_config( diff --git a/src/meta/src/hummock/manager/gc.rs b/src/meta/src/hummock/manager/gc.rs index e17f90aaad17..a9b4b04c35f4 100644 --- a/src/meta/src/hummock/manager/gc.rs +++ b/src/meta/src/hummock/manager/gc.rs @@ -17,6 +17,7 @@ use std::collections::HashSet; use std::ops::DerefMut; use std::time::Duration; +use anyhow::Context; use function_name::named; use futures::{stream, StreamExt}; use itertools::Itertools; @@ -266,8 +267,7 @@ pub async fn collect_global_gc_watermark( } let mut buffered = stream::iter(worker_futures).buffer_unordered(workers.len()); while let Some(worker_result) = buffered.next().await { - let worker_watermark = worker_result - .map_err(|e| anyhow::anyhow!("Failed to collect GC watermark: {:#?}", e))?; + let worker_watermark = worker_result.context("Failed to collect GC watermark")?; // None means either the worker has gone or the worker has 
not set a watermark. global_watermark = cmp::min( global_watermark, diff --git a/src/meta/src/hummock/manager/mod.rs b/src/meta/src/hummock/manager/mod.rs index 57903711ec95..aaeb7b3d105f 100644 --- a/src/meta/src/hummock/manager/mod.rs +++ b/src/meta/src/hummock/manager/mod.rs @@ -19,6 +19,7 @@ use std::sync::atomic::AtomicBool; use std::sync::{Arc, LazyLock}; use std::time::{Duration, Instant, SystemTime}; +use anyhow::Context; use arc_swap::ArcSwap; use bytes::Bytes; use fail::fail_point; @@ -41,9 +42,9 @@ use risingwave_hummock_sdk::compaction_group::hummock_version_ext::{ }; use risingwave_hummock_sdk::version::HummockVersionDelta; use risingwave_hummock_sdk::{ - version_checkpoint_path, CompactionGroupId, ExtendedSstableInfo, HummockCompactionTaskId, - HummockContextId, HummockEpoch, HummockSstableId, HummockSstableObjectId, HummockVersionId, - SstObjectIdRange, INVALID_VERSION_ID, + version_archive_dir, version_checkpoint_path, CompactionGroupId, ExtendedSstableInfo, + HummockCompactionTaskId, HummockContextId, HummockEpoch, HummockSstableId, + HummockSstableObjectId, HummockVersionId, SstObjectIdRange, INVALID_VERSION_ID, }; use risingwave_meta_model_v2::{ compaction_status, compaction_task, hummock_pinned_snapshot, hummock_pinned_version, @@ -143,6 +144,7 @@ pub struct HummockManager { object_store: ObjectStoreRef, version_checkpoint_path: String, + version_archive_dir: String, pause_version_checkpoint: AtomicBool, history_table_throughput: parking_lot::RwLock>>, @@ -383,7 +385,8 @@ impl HummockManager { } } } - let checkpoint_path = version_checkpoint_path(state_store_dir); + let version_checkpoint_path = version_checkpoint_path(state_store_dir); + let version_archive_dir = version_archive_dir(state_store_dir); let (tx, rx) = tokio::sync::mpsc::unbounded_channel(); let instance = HummockManager { @@ -407,7 +410,8 @@ impl HummockManager { }), event_sender: tx, object_store, - version_checkpoint_path: checkpoint_path, + version_checkpoint_path, + 
version_archive_dir, pause_version_checkpoint: AtomicBool::new(false), history_table_throughput: parking_lot::RwLock::new(HashMap::default()), compactor_streams_change_tx, @@ -2034,9 +2038,9 @@ impl HummockManager { Ok(_) => true, Err(e) => { tracing::error!( - "failed to send compaction request for compaction group {}. {}", + error = %e.as_report(), + "failed to send compaction request for compaction group {}", compaction_group, - e ); false } @@ -2078,27 +2082,28 @@ impl HummockManager { .into()); } Err(err) => { - tracing::warn!("Failed to get compaction task: {:#?}.", err); - return Err(anyhow::anyhow!( - "Failed to get compaction task: {:#?} compaction_group {}", - err, - compaction_group - ) - .into()); + tracing::warn!(error = %err.as_report(), "Failed to get compaction task"); + + return Err(anyhow::anyhow!(err) + .context(format!( + "Failed to get compaction task for compaction_group {}", + compaction_group, + )) + .into()); } }; // 3. send task to compactor let compact_task_string = compact_task_to_string(&compact_task); - if let Err(e) = compactor.send_event(ResponseEvent::CompactTask(compact_task)) { - // TODO: shall we need to cancel on meta ? - return Err(anyhow::anyhow!( - "Failed to trigger compaction task: {:#?} compaction_group {}", - e, - compaction_group - ) - .into()); - } + // TODO: shall we need to cancel on meta ? + compactor + .send_event(ResponseEvent::CompactTask(compact_task)) + .with_context(|| { + format!( + "Failed to trigger compaction task for compaction_group {}", + compaction_group, + ) + })?; tracing::info!( "Trigger manual compaction task. {}. cost time: {:?}", @@ -2419,8 +2424,12 @@ impl HummockManager { ) .await { - tracing::error!("Attempt to remove compaction task due to elapsed heartbeat failed. We will continue to track its heartbeat - until we can successfully report its status. 
task_id: {}, ERR: {e:?}", task.task_id); + tracing::error!( + task_id = task.task_id, + error = %e.as_report(), + "Attempt to remove compaction task due to elapsed heartbeat failed. We will continue to track its heartbeat + until we can successfully report its status", + ); } } } @@ -2742,7 +2751,7 @@ impl HummockManager { } (Some(Err(err)), _stream) => { - tracing::warn!("compactor {} leaving the cluster with err {:?}", context_id, err); + tracing::warn!(error = %err.as_report(), "compactor {} leaving the cluster with err", context_id); hummock_manager.compactor_manager .remove_compactor(context_id); continue @@ -2812,10 +2821,10 @@ impl HummockManager { ResponseEvent::CompactTask(compact_task) ) { tracing::warn!( - "Failed to send task {} to {}. {:#?}", + error = %e.as_report(), + "Failed to send task {} to {}", task_id, compactor.context_id(), - e ); compactor_alive = false; @@ -2828,7 +2837,7 @@ impl HummockManager { break; } Err(err) => { - tracing::warn!("Failed to get compaction task: {:#?}.", err); + tracing::warn!(error = %err.as_report(), "Failed to get compaction task"); break; } }; @@ -2839,9 +2848,9 @@ impl HummockManager { if compactor_alive { if let Err(e) = compactor.send_event(ResponseEvent::PullTaskAck(PullTaskAck {})){ tracing::warn!( - "Failed to send ask to {}. {:#?}", + error = %e.as_report(), + "Failed to send ask to {}", context_id, - e ); compactor_alive = false; @@ -2861,7 +2870,7 @@ impl HummockManager { }) => { if let Err(e) = hummock_manager.report_compact_task(task_id, TaskStatus::try_from(task_status).unwrap(), sorted_output_ssts, Some(table_stats_change)) .await { - tracing::error!("report compact_tack fail {e:?}"); + tracing::error!(error = %e.as_report(), "report compact_tack fail"); } }, @@ -2885,8 +2894,12 @@ impl HummockManager { .cancel_compact_task(task.task_id, TaskStatus::HeartbeatCanceled) .await { - tracing::error!("Attempt to remove compaction task due to elapsed heartbeat failed. 
We will continue to track its heartbeat - until we can successfully report its status. task_id: {}, ERR: {e:?}", task.task_id); + tracing::error!( + task_id = task.task_id, + error = %e.as_report(), + "Attempt to remove compaction task due to elapsed heartbeat failed. We will continue to track its heartbeat + until we can successfully report its status." + ); } if let Some(compactor) = compactor_manager.get_compactor(context_id) { @@ -2940,10 +2953,10 @@ impl HummockManager { for cg_id in self.compaction_group_ids().await { if let Err(e) = self.compaction_state.try_sched_compaction(cg_id, task_type) { tracing::warn!( - "Failed to schedule {:?} compaction for compaction group {}. {}", + error = %e.as_report(), + "Failed to schedule {:?} compaction for compaction group {}", task_type, cg_id, - e ); } } @@ -3062,10 +3075,10 @@ impl HummockManager { } Err(e) => { tracing::info!( - "failed to move state table [{}] from group-{} because {:?}", + error = %e.as_report(), + "failed to move state table [{}] from group-{}", table_id, parent_group_id, - e ) } } diff --git a/src/meta/src/hummock/manager/versioning.rs b/src/meta/src/hummock/manager/versioning.rs index bca0d13cfd24..5cf270076e6c 100644 --- a/src/meta/src/hummock/manager/versioning.rs +++ b/src/meta/src/hummock/manager/versioning.rs @@ -55,15 +55,12 @@ pub struct HummockVersionSafePoint { impl Drop for HummockVersionSafePoint { fn drop(&mut self) { - if let Err(e) = self + if self .event_sender .send(HummockManagerEvent::DropSafePoint(self.id)) + .is_err() { - tracing::debug!( - "failed to drop hummock version safe point {}. 
{}", - self.id, - e - ); + tracing::debug!("failed to drop hummock version safe point {}", self.id); } } } diff --git a/src/meta/src/hummock/manager/worker.rs b/src/meta/src/hummock/manager/worker.rs index e496a94e614f..549365607b9b 100644 --- a/src/meta/src/hummock/manager/worker.rs +++ b/src/meta/src/hummock/manager/worker.rs @@ -17,6 +17,7 @@ use std::time::Duration; use risingwave_hummock_sdk::HummockVersionId; use risingwave_pb::common::WorkerType; use sync_point::sync_point; +use thiserror_ext::AsReport; use tokio::task::JoinHandle; use tokio_retry::strategy::{jitter, ExponentialBackoff}; @@ -107,9 +108,9 @@ impl HummockManager { || async { if let Err(err) = self.release_contexts(vec![worker_node.id]).await { tracing::warn!( - "Failed to release hummock context {}. {}. Will retry.", + error = %err.as_report(), + "Failed to release hummock context {}, will retry", worker_node.id, - err ); return Err(err); } diff --git a/src/meta/src/hummock/mock_hummock_meta_client.rs b/src/meta/src/hummock/mock_hummock_meta_client.rs index fc77e9284fca..bdc674830458 100644 --- a/src/meta/src/hummock/mock_hummock_meta_client.rs +++ b/src/meta/src/hummock/mock_hummock_meta_client.rs @@ -40,6 +40,7 @@ use risingwave_pb::hummock::{ }; use risingwave_rpc_client::error::{Result, RpcError}; use risingwave_rpc_client::{CompactionEventItem, HummockMetaClient}; +use thiserror_ext::AsReport; use tokio::sync::mpsc::{unbounded_channel, UnboundedSender}; use tokio::task::JoinHandle; use tokio_stream::wrappers::UnboundedReceiverStream; @@ -119,7 +120,7 @@ impl MockHummockMetaClient { } fn mock_err(error: super::error::Error) -> RpcError { - anyhow!("mock error: {}", error).into() + anyhow!(error).context("mock error").into() } #[async_trait] @@ -326,7 +327,7 @@ impl HummockMetaClient for MockHummockMetaClient { ) .await { - tracing::error!("report compact_tack fail {e:?}"); + tracing::error!(error = %e.as_report(), "report compact_tack fail"); } } } diff --git a/src/meta/src/hummock/mod.rs 
b/src/meta/src/hummock/mod.rs index 6f948fd413ba..65017943cb90 100644 --- a/src/meta/src/hummock/mod.rs +++ b/src/meta/src/hummock/mod.rs @@ -16,6 +16,7 @@ pub mod compactor_manager; pub mod error; mod manager; pub use manager::*; +use thiserror_ext::AsReport; mod level_handler; mod metrics_utils; @@ -84,7 +85,7 @@ pub fn start_vacuum_metadata_loop( } } if let Err(err) = vacuum.vacuum_metadata().await { - tracing::warn!("Vacuum metadata error {:#?}", err); + tracing::warn!(error = %err.as_report(), "Vacuum metadata error"); } } }); @@ -111,7 +112,7 @@ pub fn start_vacuum_object_loop( } } if let Err(err) = vacuum.vacuum_object().await { - tracing::warn!("Vacuum object error {:#?}", err); + tracing::warn!(error = %err.as_report(), "Vacuum object error"); } } }); @@ -146,7 +147,7 @@ pub fn start_checkpoint_loop( .create_version_checkpoint(min_delta_log_num) .await { - tracing::warn!("Hummock version checkpoint error {:#?}", err); + tracing::warn!(error = %err.as_report(), "Hummock version checkpoint error"); } } }); diff --git a/src/meta/src/hummock/vacuum.rs b/src/meta/src/hummock/vacuum.rs index d03a581087bb..10f748b67165 100644 --- a/src/meta/src/hummock/vacuum.rs +++ b/src/meta/src/hummock/vacuum.rs @@ -20,6 +20,7 @@ use itertools::Itertools; use risingwave_hummock_sdk::HummockSstableObjectId; use risingwave_pb::hummock::subscribe_compaction_event_response::Event as ResponseEvent; use risingwave_pb::hummock::VacuumTask; +use thiserror_ext::AsReport; use super::CompactorManagerRef; use crate::backup_restore::BackupManagerRef; @@ -148,9 +149,9 @@ impl VacuumManager { } Err(err) => { tracing::warn!( - "Failed to send vacuum task to worker {}: {:#?}", + error = %err.as_report(), + "Failed to send vacuum task to worker {}", compactor.context_id(), - err ); self.compactor_manager .remove_compactor(compactor.context_id()); diff --git a/src/meta/src/manager/cluster.rs b/src/meta/src/manager/cluster.rs index 357c4a4cc887..8640088f3019 100644 --- 
a/src/meta/src/manager/cluster.rs +++ b/src/meta/src/manager/cluster.rs @@ -31,6 +31,7 @@ use risingwave_pb::meta::add_worker_node_request::Property as AddNodeProperty; use risingwave_pb::meta::heartbeat_request; use risingwave_pb::meta::subscribe_response::{Info, Operation}; use risingwave_pb::meta::update_worker_node_schedulability_request::Schedulability; +use thiserror_ext::AsReport; use tokio::sync::oneshot::Sender; use tokio::sync::{RwLock, RwLockReadGuard}; use tokio::task::JoinHandle; @@ -400,11 +401,11 @@ impl ClusterManager { } Err(err) => { tracing::warn!( - "Failed to delete expired worker {} {:#?}, current timestamp {}. {:?}", + error = %err.as_report(), + "Failed to delete expired worker {} {:#?}, current timestamp {}", worker_id, key, now, - err, ); } } diff --git a/src/meta/src/manager/env.rs b/src/meta/src/manager/env.rs index e40fbe4f1701..490ad93189bc 100644 --- a/src/meta/src/manager/env.rs +++ b/src/meta/src/manager/env.rs @@ -117,6 +117,7 @@ pub struct MetaOpts { pub vacuum_spin_interval_ms: u64, /// Interval of hummock version checkpoint. pub hummock_version_checkpoint_interval_sec: u64, + pub enable_hummock_data_archive: bool, /// The minimum delta log number a new checkpoint should compact, otherwise the checkpoint /// attempt is rejected. 
Greater value reduces object store IO, meanwhile it results in /// more loss of in memory `HummockVersionCheckpoint::stale_objects` state when meta node is @@ -228,6 +229,7 @@ impl MetaOpts { vacuum_interval_sec: 30, vacuum_spin_interval_ms: 0, hummock_version_checkpoint_interval_sec: 30, + enable_hummock_data_archive: false, min_delta_log_num_for_hummock_version_checkpoint: 1, min_sst_retention_time_sec: 3600 * 24 * 7, full_gc_interval_sec: 3600 * 24 * 7, diff --git a/src/meta/src/manager/id.rs b/src/meta/src/manager/id.rs index f8373e160961..377589886ec6 100644 --- a/src/meta/src/manager/id.rs +++ b/src/meta/src/manager/id.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use risingwave_common::catalog::{NON_RESERVED_SYS_CATALOG_ID, NON_RESERVED_USER_ID}; use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; +use thiserror_ext::AsReport; use tokio::sync::RwLock; use crate::manager::cluster::META_NODE_ID; @@ -57,7 +58,7 @@ impl StoredIdGenerator { let current_id = match res { Ok(value) => memcomparable::from_slice(&value).unwrap(), Err(MetaStoreError::ItemNotFound(_)) => start.unwrap_or(0), - Err(e) => panic!("{:?}", e), + Err(e) => panic!("{}", e.as_report()), }; let next_allocate_id = current_id + ID_PREALLOCATE_INTERVAL; @@ -69,7 +70,7 @@ impl StoredIdGenerator { ) .await { - panic!("{:?}", err) + panic!("{}", err.as_report()); } StoredIdGenerator { diff --git a/src/meta/src/manager/notification.rs b/src/meta/src/manager/notification.rs index 2319ce35d5c8..3132f662cd76 100644 --- a/src/meta/src/manager/notification.rs +++ b/src/meta/src/manager/notification.rs @@ -23,6 +23,7 @@ use risingwave_pb::meta::subscribe_response::{Info, Operation}; use risingwave_pb::meta::{ MetaSnapshot, Relation, RelationGroup, SubscribeResponse, SubscribeType, }; +use thiserror_ext::AsReport; use tokio::sync::mpsc::{self, UnboundedSender}; use tokio::sync::Mutex; use tonic::Status; @@ -265,7 +266,7 @@ impl NotificationManager { let mut core_guard = self.core.lock().await; 
core_guard.local_senders.retain(|sender| { if let Err(err) = sender.send(notification.clone()) { - tracing::warn!("Failed to notify local subscriber. {}", err); + tracing::warn!(error = %err.as_report(), "Failed to notify local subscriber"); return false; } true diff --git a/src/meta/src/manager/sink_coordination/coordinator_worker.rs b/src/meta/src/manager/sink_coordination/coordinator_worker.rs index 24977acc5760..bebef2d307dc 100644 --- a/src/meta/src/manager/sink_coordination/coordinator_worker.rs +++ b/src/meta/src/manager/sink_coordination/coordinator_worker.rs @@ -30,6 +30,7 @@ use risingwave_pb::connector_service::coordinate_response::{ use risingwave_pb::connector_service::{ coordinate_request, coordinate_response, CoordinateRequest, CoordinateResponse, SinkMetadata, }; +use thiserror_ext::AsReport; use tokio::sync::mpsc::UnboundedReceiver; use tonic::Status; use tracing::{error, warn}; @@ -62,8 +63,9 @@ impl CoordinatorWorker { Ok(sink) => sink, Err(e) => { error!( - "unable to build sink with param {:?}: {:?}", - first_writer_request.param, e + error = %e.as_report(), + "unable to build sink with param {:?}", + first_writer_request.param ); send_await_with_err_check!( first_writer_request.response_tx, @@ -77,8 +79,9 @@ impl CoordinatorWorker { Ok(coordinator) => coordinator, Err(e) => { error!( - "unable to build coordinator with param {:?}: {:?}", - first_writer_request.param, e + error = %e.as_report(), + "unable to build coordinator with param {:?}", + first_writer_request.param ); send_await_with_err_check!( first_writer_request.response_tx, @@ -149,10 +152,9 @@ impl CoordinatorWorker { Either::Right((Some(Ok(None)), _)) => Err(anyhow!( "one sink writer stream reaches the end before initialize" )), - Either::Right((Some(Err(e)), _)) => Err(anyhow!( - "unable to poll from one sink writer stream: {:?}", - e - )), + Either::Right((Some(Err(e)), _)) => { + Err(anyhow!(e).context("unable to poll from one sink writer stream")) + } Either::Right((None, _)) 
=> unreachable!("request_streams must not be empty"), } } @@ -265,10 +267,8 @@ impl CoordinatorWorker { )); } Err(e) => { - return Err(anyhow!( - "failed to poll from one of the writer request streams: {:?}", - e - )); + return Err(anyhow!(e) + .context("failed to poll from one of the writer request streams")); } }, Either::Right((None, _)) => { @@ -285,17 +285,16 @@ impl CoordinatorWorker { async fn start_coordination(&mut self, mut coordinator: impl SinkCommitCoordinator) { let result: Result<(), ()> = try { coordinator.init().await.map_err(|e| { - error!("failed to initialize coordinator: {:?}", e); + error!(error = %e.as_report(), "failed to initialize coordinator"); })?; loop { let (epoch, metadata_list) = self.collect_all_metadata().await.map_err(|e| { - error!("failed to collect all metadata: {:?}", e); + error!(error = %e.as_report(), "failed to collect all metadata"); })?; // TODO: measure commit time - coordinator - .commit(epoch, metadata_list) - .await - .map_err(|e| error!("failed to commit metadata of epoch {}: {:?}", epoch, e))?; + coordinator.commit(epoch, metadata_list).await.map_err( + |e| error!(epoch, error = %e.as_report(), "failed to commit metadata of epoch"), + )?; self.send_to_all_sink_writers(|| { Ok(CoordinateResponse { diff --git a/src/meta/src/manager/sink_coordination/manager.rs b/src/meta/src/manager/sink_coordination/manager.rs index 2c1d248565d4..d174b8aca7c5 100644 --- a/src/meta/src/manager/sink_coordination/manager.rs +++ b/src/meta/src/manager/sink_coordination/manager.rs @@ -25,6 +25,7 @@ use risingwave_connector::sink::SinkParam; use risingwave_pb::connector_service::coordinate_request::Msg; use risingwave_pb::connector_service::{coordinate_request, CoordinateRequest, CoordinateResponse}; use rw_futures_util::pending_on_none; +use thiserror_ext::AsReport; use tokio::sync::mpsc; use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; use tokio::sync::oneshot::{channel, Receiver, Sender}; @@ -292,14 
+293,15 @@ impl ManagerWorker { match join_result { Ok(()) => { info!( - "sink coordinator of {} has gracefully finished", - sink_id.sink_id + id = sink_id.sink_id, + "sink coordinator has gracefully finished", ); } Err(err) => { error!( - "sink coordinator of {} finished with error {:?}", - sink_id.sink_id, err + id = sink_id.sink_id, + error = %err.as_report(), + "sink coordinator finished with error", ); } } diff --git a/src/meta/src/manager/system_param/mod.rs b/src/meta/src/manager/system_param/mod.rs index 7ff3a4355158..a7336a10e5de 100644 --- a/src/meta/src/manager/system_param/mod.rs +++ b/src/meta/src/manager/system_param/mod.rs @@ -90,7 +90,12 @@ impl SystemParamsManager { let params = params_guard.deref_mut(); let mut mem_txn = VarTransaction::new(params); - set_system_param(mem_txn.deref_mut(), name, value).map_err(MetaError::system_params)?; + let Some((_new_value, diff)) = + set_system_param(mem_txn.deref_mut(), name, value).map_err(MetaError::system_params)? + else { + // No changes on the parameter. + return Ok(params.clone()); + }; let mut store_txn = Transaction::default(); mem_txn.apply_to_txn(&mut store_txn).await?; @@ -98,10 +103,10 @@ impl SystemParamsManager { mem_txn.commit(); - // TODO: check if the parameter is actually changed. - // Run common handler. - self.common_handler.handle_change(params.clone().into()); + self.common_handler.handle_change(&diff); + + // TODO: notify the diff instead of the snapshot. // Sync params to other managers on the meta node only once, since it's infallible. self.notification_manager diff --git a/src/meta/src/model/notification.rs b/src/meta/src/model/notification.rs index be3784836e5b..0ea30bd61d7b 100644 --- a/src/meta/src/model/notification.rs +++ b/src/meta/src/model/notification.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use thiserror_ext::AsReport; + use crate::storage::{MetaStore, MetaStoreError, DEFAULT_COLUMN_FAMILY}; /// `NotificationVersion` records the last sent notification version, this will be stored @@ -31,7 +33,7 @@ impl NotificationVersion { { Ok(byte_vec) => memcomparable::from_slice(&byte_vec).unwrap(), Err(MetaStoreError::ItemNotFound(_)) => 0, - Err(e) => panic!("{:?}", e), + Err(e) => panic!("{}", e.as_report()), }; Self(version) } diff --git a/src/meta/src/rpc/ddl_controller.rs b/src/meta/src/rpc/ddl_controller.rs index 8a017f6aa2e5..e4296f7f403c 100644 --- a/src/meta/src/rpc/ddl_controller.rs +++ b/src/meta/src/rpc/ddl_controller.rs @@ -57,6 +57,7 @@ use risingwave_pb::stream_plan::{ Dispatcher, DispatcherType, FragmentTypeFlag, MergeNode, PbStreamFragmentGraph, StreamFragmentGraph as StreamFragmentGraphProto, }; +use thiserror_ext::AsReport; use tokio::sync::Semaphore; use tokio::time::sleep; use tracing::log::warn; @@ -779,7 +780,7 @@ impl DdlController { .await; match result { Err(e) => { - tracing::error!(id=stream_job_id, error = ?e, "finish stream job failed") + tracing::error!(id = stream_job_id, error = %e.as_report(), "finish stream job failed") } Ok(_) => { tracing::info!(id = stream_job_id, "finish stream job succeeded") @@ -1079,7 +1080,7 @@ impl DdlController { let job_id = stream_job.id(); tracing::debug!(id = job_id, "creating stream job"); - let result = try { + let result: MetaResult<()> = try { // Add table fragments to meta store with state: `State::Initial`. 
mgr.fragment_manager .start_create_table_fragments(table_fragments.clone()) @@ -1093,7 +1094,7 @@ impl DdlController { if let Err(e) = result { match stream_job.create_type() { CreateType::Background => { - tracing::error!(id = job_id, error = ?e, "finish stream job failed"); + tracing::error!(id = job_id, error = %e.as_report(), "finish stream job failed"); let should_cancel = match mgr .fragment_manager .select_table_fragments_by_table_id(&job_id.into()) @@ -1428,7 +1429,10 @@ impl DdlController { .await; creating_internal_table_ids.push(table.id); if let Err(e) = result { - tracing::warn!("Failed to cancel create table procedure, perhaps barrier manager has already cleaned it. Reason: {e:#?}"); + tracing::warn!( + error = %e.as_report(), + "Failed to cancel create table procedure, perhaps barrier manager has already cleaned it." + ); } } StreamingJob::Sink(sink, target_table) => { @@ -1450,7 +1454,10 @@ impl DdlController { ) .await; if let Err(e) = result { - tracing::warn!("Failed to cancel create table procedure, perhaps barrier manager has already cleaned it. Reason: {e:#?}"); + tracing::warn!( + error = %e.as_report(), + "Failed to cancel create table procedure, perhaps barrier manager has already cleaned it." 
+ ); } } creating_internal_table_ids.push(table.id); diff --git a/src/meta/src/rpc/election/etcd.rs b/src/meta/src/rpc/election/etcd.rs index 3cfb0bea6eba..96b16f537356 100644 --- a/src/meta/src/rpc/election/etcd.rs +++ b/src/meta/src/rpc/election/etcd.rs @@ -18,6 +18,7 @@ use std::time::Duration; use etcd_client::{ConnectOptions, Error, GetOptions, LeaderKey, ResignOptions}; use risingwave_common::bail; +use thiserror_ext::AsReport; use tokio::sync::watch::Receiver; use tokio::sync::{oneshot, watch}; use tokio::time; @@ -118,9 +119,9 @@ impl ElectionClient for EtcdElectionClient { Ok(resp) => resp, Err(e) => { tracing::warn!( - "create lease keeper for {} failed {}", + error = %e.as_report(), + "create lease keeper for {} failed", lease_id, - e.to_string() ); keep_alive_fail_tx.send(()).unwrap(); return; @@ -148,7 +149,7 @@ impl ElectionClient for EtcdElectionClient { _ = ticker.tick(), if !keep_alive_sending => { if let Err(err) = keeper.keep_alive().await { - tracing::debug!("keep alive for lease {} failed {}", lease_id, err); + tracing::debug!(error = %err.as_report(), "keep alive for lease {} failed", lease_id); continue } @@ -179,7 +180,7 @@ impl ElectionClient for EtcdElectionClient { continue; } Err(e) => { - tracing::error!("lease keeper failed {}", e.to_string()); + tracing::error!(error = %e.as_report(), "lease keeper failed"); continue; } }; @@ -264,7 +265,7 @@ impl ElectionClient for EtcdElectionClient { } } Some(Err(e)) => { - tracing::warn!("error {} received from leader observe stream", e.to_string()); + tracing::warn!(error = %e.as_report(), "error received from leader observe stream"); continue } } diff --git a/src/meta/src/rpc/election/sql.rs b/src/meta/src/rpc/election/sql.rs index b076f542a370..65c3ad613dde 100644 --- a/src/meta/src/rpc/election/sql.rs +++ b/src/meta/src/rpc/election/sql.rs @@ -20,6 +20,7 @@ use sea_orm::{ ConnectionTrait, DatabaseBackend, DatabaseConnection, FromQueryResult, Statement, TransactionTrait, Value, }; +use 
thiserror_ext::AsReport; use tokio::sync::watch; use tokio::sync::watch::Receiver; use tokio::time; @@ -614,7 +615,7 @@ where .update_heartbeat(META_ELECTION_KEY, id.as_str()) .await { - tracing::debug!("keep alive for member {} failed {}", id, e); + tracing::debug!(error = %e.as_report(), "keep alive for member {} failed", id); continue } } @@ -669,7 +670,7 @@ where if is_leader { tracing::info!("leader {} resigning", self.id); if let Err(e) = self.driver.resign(META_ELECTION_KEY, self.id.as_str()).await { - tracing::warn!("resign failed {}", e); + tracing::warn!(error = %e.as_report(), "resign failed"); } } diff --git a/src/meta/src/stream/scale.rs b/src/meta/src/stream/scale.rs index 00a8c18885dc..2674975488e4 100644 --- a/src/meta/src/stream/scale.rs +++ b/src/meta/src/stream/scale.rs @@ -42,6 +42,7 @@ use risingwave_pb::stream_plan::{DispatcherType, FragmentTypeFlag, StreamActor, use risingwave_pb::stream_service::{ BroadcastActorInfoTableRequest, BuildActorsRequest, UpdateActorsRequest, }; +use thiserror_ext::AsReport; use tokio::sync::oneshot; use tokio::sync::oneshot::Receiver; use tokio::task::JoinHandle; @@ -1780,13 +1781,8 @@ impl ScaleController { .flatten() .cloned() .exactly_one() - .map_err(|e| { - anyhow!( - "Cannot find a single target ParallelUnit for fragment {}: {}", - fragment_id, - e - ) - })?; + .ok() + .with_context(|| format!("Cannot find a single target ParallelUnit for fragment {fragment_id}"))?; target_plan.insert( fragment_id, @@ -2558,7 +2554,7 @@ impl GlobalStreamManager { changed = false; } Err(e) => { - tracing::warn!(error = e.to_string(), "Failed to trigger scale out, waiting for next tick to retry after {}s", ticker.period().as_secs()); + tracing::warn!(error = %e.as_report(), "Failed to trigger scale out, waiting for next tick to retry after {}s", ticker.period().as_secs()); ticker.reset(); } } diff --git a/src/meta/src/stream/source_manager.rs b/src/meta/src/stream/source_manager.rs index 9f99aa0b405d..f3ce3816522b 100644 --- 
a/src/meta/src/stream/source_manager.rs +++ b/src/meta/src/stream/source_manager.rs @@ -31,6 +31,7 @@ use risingwave_connector::source::{ use risingwave_pb::catalog::Source; use risingwave_pb::source::{ConnectorSplit, ConnectorSplits}; use risingwave_rpc_client::ConnectorClient; +use thiserror_ext::AsReport; use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender}; use tokio::sync::{oneshot, Mutex}; use tokio::task::JoinHandle; @@ -172,11 +173,11 @@ impl ConnectorSourceWorker

{ _ = interval.tick() => { if self.fail_cnt > MAX_FAIL_CNT { if let Err(e) = self.refresh().await { - tracing::error!("error happened when refresh from connector source worker: {}", e.to_string()); + tracing::error!(error = %e.as_report(), "error happened when refresh from connector source worker"); } } if let Err(e) = self.tick().await { - tracing::error!("error happened when tick from connector source worker: {}", e.to_string()); + tracing::error!(error = %e.as_report(), "error happened when tick from connector source worker"); } } } @@ -289,7 +290,7 @@ impl SourceManagerCore { { Ok(actor_ids) => actor_ids, Err(err) => { - tracing::warn!("Failed to get the actor of the fragment {}, maybe the fragment doesn't exist anymore", err.to_string()); + tracing::warn!(error = %err.as_report(), "Failed to get the actor of the fragment, maybe the fragment doesn't exist anymore"); continue; } }; @@ -709,8 +710,11 @@ impl SourceManager { handle .sync_call_tx .send(tx) - .map_err(|e| anyhow!(e.to_string()))?; - rx.await.map_err(|e| anyhow!(e.to_string()))??; + .ok() + .context("failed to send sync call")?; + rx.await + .ok() + .context("failed to receive sync call response")??; } let splits = handle.discovered_splits().await.unwrap(); @@ -926,8 +930,8 @@ impl SourceManager { let _pause_guard = self.paused.lock().await; if let Err(e) = self.tick().await { tracing::error!( - "error happened while running source manager tick: {}", - e.to_string() + error = %e.as_report(), + "error happened while running source manager tick", ); } } diff --git a/src/meta/src/stream/stream_manager.rs b/src/meta/src/stream/stream_manager.rs index 28168d3db64d..1950a27013f3 100644 --- a/src/meta/src/stream/stream_manager.rs +++ b/src/meta/src/stream/stream_manager.rs @@ -27,6 +27,7 @@ use risingwave_pb::stream_plan::Dispatcher; use risingwave_pb::stream_service::{ BroadcastActorInfoTableRequest, BuildActorsRequest, DropActorsRequest, UpdateActorsRequest, }; +use thiserror_ext::AsReport; use 
tokio::sync::mpsc::Sender; use tokio::sync::{oneshot, Mutex, RwLock}; use tracing::Instrument; @@ -272,7 +273,7 @@ impl GlobalStreamManager { }) .await .inspect_err(|_| { - tracing::warn!("failed to notify failed: {table_id}, err: {err}") + tracing::warn!(error = %err.as_report(), "failed to notify failed: {table_id}") }); } } @@ -601,7 +602,7 @@ impl GlobalStreamManager { .drop_streaming_jobs_impl(streaming_job_ids) .await .inspect_err(|err| { - tracing::error!(error = ?err, "Failed to drop streaming jobs"); + tracing::error!(error = %err.as_report(), "Failed to drop streaming jobs"); }); } } diff --git a/src/object_store/src/object/error.rs b/src/object_store/src/object/error.rs index c84527e6bdb0..f1c18bb39118 100644 --- a/src/object_store/src/object/error.rs +++ b/src/object_store/src/object/error.rs @@ -22,6 +22,7 @@ use aws_sdk_s3::primitives::ByteStreamError; use aws_smithy_types::body::SdkBody; use risingwave_common::error::BoxedError; use thiserror::Error; +use thiserror_ext::AsReport; use tokio::sync::oneshot::error::RecvError; #[derive(Error, Debug, thiserror_ext::Box, thiserror_ext::Construct)] @@ -97,13 +98,13 @@ where impl From for ObjectError { fn from(e: RecvError) -> Self { - ObjectErrorInner::Internal(e.to_string()).into() + ObjectErrorInner::Internal(e.to_report_string()).into() } } impl From for ObjectError { fn from(e: ByteStreamError) -> Self { - ObjectErrorInner::Internal(e.to_string()).into() + ObjectErrorInner::Internal(e.to_report_string()).into() } } diff --git a/src/object_store/src/object/mod.rs b/src/object_store/src/object/mod.rs index 32adba09d2bb..ab5abe0d0747 100644 --- a/src/object_store/src/object/mod.rs +++ b/src/object_store/src/object/mod.rs @@ -37,6 +37,7 @@ pub mod object_metrics; pub use error::*; use object_metrics::ObjectStoreMetrics; +use thiserror_ext::AsReport; pub type ObjectStoreRef = Arc; pub type ObjectStreamingUploader = MonitoredStreamingUploader; @@ -274,7 +275,7 @@ fn try_update_failure_metric( operation_type: 
&'static str, ) { if let Err(e) = &result { - tracing::error!("{:?} failed because of: {:?}", operation_type, e); + tracing::error!(error = %e.as_report(), "{} failed", operation_type); metrics .failure_count .with_label_values(&[operation_type]) @@ -875,7 +876,7 @@ pub async fn build_remote_object_store( panic!("Passing s3-compatible is not supported, please modify the environment variable and pass in s3."); } minio if minio.starts_with("minio://") => ObjectStoreImpl::S3( - S3ObjectStore::with_minio(minio, metrics.clone()) + S3ObjectStore::with_minio(minio, metrics.clone(), config) .await .monitored(metrics), ), diff --git a/src/object_store/src/object/opendal_engine/opendal_object_store.rs b/src/object_store/src/object/opendal_engine/opendal_object_store.rs index 9cf89c65f558..19bddcfc7ac5 100644 --- a/src/object_store/src/object/opendal_engine/opendal_object_store.rs +++ b/src/object_store/src/object/opendal_engine/opendal_object_store.rs @@ -20,6 +20,7 @@ use futures::{stream, StreamExt, TryStreamExt}; use opendal::services::Memory; use opendal::{Metakey, Operator, Writer}; use risingwave_common::range::RangeBoundsExt; +use thiserror_ext::AsReport; use crate::object::{ BoxedStreamingUploader, ObjectDataStream, ObjectError, ObjectMetadata, ObjectMetadataIter, @@ -116,9 +117,9 @@ impl ObjectStore for OpendalObjectStore { )); let range: Range = (range.start as u64)..(range.end as u64); let reader = self.op.reader_with(path).range(range).await?; - let stream = reader - .into_stream() - .map(|item| item.map_err(|e| ObjectError::internal(format!("OpendalError: {:?}", e)))); + let stream = reader.into_stream().map(|item| { + item.map_err(|e| ObjectError::internal(format!("OpendalError: {}", e.as_report()))) + }); Ok(Box::pin(stream)) } diff --git a/src/object_store/src/object/s3.rs b/src/object_store/src/object/s3.rs index 5c6f056d9e53..cddb9c0c75e3 100644 --- a/src/object_store/src/object/s3.rs +++ b/src/object_store/src/object/s3.rs @@ -47,6 +47,7 @@ use 
itertools::Itertools; use risingwave_common::config::{ObjectStoreConfig, S3ObjectStoreConfig}; use risingwave_common::monitor::connection::monitor_connector; use risingwave_common::range::RangeBoundsExt; +use thiserror_ext::AsReport; use tokio::task::JoinHandle; use tokio_retry::strategy::{jitter, ExponentialBackoff}; @@ -281,7 +282,7 @@ impl StreamingUploader for S3StreamingUploader { Ok(()) } } else if let Err(e) = self.flush_multipart_and_complete().await { - tracing::warn!("Failed to upload object {}: {:?}", self.key, e); + tracing::warn!(key = self.key, error = %e.as_report(), "Failed to upload object"); self.abort_multipart_upload().await?; Err(e) } else { @@ -629,7 +630,11 @@ impl S3ObjectStore { } /// Creates a minio client. The server should be like `minio://key:secret@address:port/bucket`. - pub async fn with_minio(server: &str, metrics: Arc) -> Self { + pub async fn with_minio( + server: &str, + metrics: Arc, + s3_object_store_config: ObjectStoreConfig, + ) -> Self { let server = server.strip_prefix("minio://").unwrap(); let (access_key_id, rest) = server.split_once(':').unwrap(); let (secret_access_key, mut rest) = rest.split_once('@').unwrap(); @@ -644,7 +649,6 @@ impl S3ObjectStore { }; let (address, bucket) = rest.split_once('/').unwrap(); - let s3_object_store_config = ObjectStoreConfig::default(); #[cfg(madsim)] let builder = aws_sdk_s3::config::Builder::new().credentials_provider( Credentials::from_keys(access_key_id, secret_access_key, None), diff --git a/src/risedevtool/Cargo.toml b/src/risedevtool/Cargo.toml index 3b4acc0f9877..47c5726ff145 100644 --- a/src/risedevtool/Cargo.toml +++ b/src/risedevtool/Cargo.toml @@ -32,6 +32,7 @@ serde_json = "1" serde_with = "3" serde_yaml = "0.9" tempfile = "3" +thiserror-ext = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio", features = [ "rt", "rt-multi-thread", diff --git a/src/risedevtool/src/bin/risedev-docslt.rs b/src/risedevtool/src/bin/risedev-docslt.rs index 
d073ff40f312..627e7b028861 100644 --- a/src/risedevtool/src/bin/risedev-docslt.rs +++ b/src/risedevtool/src/bin/risedev-docslt.rs @@ -17,6 +17,7 @@ use std::path::{Path, PathBuf}; use anyhow::Result; use itertools::Itertools; +use thiserror_ext::AsReport; use tracing::*; #[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] @@ -89,7 +90,7 @@ fn main() -> Result<()> { let path = match entry { Ok(path) => path, Err(e) => { - error!("{:?}", e); + error!("{}", e.as_report()); continue; } }; diff --git a/src/rpc_client/src/connector_client.rs b/src/rpc_client/src/connector_client.rs index cd53cd019ea6..d627a692735c 100644 --- a/src/rpc_client/src/connector_client.rs +++ b/src/rpc_client/src/connector_client.rs @@ -16,7 +16,7 @@ use std::collections::HashMap; use std::fmt::Debug; use std::time::Duration; -use anyhow::anyhow; +use anyhow::{anyhow, Context}; use futures::TryStreamExt; use risingwave_common::config::{MAX_CONNECTION_WINDOW_SIZE, STREAM_WINDOW_SIZE}; use risingwave_common::monitor::connection::{EndpointExt, TcpConfig}; @@ -30,6 +30,7 @@ use risingwave_pb::connector_service::sink_writer_stream_request::{ }; use risingwave_pb::connector_service::sink_writer_stream_response::CommitResponse; use risingwave_pb::connector_service::*; +use thiserror_ext::AsReport; use tokio_stream::wrappers::ReceiverStream; use tonic::transport::{Channel, Endpoint}; use tonic::Streaming; @@ -150,8 +151,9 @@ impl ConnectorClient { Ok(client) => Some(client), Err(e) => { error!( - "invalid connector endpoint {:?}: {:?}", - connector_endpoint, e + endpoint = connector_endpoint, + error = %e.as_report(), + "invalid connector endpoint", ); None } @@ -162,12 +164,7 @@ impl ConnectorClient { #[allow(clippy::unused_async)] pub async fn new(connector_endpoint: &String) -> Result { let endpoint = Endpoint::from_shared(format!("http://{}", connector_endpoint)) - .map_err(|e| { - RpcError::Internal(anyhow!(format!( - "invalid connector endpoint `{}`: {:?}", - &connector_endpoint, e - ))) - })? 
+ .with_context(|| format!("invalid connector endpoint `{}`", connector_endpoint))? .initial_connection_window_size(MAX_CONNECTION_WINDOW_SIZE) .initial_stream_window_size(STREAM_WINDOW_SIZE) .connect_timeout(Duration::from_secs(5)); diff --git a/src/rpc_client/src/lib.rs b/src/rpc_client/src/lib.rs index 17168d94f3ac..0485465499f5 100644 --- a/src/rpc_client/src/lib.rs +++ b/src/rpc_client/src/lib.rs @@ -33,7 +33,7 @@ use std::future::Future; use std::iter::repeat; use std::sync::Arc; -use anyhow::anyhow; +use anyhow::{anyhow, Context}; use async_trait::async_trait; use futures::future::try_join_all; use futures::stream::{BoxStream, Peekable}; @@ -46,7 +46,7 @@ use risingwave_pb::meta::heartbeat_request::extra_info; use tokio::sync::mpsc::{channel, Receiver, Sender}; pub mod error; -use error::{Result, RpcError}; +use error::Result; mod compactor_client; mod compute_client; mod connector_client; @@ -120,9 +120,7 @@ where S::new_clients(addr.clone(), self.connection_pool_size as usize), ) .await - .map_err(|e| -> RpcError { - anyhow!("failed to create RPC client to {addr}: {:?}", e).into() - })? + .with_context(|| format!("failed to create RPC client to {addr}"))? 
.choose(&mut rand::thread_rng()) .unwrap() .clone()) diff --git a/src/rpc_client/src/meta_client.rs b/src/rpc_client/src/meta_client.rs index 2a9d337fd3ae..812c522e5e03 100644 --- a/src/rpc_client/src/meta_client.rs +++ b/src/rpc_client/src/meta_client.rs @@ -19,7 +19,7 @@ use std::sync::Arc; use std::thread; use std::time::{Duration, SystemTime}; -use anyhow::anyhow; +use anyhow::{anyhow, Context}; use async_trait::async_trait; use either::Either; use futures::stream::BoxStream; @@ -86,6 +86,7 @@ use risingwave_pb::stream_plan::StreamFragmentGraph; use risingwave_pb::user::update_user_request::UpdateField; use risingwave_pb::user::user_service_client::UserServiceClient; use risingwave_pb::user::*; +use thiserror_ext::AsReport; use tokio::sync::mpsc::{unbounded_channel, Receiver, UnboundedSender}; use tokio::sync::oneshot::Sender; use tokio::sync::{mpsc, oneshot, RwLock}; @@ -739,10 +740,10 @@ impl MetaClient { { Ok(Ok(_)) => {} Ok(Err(err)) => { - tracing::warn!("Failed to send_heartbeat: error {}", err); + tracing::warn!(error = %err.as_report(), "Failed to send_heartbeat"); } - Err(err) => { - tracing::warn!("Failed to send_heartbeat: timeout {}", err); + Err(_) => { + tracing::warn!("Failed to send_heartbeat: timeout"); } } } @@ -1422,7 +1423,7 @@ impl HummockMetaClient for MetaClient { .expect("Clock may have gone backwards") .as_millis() as u64, }) - .map_err(|err| RpcError::Internal(anyhow!(err.to_string())))?; + .context("Failed to subscribe compaction event")?; let stream = self .inner @@ -1438,7 +1439,10 @@ impl HummockMetaClient for MetaClient { #[async_trait] impl TelemetryInfoFetcher for MetaClient { async fn fetch_telemetry_info(&self) -> std::result::Result, String> { - let resp = self.get_telemetry_info().await.map_err(|e| e.to_string())?; + let resp = self + .get_telemetry_info() + .await + .map_err(|e| e.to_report_string())?; let tracking_id = resp.get_tracking_id().ok(); Ok(tracking_id.map(|id| id.to_owned())) } @@ -1574,12 +1578,12 @@ impl 
MetaMemberManagement { } }; if let Err(err) = client { - tracing::warn!("failed to create client from {}: {}", addr, err); + tracing::warn!(%addr, error = %err.as_report(), "failed to create client"); continue; } match client.unwrap().members(MembersRequest {}).await { Err(err) => { - tracing::warn!("failed to fetch members from {}: {}", addr, err); + tracing::warn!(%addr, error = %err.as_report(), "failed to fetch members"); continue; } Ok(resp) => { @@ -1696,7 +1700,7 @@ impl GrpcMetaClient { let tick_result = member_management.refresh_members().await; if let Err(e) = tick_result.as_ref() { - tracing::warn!("refresh meta member client failed {}", e); + tracing::warn!(error = %e.as_report(), "refresh meta member client failed"); } if let Some(sender) = event { @@ -1777,9 +1781,9 @@ impl GrpcMetaClient { } Err(e) => { tracing::warn!( - "Failed to connect to meta server {}, trying again: {}", + error = %e.as_report(), + "Failed to connect to meta server {}, trying again", addr, - e ) } } @@ -1948,7 +1952,7 @@ impl GrpcMetaClient { .is_ok() { if let Ok(Err(e)) = result_receiver.await { - tracing::warn!("force refresh meta client failed {}", e); + tracing::warn!(error = %e.as_report(), "force refresh meta client failed"); } } else { tracing::debug!("skipping the current refresh, somewhere else is already doing it") diff --git a/src/source/src/connector_source.rs b/src/source/src/connector_source.rs index f126c2692a77..230566d5ada3 100644 --- a/src/source/src/connector_source.rs +++ b/src/source/src/connector_source.rs @@ -85,7 +85,7 @@ impl ConnectorSource { .ok_or_else(|| { anyhow!("Failed to find column id: {} in source: {:?}", id, self).into() }) - .map(|col| col.clone()) + .cloned() }) .collect::>>() } diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs index eef14722ee84..b11d4dc784bb 100644 --- a/src/sqlparser/src/ast/mod.rs +++ b/src/sqlparser/src/ast/mod.rs @@ -1117,6 +1117,8 @@ pub enum Statement { name: ObjectName, /// Optional schema 
columns: Vec, + // The wildchar position in columns defined in sql. Only exist when using external schema. + wildcard_idx: Option, constraints: Vec, with_options: Vec, /// Optional schema of the external source with which the table is created @@ -1606,6 +1608,7 @@ impl fmt::Display for Statement { Statement::CreateTable { name, columns, + wildcard_idx, constraints, with_options, or_replace, @@ -1634,7 +1637,7 @@ impl fmt::Display for Statement { name = name, )?; if !columns.is_empty() || !constraints.is_empty() { - write!(f, " {}", fmt_create_items(columns, constraints, source_watermarks)?)?; + write!(f, " {}", fmt_create_items(columns, constraints, source_watermarks, *wildcard_idx)?)?; } else if query.is_none() { // PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens write!(f, " ()")?; diff --git a/src/sqlparser/src/ast/statement.rs b/src/sqlparser/src/ast/statement.rs index 5624a74c621e..e394697d45a9 100644 --- a/src/sqlparser/src/ast/statement.rs +++ b/src/sqlparser/src/ast/statement.rs @@ -80,6 +80,8 @@ macro_rules! impl_fmt_display { pub struct CreateSourceStatement { pub if_not_exists: bool, pub columns: Vec, + // The wildchar position in columns defined in sql. Only exist when using external schema. 
+ pub wildcard_idx: Option, pub constraints: Vec, pub source_name: ObjectName, pub with_properties: WithProperties, @@ -325,7 +327,8 @@ impl ParseTo for CreateSourceStatement { impl_parse_to!(source_name: ObjectName, p); // parse columns - let (columns, constraints, source_watermarks) = p.parse_columns_with_watermark()?; + let (columns, constraints, source_watermarks, wildcard_idx) = + p.parse_columns_with_watermark()?; let include_options = p.parse_include_options()?; let with_options = p.parse_with_properties()?; @@ -343,6 +346,7 @@ impl ParseTo for CreateSourceStatement { Ok(Self { if_not_exists, columns, + wildcard_idx, constraints, source_name, with_properties: WithProperties(with_options), @@ -357,11 +361,28 @@ pub(super) fn fmt_create_items( columns: &[ColumnDef], constraints: &[TableConstraint], watermarks: &[SourceWatermark], + wildcard_idx: Option, ) -> std::result::Result { let mut items = String::new(); - let has_items = !columns.is_empty() || !constraints.is_empty() || !watermarks.is_empty(); + let has_items = !columns.is_empty() + || !constraints.is_empty() + || !watermarks.is_empty() + || wildcard_idx.is_some(); has_items.then(|| write!(&mut items, "(")); - write!(&mut items, "{}", display_comma_separated(columns))?; + if let Some(wildcard_idx) = wildcard_idx { + let (columns_l, columns_r) = columns.split_at(wildcard_idx); + write!(&mut items, "{}", display_comma_separated(columns_l))?; + if !columns_l.is_empty() { + write!(&mut items, ", ")?; + } + write!(&mut items, "{}", Token::Mul)?; + if !columns_r.is_empty() { + write!(&mut items, ", ")?; + } + write!(&mut items, "{}", display_comma_separated(columns_r))?; + } else { + write!(&mut items, "{}", display_comma_separated(columns))?; + } if !columns.is_empty() && (!constraints.is_empty() || !watermarks.is_empty()) { write!(&mut items, ", ")?; } @@ -380,7 +401,12 @@ impl fmt::Display for CreateSourceStatement { impl_fmt_display!(if_not_exists => [Keyword::IF, Keyword::NOT, Keyword::EXISTS], v, self); 
impl_fmt_display!(source_name, v, self); - let items = fmt_create_items(&self.columns, &self.constraints, &self.source_watermarks)?; + let items = fmt_create_items( + &self.columns, + &self.constraints, + &self.source_watermarks, + self.wildcard_idx, + )?; if !items.is_empty() { v.push(items); } diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index 1eae3eccc03f..e88a1df3157d 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -131,7 +131,12 @@ impl fmt::Display for ParserError { #[cfg(feature = "std")] impl std::error::Error for ParserError {} -type ColumnsDefTuple = (Vec, Vec, Vec); +type ColumnsDefTuple = ( + Vec, + Vec, + Vec, + Option, +); /// Reference: /// @@ -2455,7 +2460,8 @@ impl Parser { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name()?; // parse optional column list (schema) and watermarks on source. - let (columns, constraints, source_watermarks) = self.parse_columns_with_watermark()?; + let (columns, constraints, source_watermarks, wildcard_idx) = + self.parse_columns_with_watermark()?; let append_only = if self.parse_keyword(Keyword::APPEND) { self.expect_keyword(Keyword::ONLY)?; @@ -2507,6 +2513,7 @@ impl Parser { name: table_name, temporary, columns, + wildcard_idx, constraints, with_options, or_replace, @@ -2538,12 +2545,21 @@ impl Parser { let mut columns = vec![]; let mut constraints = vec![]; let mut watermarks = vec![]; + let mut wildcard_idx = None; if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { - return Ok((columns, constraints, watermarks)); + return Ok((columns, constraints, watermarks, wildcard_idx)); } loop { - if let Some(constraint) = self.parse_optional_table_constraint()? 
{ + if self.consume_token(&Token::Mul) { + if wildcard_idx.is_none() { + wildcard_idx = Some(columns.len()); + } else { + return Err(ParserError::ParserError( + "At most 1 wildcard is allowed in source definetion".to_string(), + )); + } + } else if let Some(constraint) = self.parse_optional_table_constraint()? { constraints.push(constraint); } else if let Some(watermark) = self.parse_optional_watermark()? { watermarks.push(watermark); @@ -2567,7 +2583,7 @@ impl Parser { } } - Ok((columns, constraints, watermarks)) + Ok((columns, constraints, watermarks, wildcard_idx)) } fn parse_column_def(&mut self) -> Result { diff --git a/src/sqlparser/tests/testdata/create.yaml b/src/sqlparser/tests/testdata/create.yaml index 69167205591a..94873541e1d8 100644 --- a/src/sqlparser/tests/testdata/create.yaml +++ b/src/sqlparser/tests/testdata/create.yaml @@ -35,13 +35,13 @@ Near "pad CHARACTER VARYING) FROM sbtest" - input: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.servers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.location = 'file://') formatted_sql: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.servers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.location = 'file://') - formatted_ast: 'CreateSource { stmt: CreateSourceStatement { if_not_exists: true, columns: [], constraints: [], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "servers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: V2(ConnectorSchema { format: Plain, row_encode: Protobuf, row_options: [SqlOption { name: ObjectName([Ident { value: "message", quote_style: None }]), value: 
SingleQuotedString("Foo") }, SqlOption { name: ObjectName([Ident { value: "schema", quote_style: None }, Ident { value: "location", quote_style: None }]), value: SingleQuotedString("file://") }] }), source_watermarks: [], include_column_options: [] } }' + formatted_ast: 'CreateSource { stmt: CreateSourceStatement { if_not_exists: true, columns: [], wildcard_idx: None, constraints: [], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "servers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: V2(ConnectorSchema { format: Plain, row_encode: Protobuf, row_options: [SqlOption { name: ObjectName([Ident { value: "message", quote_style: None }]), value: SingleQuotedString("Foo") }, SqlOption { name: ObjectName([Ident { value: "schema", quote_style: None }, Ident { value: "location", quote_style: None }]), value: SingleQuotedString("file://") }] }), source_watermarks: [], include_column_options: [] } }' - input: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.servers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.registry = 'http://') formatted_sql: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.servers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.registry = 'http://') - formatted_ast: 'CreateSource { stmt: CreateSourceStatement { if_not_exists: true, columns: [], constraints: [], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { 
name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "servers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: V2(ConnectorSchema { format: Plain, row_encode: Protobuf, row_options: [SqlOption { name: ObjectName([Ident { value: "message", quote_style: None }]), value: SingleQuotedString("Foo") }, SqlOption { name: ObjectName([Ident { value: "schema", quote_style: None }, Ident { value: "registry", quote_style: None }]), value: SingleQuotedString("http://") }] }), source_watermarks: [], include_column_options: [] } }' + formatted_ast: 'CreateSource { stmt: CreateSourceStatement { if_not_exists: true, columns: [], wildcard_idx: None, constraints: [], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "servers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: V2(ConnectorSchema { format: Plain, row_encode: Protobuf, row_options: [SqlOption { name: ObjectName([Ident { value: "message", quote_style: None }]), value: SingleQuotedString("Foo") }, SqlOption { name: ObjectName([Ident { value: "schema", quote_style: None }, Ident { value: "registry", quote_style: None }]), value: SingleQuotedString("http://") }] }), source_watermarks: [], include_column_options: [] } }' - input: CREATE SOURCE bid (auction INTEGER, bidder INTEGER, price INTEGER, WATERMARK FOR auction AS auction - 1, "date_time" TIMESTAMP) with (connector = 'nexmark', nexmark.table.type = 'Bid', nexmark.split.num = '12', nexmark.min.event.gap.in.ns = '0') formatted_sql: CREATE SOURCE bid (auction INT, bidder INT, price INT, "date_time" TIMESTAMP, WATERMARK FOR auction AS auction - 1) WITH (connector = 'nexmark', 
nexmark.table.type = 'Bid', nexmark.split.num = '12', nexmark.min.event.gap.in.ns = '0') FORMAT NATIVE ENCODE NATIVE - formatted_ast: 'CreateSource { stmt: CreateSourceStatement { if_not_exists: false, columns: [ColumnDef { name: Ident { value: "auction", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "bidder", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "price", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "date_time", quote_style: Some(''"'') }, data_type: Some(Timestamp(false)), collation: None, options: [] }], constraints: [], source_name: ObjectName([Ident { value: "bid", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "connector", quote_style: None }]), value: SingleQuotedString("nexmark") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "table", quote_style: None }, Ident { value: "type", quote_style: None }]), value: SingleQuotedString("Bid") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "split", quote_style: None }, Ident { value: "num", quote_style: None }]), value: SingleQuotedString("12") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "min", quote_style: None }, Ident { value: "event", quote_style: None }, Ident { value: "gap", quote_style: None }, Ident { value: "in", quote_style: None }, Ident { value: "ns", quote_style: None }]), value: SingleQuotedString("0") }]), source_schema: V2(ConnectorSchema { format: Native, row_encode: Native, row_options: [] }), source_watermarks: [SourceWatermark { column: Ident { value: "auction", quote_style: None }, expr: BinaryOp { left: Identifier(Ident { value: "auction", quote_style: None }), op: Minus, right: Value(Number("1")) } }], 
include_column_options: [] } }' + formatted_ast: 'CreateSource { stmt: CreateSourceStatement { if_not_exists: false, columns: [ColumnDef { name: Ident { value: "auction", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "bidder", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "price", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "date_time", quote_style: Some(''"'') }, data_type: Some(Timestamp(false)), collation: None, options: [] }], wildcard_idx: None, constraints: [], source_name: ObjectName([Ident { value: "bid", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "connector", quote_style: None }]), value: SingleQuotedString("nexmark") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "table", quote_style: None }, Ident { value: "type", quote_style: None }]), value: SingleQuotedString("Bid") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "split", quote_style: None }, Ident { value: "num", quote_style: None }]), value: SingleQuotedString("12") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "min", quote_style: None }, Ident { value: "event", quote_style: None }, Ident { value: "gap", quote_style: None }, Ident { value: "in", quote_style: None }, Ident { value: "ns", quote_style: None }]), value: SingleQuotedString("0") }]), source_schema: V2(ConnectorSchema { format: Native, row_encode: Native, row_options: [] }), source_watermarks: [SourceWatermark { column: Ident { value: "auction", quote_style: None }, expr: BinaryOp { left: Identifier(Ident { value: "auction", quote_style: None }), op: Minus, right: Value(Number("1")) } }], include_column_options: [] } }' - input: CREATE TABLE T (v1 INT, v2 
STRUCT) formatted_sql: CREATE TABLE T (v1 INT, v2 STRUCT) - input: CREATE TABLE T (v1 INT, v2 STRUCT>) diff --git a/src/storage/compactor/src/server.rs b/src/storage/compactor/src/server.rs index d93e0b46ade9..6d2c01527ceb 100644 --- a/src/storage/compactor/src/server.rs +++ b/src/storage/compactor/src/server.rs @@ -304,12 +304,12 @@ pub async fn compactor_serve( _ = tokio::signal::ctrl_c() => {}, _ = &mut shutdown_recv => { for (join_handle, shutdown_sender) in sub_tasks { - if let Err(err) = shutdown_sender.send(()) { - tracing::warn!("Failed to send shutdown: {:?}", err); + if shutdown_sender.send(()).is_err() { + tracing::warn!("Failed to send shutdown"); continue; } - if let Err(err) = join_handle.await { - tracing::warn!("Failed to join shutdown: {:?}", err); + if join_handle.await.is_err() { + tracing::warn!("Failed to join shutdown"); } } }, @@ -414,12 +414,12 @@ pub async fn shared_compactor_serve( tokio::select! { _ = tokio::signal::ctrl_c() => {}, _ = &mut shutdown_recv => { - if let Err(err) = shutdown_sender.send(()) { - tracing::warn!("Failed to send shutdown: {:?}", err); - } - if let Err(err) = join_handle.await { - tracing::warn!("Failed to join shutdown: {:?}", err); - } + if shutdown_sender.send(()).is_err() { + tracing::warn!("Failed to send shutdown"); + } + if join_handle.await.is_err() { + tracing::warn!("Failed to join shutdown"); + } }, } }, diff --git a/src/storage/hummock_sdk/src/lib.rs b/src/storage/hummock_sdk/src/lib.rs index 0048a60d1d96..ad5fe438e94f 100644 --- a/src/storage/hummock_sdk/src/lib.rs +++ b/src/storage/hummock_sdk/src/lib.rs @@ -264,11 +264,16 @@ pub fn can_concat(ssts: &[SstableInfo]) -> bool { const CHECKPOINT_DIR: &str = "checkpoint"; const CHECKPOINT_NAME: &str = "0"; +const ARCHIVE_DIR: &str = "archive"; pub fn version_checkpoint_path(root_dir: &str) -> String { format!("{}/{}/{}", root_dir, CHECKPOINT_DIR, CHECKPOINT_NAME) } +pub fn version_archive_dir(root_dir: &str) -> String { + format!("{}/{}", root_dir, 
ARCHIVE_DIR) +} + pub fn version_checkpoint_dir(checkpoint_path: &str) -> String { checkpoint_path.trim_end_matches(|c| c != '/').to_string() } diff --git a/src/storage/hummock_test/src/bin/replay/main.rs b/src/storage/hummock_test/src/bin/replay/main.rs index e8637de48773..d5190532f2d9 100644 --- a/src/storage/hummock_test/src/bin/replay/main.rs +++ b/src/storage/hummock_test/src/bin/replay/main.rs @@ -16,6 +16,9 @@ #![feature(coroutines)] #![feature(stmt_expr_attributes)] #![feature(proc_macro_hygiene)] +#![feature(register_tool)] +#![register_tool(rw)] +#![allow(rw::format_error)] // test code #[macro_use] mod replay_impl; diff --git a/src/storage/hummock_test/src/hummock_storage_tests.rs b/src/storage/hummock_test/src/hummock_storage_tests.rs index a65821d5fefa..80e2d509aca6 100644 --- a/src/storage/hummock_test/src/hummock_storage_tests.rs +++ b/src/storage/hummock_test/src/hummock_storage_tests.rs @@ -1910,13 +1910,16 @@ async fn test_table_watermark() { local.flush(vec![]).await.unwrap(); local.seal_current_epoch( epoch2, - SealCurrentEpochOptions::new( - vec![VnodeWatermark::new( - Arc::new(vnode_bitmap), - gen_inner_key(watermark1), - )], - WatermarkDirection::Ascending, - ), + SealCurrentEpochOptions { + table_watermarks: Some(( + WatermarkDirection::Ascending, + vec![VnodeWatermark::new( + Arc::new(vnode_bitmap), + gen_inner_key(watermark1), + )], + )), + switch_op_consistency_level: None, + }, ); } @@ -2009,7 +2012,13 @@ async fn test_table_watermark() { local.insert(key, value, None).unwrap(); } local.flush(vec![]).await.unwrap(); - local.seal_current_epoch(epoch3, SealCurrentEpochOptions::no_watermark()); + local.seal_current_epoch( + epoch3, + SealCurrentEpochOptions { + table_watermarks: None, + switch_op_consistency_level: None, + }, + ); } let indexes_after_epoch2 = || gen_range().filter(|index| index % 3 == 0 || index % 3 == 1); @@ -2250,13 +2259,16 @@ async fn test_table_watermark() { // regress watermark local.seal_current_epoch( epoch4, - 
SealCurrentEpochOptions::new( - vec![VnodeWatermark::new( - Arc::new(vnode_bitmap), - gen_inner_key(5), - )], - WatermarkDirection::Ascending, - ), + SealCurrentEpochOptions { + table_watermarks: Some(( + WatermarkDirection::Ascending, + vec![VnodeWatermark::new( + Arc::new(vnode_bitmap), + gen_inner_key(5), + )], + )), + switch_op_consistency_level: None, + }, ); } diff --git a/src/storage/hummock_test/src/state_store_tests.rs b/src/storage/hummock_test/src/state_store_tests.rs index d46831b8d30c..bb23ec298f8e 100644 --- a/src/storage/hummock_test/src/state_store_tests.rs +++ b/src/storage/hummock_test/src/state_store_tests.rs @@ -534,7 +534,7 @@ async fn test_reload_storage() { let anchor = gen_key_from_str(VirtualNode::ZERO, "aa"); // First batch inserts the anchor and others. - let mut batch1 = vec![ + let mut batch1 = [ (anchor.clone(), StorageValue::new_put("111")), ( gen_key_from_str(VirtualNode::ZERO, "bb"), @@ -546,7 +546,7 @@ async fn test_reload_storage() { batch1.sort_by(|(k1, _), (k2, _)| k1.cmp(k2)); // Second batch modifies the anchor. 
- let mut batch2 = vec![ + let mut batch2 = [ ( gen_key_from_str(VirtualNode::ZERO, "cc"), StorageValue::new_put("333"), diff --git a/src/storage/hummock_trace/src/opts.rs b/src/storage/hummock_trace/src/opts.rs index 54a1ca0340ec..241a8e26769d 100644 --- a/src/storage/hummock_trace/src/opts.rs +++ b/src/storage/hummock_trace/src/opts.rs @@ -228,4 +228,5 @@ pub struct TracedInitOptions { pub struct TracedSealCurrentEpochOptions { // The watermark is serialized into protobuf pub table_watermarks: Option<(bool, Vec>)>, + pub switch_op_consistency_level: Option, } diff --git a/src/storage/src/filter_key_extractor.rs b/src/storage/src/filter_key_extractor.rs index ac3ef6eb02e8..019139eedeed 100644 --- a/src/storage/src/filter_key_extractor.rs +++ b/src/storage/src/filter_key_extractor.rs @@ -27,6 +27,7 @@ use risingwave_hummock_sdk::key::{get_table_id, TABLE_PREFIX_LEN}; use risingwave_pb::catalog::Table; use risingwave_rpc_client::error::{Result as RpcResult, RpcError}; use risingwave_rpc_client::MetaClient; +use thiserror_ext::AsReport; use crate::hummock::{HummockError, HummockResult}; @@ -315,8 +316,8 @@ impl FilterKeyExtractorManagerInner { .await .map_err(|e| { HummockError::other(format!( - "request rpc list_tables for meta failed because {:?}", - e + "request rpc list_tables for meta failed: {}", + e.as_report() )) })?; let mut guard = self.table_id_to_filter_key_extractor.write(); diff --git a/src/storage/src/hummock/backup_reader.rs b/src/storage/src/hummock/backup_reader.rs index d8757e5f5ea1..0b99970f26e7 100644 --- a/src/storage/src/hummock/backup_reader.rs +++ b/src/storage/src/hummock/backup_reader.rs @@ -30,6 +30,7 @@ use risingwave_common::system_param::local_manager::SystemParamsReaderRef; use risingwave_common::system_param::reader::SystemParamsRead; use risingwave_object_store::object::build_remote_object_store; use risingwave_object_store::object::object_metrics::ObjectStoreMetrics; +use thiserror_ext::AsReport; use crate::error::{StorageError, 
StorageResult}; use crate::hummock::local_version::pinned_version::{PinVersionAction, PinnedVersion}; @@ -131,7 +132,7 @@ impl BackupReader { } if let Err(e) = current_store.0.refresh_manifest().await { // reschedule refresh request - tracing::warn!("failed to refresh backup manifest, will retry. {}", e); + tracing::warn!(error = %e.as_report(), "failed to refresh backup manifest, will retry"); let backup_reader_clone = backup_reader.clone(); tokio::spawn(async move { tokio::time::sleep(Duration::from_secs(60)).await; @@ -155,10 +156,9 @@ impl BackupReader { } pub fn try_refresh_manifest(self: &BackupReaderRef, min_manifest_id: u64) { - let _ = self - .refresh_tx - .send(min_manifest_id) - .inspect_err(|e| tracing::warn!("failed to send refresh_manifest request {}", e)); + let _ = self.refresh_tx.send(min_manifest_id).inspect_err(|_| { + tracing::warn!(min_manifest_id, "failed to send refresh_manifest request") + }); } /// Tries to get a hummock version eligible for querying `epoch`. @@ -198,7 +198,11 @@ impl BackupReader { // TODO: change to v2 let snapshot: meta_snapshot_v1::MetaSnapshotV1 = current_store.0.get(snapshot_id).await.map_err(|e| { - format!("failed to get meta snapshot {}. {}", snapshot_id, e) + format!( + "failed to get meta snapshot {}: {}", + snapshot_id, + e.as_report() + ) })?; let version_holder = build_version_holder(snapshot); let version_clone = version_holder.0.clone(); @@ -238,10 +242,9 @@ impl BackupReader { if let Err(e) = self.set_store(config.clone()).await { // Retry is driven by periodic system params notification. 
tracing::warn!( - "failed to update backup reader: url={}, dir={}, {:#?}", - config.0, - config.1, - e + url = config.0, dir = config.1, + error = %e.as_report(), + "failed to update backup reader", ); } } diff --git a/src/storage/src/hummock/compactor/compactor_runner.rs b/src/storage/src/hummock/compactor/compactor_runner.rs index 1778e263c54f..b542eb141492 100644 --- a/src/storage/src/hummock/compactor/compactor_runner.rs +++ b/src/storage/src/hummock/compactor/compactor_runner.rs @@ -30,6 +30,7 @@ use risingwave_hummock_sdk::table_stats::{add_table_stats_map, TableStats, Table use risingwave_hummock_sdk::{can_concat, EpochWithGap, HummockEpoch}; use risingwave_pb::hummock::compact_task::{TaskStatus, TaskType}; use risingwave_pb::hummock::{BloomFilterType, CompactTask, LevelType}; +use thiserror_ext::AsReport; use tokio::sync::oneshot::Receiver; use super::iterator::MonitoredCompactorIterator; @@ -327,7 +328,7 @@ pub async fn compact( .await { Err(e) => { - tracing::error!("Failed to fetch filter key extractor tables [{:?}], it may caused by some RPC error {:?}", compact_task.existing_table_ids, e); + tracing::error!(error = %e.as_report(), "Failed to fetch filter key extractor tables [{:?}], it may caused by some RPC error", compact_task.existing_table_ids); let task_status = TaskStatus::ExecuteFailed; return compact_done(compact_task, context.clone(), vec![], task_status); } @@ -407,7 +408,7 @@ pub async fn compact( } } Err(e) => { - tracing::warn!("Failed to generate_splits {:#?}", e); + tracing::warn!(error = %e.as_report(), "Failed to generate_splits"); task_status = TaskStatus::ExecuteFailed; return compact_done(compact_task, context.clone(), vec![], task_status); } @@ -527,9 +528,9 @@ pub async fn compact( Err(e) => { task_status = TaskStatus::ExecuteFailed; tracing::warn!( - "Compaction task {} failed with error: {:#?}", + error = %e.as_report(), + "Compaction task {} failed with error", compact_task.task_id, - e ); } } @@ -597,18 +598,18 @@ pub async fn 
compact( Some(Ok(Err(e))) => { task_status = TaskStatus::ExecuteFailed; tracing::warn!( - "Compaction task {} failed with error: {:#?}", + error = %e.as_report(), + "Compaction task {} failed with error", compact_task.task_id, - e ); break; } Some(Err(e)) => { task_status = TaskStatus::JoinHandleFailed; tracing::warn!( - "Compaction task {} failed with join handle error: {:#?}", + error = %e.as_report(), + "Compaction task {} failed with join handle error", compact_task.task_id, - e ); break; } diff --git a/src/storage/src/hummock/compactor/mod.rs b/src/storage/src/hummock/compactor/mod.rs index 81ae96c1b415..158b5b33d414 100644 --- a/src/storage/src/hummock/compactor/mod.rs +++ b/src/storage/src/hummock/compactor/mod.rs @@ -22,6 +22,7 @@ use risingwave_pb::hummock::report_compaction_task_request::{ }; use risingwave_pb::hummock::{ReportFullScanTaskRequest, ReportVacuumTaskRequest}; use risingwave_rpc_client::GrpcCompactorProxyClient; +use thiserror_ext::AsReport; use tokio::sync::mpsc; use tonic::Request; @@ -344,8 +345,8 @@ pub fn start_compactor( Err(e) => { tracing::warn!( - "Subscribing to compaction tasks failed with error: {}. Will retry.", - e + error = %e.as_report(), + "Subscribing to compaction tasks failed with error. Will retry.", ); continue 'start_stream; } @@ -385,7 +386,7 @@ pub fn start_compactor( .expect("Clock may have gone backwards") .as_millis() as u64, }) { - tracing::warn!("Failed to report task progress. 
{e:?}"); + tracing::warn!(error = %e.as_report(), "Failed to report task progress"); // re subscribe stream continue 'start_stream; } @@ -408,7 +409,7 @@ pub fn start_compactor( .expect("Clock may have gone backwards") .as_millis() as u64, }) { - tracing::warn!("Failed to pull task {e:?}"); + tracing::warn!(error = %e.as_report(), "Failed to pull task"); // re subscribe stream continue 'start_stream; @@ -473,26 +474,10 @@ pub fn start_compactor( sstable_object_id_manager.remove_watermark_object_id(tracker_id); }, ); - let enable_check_compaction_result = context.storage_opts.check_compaction_result; - let compact_result = compactor_runner::compact(context.clone(), compact_task, rx, Box::new(sstable_object_id_manager.clone()), filter_key_extractor_manager.clone()).await; - let need_check_task = !compact_result.0.sorted_output_ssts.is_empty() && compact_result.0.task_status() == TaskStatus::Success; - - if enable_check_compaction_result && need_check_task { - match check_compaction_result(&compact_result.0, context.clone()).await { - Err(e) => { - tracing::warn!("Failed to check compaction task {} because: {:?}",compact_result.0.task_id, e); - }, - Ok(true) => (), - Ok(false) => { - panic!("Failed to pass consistency check for result of compaction task:\n{:?}", compact_task_to_string(&compact_result.0)); - } - } - } - - compact_result + compactor_runner::compact(context.clone(), compact_task, rx, Box::new(sstable_object_id_manager.clone()), filter_key_extractor_manager.clone()).await }, Err(err) => { - tracing::warn!("Failed to track pending SST object id. 
{:#?}", err); + tracing::warn!(error = %err.as_report(), "Failed to track pending SST object id"); let mut compact_task = compact_task; compact_task.set_task_status(TaskStatus::TrackSstObjectIdFailed); (compact_task, HashMap::default()) @@ -500,12 +485,14 @@ pub fn start_compactor( }; shutdown.lock().unwrap().remove(&task_id); + let enable_check_compaction_result = context.storage_opts.check_compaction_result; + let need_check_task = !compact_task.sorted_output_ssts.is_empty() && compact_task.task_status() == TaskStatus::Success; if let Err(e) = request_sender.send(SubscribeCompactionEventRequest { event: Some(RequestEvent::ReportTask( ReportTask { task_id: compact_task.task_id, task_status: compact_task.task_status, - sorted_output_ssts: compact_task.sorted_output_ssts, + sorted_output_ssts: compact_task.sorted_output_ssts.clone(), table_stats_change:to_prost_table_stats_map(table_stats), } )), @@ -514,7 +501,19 @@ pub fn start_compactor( .expect("Clock may have gone backwards") .as_millis() as u64, }) { - tracing::warn!("Failed to report task {task_id:?} . 
{e:?}"); + tracing::warn!(error = %e.as_report(), "Failed to report task {task_id:?}"); + if enable_check_compaction_result && need_check_task { + match check_compaction_result(&compact_task, context.clone()).await { + Err(e) => { + tracing::warn!(error = %e.as_report(), "Failed to check compaction task {}",compact_task.task_id); + }, + Ok(true) => (), + Ok(false) => { + panic!("Failed to pass consistency check for result of compaction task:\n{:?}", compact_task_to_string(&compact_task)); + } + } + } + } } ResponseEvent::VacuumTask(vacuum_task) => { @@ -527,7 +526,7 @@ pub fn start_compactor( Vacuum::report_vacuum_task(vacuum_task, meta_client).await; } Err(e) => { - tracing::warn!("Failed to vacuum task: {:#?}", e) + tracing::warn!(error = %e.as_report(), "Failed to vacuum task") } } } @@ -537,7 +536,7 @@ pub fn start_compactor( Vacuum::report_full_scan_task(object_ids, total_object_count, total_object_size, meta_client).await; } Err(e) => { - tracing::warn!("Failed to iter object: {:#?}", e); + tracing::warn!(error = %e.as_report(), "Failed to iter object"); } } } @@ -622,7 +621,7 @@ pub fn start_shared_compactor( )), }; if let Err(e) = report_heartbeat_client.report_compaction_task(report_compaction_task_request).await{ - tracing::warn!("Failed to report heartbeat {:#?}", e); + tracing::warn!(error = %e.as_report(), "Failed to report heartbeat"); } continue } @@ -698,7 +697,7 @@ pub fn start_shared_compactor( if enable_check_compaction_result && need_check_task { match check_compaction_result(&compact_task, context.clone()).await { Err(e) => { - tracing::warn!("Failed to check compaction task {} because: {:?}",compact_task.task_id, e); + tracing::warn!(error = %e.as_report(), "Failed to check compaction task {}", compact_task.task_id); }, Ok(true) => (), Ok(false) => { @@ -707,7 +706,7 @@ pub fn start_shared_compactor( } } } - Err(e) => tracing::warn!("Failed to report task {task_id:?} . 
{e:?}"), + Err(e) => tracing::warn!(error = %e.as_report(), "Failed to report task {task_id:?}"), } } @@ -724,11 +723,11 @@ pub fn start_shared_compactor( }; match cloned_grpc_proxy_client.report_vacuum_task(report_vacuum_task_request).await { Ok(_) => tracing::info!("Finished vacuuming SSTs"), - Err(e) => tracing::warn!("Failed to report vacuum task: {:#?}", e), + Err(e) => tracing::warn!(error = %e.as_report(), "Failed to report vacuum task"), } } Err(e) => { - tracing::warn!("Failed to vacuum task: {:#?}", e) + tracing::warn!(error = %e.as_report(), "Failed to vacuum task") } } } @@ -747,11 +746,11 @@ pub fn start_shared_compactor( .await { Ok(_) => tracing::info!("Finished full scan SSTs"), - Err(e) => tracing::warn!("Failed to report full scan task: {:#?}", e), + Err(e) => tracing::warn!(error = %e.as_report(), "Failed to report full scan task"), } } Err(e) => { - tracing::warn!("Failed to iter object: {:#?}", e); + tracing::warn!(error = %e.as_report(), "Failed to iter object"); } } } diff --git a/src/storage/src/hummock/compactor/shared_buffer_compact.rs b/src/storage/src/hummock/compactor/shared_buffer_compact.rs index 73f5d154d79f..8b94364fb2f8 100644 --- a/src/storage/src/hummock/compactor/shared_buffer_compact.rs +++ b/src/storage/src/hummock/compactor/shared_buffer_compact.rs @@ -28,6 +28,7 @@ use risingwave_hummock_sdk::key::{FullKey, PointRange, TableKey, UserKey}; use risingwave_hummock_sdk::key_range::KeyRange; use risingwave_hummock_sdk::{CompactionGroupId, EpochWithGap, HummockEpoch, LocalSstableInfo}; use risingwave_pb::hummock::compact_task; +use thiserror_ext::AsReport; use tracing::error; use crate::filter_key_extractor::{FilterKeyExtractorImpl, FilterKeyExtractorManager}; @@ -203,14 +204,14 @@ async fn compact_shared_buffer( } Ok(Err(e)) => { compact_success = false; - tracing::warn!("Shared Buffer Compaction failed with error: {:#?}", e); + tracing::warn!(error = %e.as_report(), "Shared Buffer Compaction failed with error"); err = Some(e); } 
Err(e) => { compact_success = false; tracing::warn!( - "Shared Buffer Compaction failed with future error: {:#?}", - e + error = %e.as_report(), + "Shared Buffer Compaction failed with future error", ); err = Some(HummockError::compaction_executor( "failed while execute in tokio", @@ -265,7 +266,7 @@ async fn compact_shared_buffer( .await { Err(e) => { - tracing::warn!("Failed check flush result of memtable because of {:?}", e); + tracing::warn!(error = %e.as_report(), "Failed check flush result of memtable"); } Ok(true) => (), Ok(false) => { diff --git a/src/storage/src/hummock/error.rs b/src/storage/src/hummock/error.rs index e3dcf712f0e5..d1b662ce148b 100644 --- a/src/storage/src/hummock/error.rs +++ b/src/storage/src/hummock/error.rs @@ -14,6 +14,7 @@ use risingwave_object_store::object::ObjectError; use thiserror::Error; +use thiserror_ext::AsReport; use tokio::sync::oneshot::error::RecvError; // TODO(error-handling): should prefer use error types than strings. @@ -152,7 +153,7 @@ impl HummockError { impl From for HummockError { fn from(error: prost::DecodeError) -> Self { - HummockErrorInner::DecodeError(error.to_string()).into() + HummockErrorInner::DecodeError(error.to_report_string()).into() } } diff --git a/src/storage/src/hummock/event_handler/hummock_event_handler.rs b/src/storage/src/hummock/event_handler/hummock_event_handler.rs index 1fefdb0f5ea5..9d6e72dc488d 100644 --- a/src/storage/src/hummock/event_handler/hummock_event_handler.rs +++ b/src/storage/src/hummock/event_handler/hummock_event_handler.rs @@ -22,6 +22,7 @@ use await_tree::InstrumentAwait; use parking_lot::RwLock; use prometheus::core::{AtomicU64, GenericGauge}; use risingwave_hummock_sdk::{HummockEpoch, LocalSstableInfo}; +use thiserror_ext::AsReport; use tokio::spawn; use tokio::sync::{mpsc, oneshot}; use tracing::{debug, error, info, trace, warn}; @@ -140,7 +141,7 @@ async fn flush_imms( .add_watermark_object_id(Some(*epoch)) .await .inspect_err(|e| { - error!("unable to set watermark 
sst id. epoch: {}, {:?}", epoch, e); + error!(epoch, error = %e.as_report(), "unable to set watermark sst id"); }); } compact( @@ -707,6 +708,9 @@ fn to_sync_result(result: &HummockResult) -> HummockResult Err(HummockError::other(format!("sync task failed for {:?}", e))), + Err(e) => Err(HummockError::other(format!( + "sync task failed: {}", + e.as_report() + ))), } } diff --git a/src/storage/src/hummock/event_handler/mod.rs b/src/storage/src/hummock/event_handler/mod.rs index 42b636fa8a1b..ffce8c622fbd 100644 --- a/src/storage/src/hummock/event_handler/mod.rs +++ b/src/storage/src/hummock/event_handler/mod.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use parking_lot::RwLock; use risingwave_common::catalog::TableId; use risingwave_hummock_sdk::HummockEpoch; +use thiserror_ext::AsReport; use tokio::sync::{mpsc, oneshot}; use crate::hummock::shared_buffer::shared_buffer_batch::SharedBufferBatch; @@ -192,10 +193,10 @@ impl Drop for LocalInstanceGuard { }) .unwrap_or_else(|err| { tracing::error!( - "LocalInstanceGuard table_id {:?} instance_id {} Drop SendError {:?}", - self.table_id, - self.instance_id, - err + error = %err.as_report(), + table_id = %self.table_id, + instance_id = self.instance_id, + "LocalInstanceGuard Drop SendError", ) }) } diff --git a/src/storage/src/hummock/event_handler/refiller.rs b/src/storage/src/hummock/event_handler/refiller.rs index d124d067ac67..5e7a285d7070 100644 --- a/src/storage/src/hummock/event_handler/refiller.rs +++ b/src/storage/src/hummock/event_handler/refiller.rs @@ -33,6 +33,7 @@ use risingwave_common::monitor::GLOBAL_METRICS_REGISTRY; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_hummock_sdk::compaction_group::hummock_version_ext::SstDeltaInfo; use risingwave_hummock_sdk::{HummockSstableObjectId, KeyComparator}; +use thiserror_ext::AsReport; use tokio::sync::Semaphore; use tokio::task::JoinHandle; @@ -304,7 +305,7 @@ impl CacheRefillTask { let holders = match Self::meta_cache_refill(&context, delta).await { 
Ok(holders) => holders, Err(e) => { - tracing::warn!("meta cache refill error: {:?}", e); + tracing::warn!(error = %e.as_report(), "meta cache refill error"); return; } }; @@ -514,7 +515,9 @@ impl CacheRefillTask { }); let parent_ssts = match try_join_all(futures).await { Ok(parent_ssts) => parent_ssts.into_iter().flatten().collect_vec(), - Err(e) => return tracing::error!("get old meta from cache error: {}", e), + Err(e) => { + return tracing::error!(error = %e.as_report(), "get old meta from cache error") + } }; let units = Self::get_units_to_refill_by_inheritance(context, &holders, &parent_ssts); @@ -525,7 +528,7 @@ impl CacheRefillTask { async move { let sst = ssts.get(&unit.sst_obj_id).unwrap(); if let Err(e) = Self::data_file_cache_refill_unit(context, sst, unit).await { - tracing::error!("data file cache unit refill error: {}", e); + tracing::error!(error = %e.as_report(), "data file cache unit refill error"); } } }); diff --git a/src/storage/src/hummock/event_handler/uploader.rs b/src/storage/src/hummock/event_handler/uploader.rs index af64d62acf16..12c775165a75 100644 --- a/src/storage/src/hummock/event_handler/uploader.rs +++ b/src/storage/src/hummock/event_handler/uploader.rs @@ -35,6 +35,7 @@ use risingwave_hummock_sdk::table_watermark::{ TableWatermarks, VnodeWatermark, WatermarkDirection, }; use risingwave_hummock_sdk::{CompactionGroupId, HummockEpoch, LocalSstableInfo}; +use thiserror_ext::AsReport; use tokio::task::JoinHandle; use tracing::{debug, error, info}; @@ -137,8 +138,8 @@ impl MergingImmTask { Poll::Ready(match ready!(self.join_handle.poll_unpin(cx)) { Ok(task_result) => task_result, Err(err) => Err(HummockError::other(format!( - "fail to join imm merge join handle: {:?}", - err + "fail to join imm merge join handle: {}", + err.as_report() ))), }) } @@ -230,8 +231,8 @@ impl UploadingTask { }), Err(err) => Err(HummockError::other(format!( - "fail to join upload join handle: {:?}", - err + "fail to join upload join handle: {}", + 
err.as_report() ))), }) } @@ -244,8 +245,9 @@ impl UploadingTask { Ok(sstables) => return Poll::Ready(sstables), Err(e) => { error!( - "a flush task {:?} failed, start retry. Task info: {:?}", - self.task_info, e + error = %e.as_report(), + task_info = ?self.task_info, + "a flush task failed, start retry", ); self.join_handle = (self.spawn_upload_task)(self.payload.clone(), self.task_info.clone()); diff --git a/src/storage/src/hummock/local_version/pinned_version.rs b/src/storage/src/hummock/local_version/pinned_version.rs index 419bcfc6f515..46ef8edc442b 100644 --- a/src/storage/src/hummock/local_version/pinned_version.rs +++ b/src/storage/src/hummock/local_version/pinned_version.rs @@ -25,6 +25,7 @@ use risingwave_hummock_sdk::{CompactionGroupId, HummockVersionId, INVALID_VERSIO use risingwave_pb::hummock::hummock_version::Levels; use risingwave_pb::hummock::PbLevel; use risingwave_rpc_client::HummockMetaClient; +use thiserror_ext::AsReport; use tokio::sync::mpsc::error::TryRecvError; use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender}; use tokio_retry::strategy::jitter; @@ -274,8 +275,8 @@ pub(crate) async fn start_pinned_version_worker( Err(err) => { let retry_after = retry_backoff.next().unwrap_or(max_retry_interval); tracing::warn!( - "Failed to unpin version {:?}. Will retry after about {} milliseconds", - err, + error = %err.as_report(), + "Failed to unpin version. 
Will retry after about {} milliseconds", retry_after.as_millis() ); tokio::time::sleep(retry_after).await; diff --git a/src/storage/src/hummock/observer_manager.rs b/src/storage/src/hummock/observer_manager.rs index 2921eb064de9..e96d575ce599 100644 --- a/src/storage/src/hummock/observer_manager.rs +++ b/src/storage/src/hummock/observer_manager.rs @@ -81,7 +81,7 @@ impl ObserverState for HummockObserverNode { ), )) .inspect_err(|e| { - tracing::error!("unable to send version delta: {:?}", e); + tracing::error!(event = ?e.0, "unable to send version delta"); }); } @@ -131,7 +131,7 @@ impl ObserverState for HummockObserverNode { )), )) .inspect_err(|e| { - tracing::error!("unable to send full version: {:?}", e); + tracing::error!(event = ?e.0, "unable to send full version"); }); let snapshot_version = snapshot.version.unwrap(); self.version = snapshot_version.catalog_version; diff --git a/src/storage/src/hummock/shared_buffer/shared_buffer_batch.rs b/src/storage/src/hummock/shared_buffer/shared_buffer_batch.rs index 840578ed7415..00facd7bec10 100644 --- a/src/storage/src/hummock/shared_buffer/shared_buffer_batch.rs +++ b/src/storage/src/hummock/shared_buffer/shared_buffer_batch.rs @@ -1327,7 +1327,7 @@ mod tests { table_id, ); - let batch_items = vec![ + let batch_items = [ shared_buffer_items1, shared_buffer_items2, shared_buffer_items3, diff --git a/src/storage/src/hummock/sstable/forward_sstable_iterator.rs b/src/storage/src/hummock/sstable/forward_sstable_iterator.rs index 729ffb59303c..7acf9070ae99 100644 --- a/src/storage/src/hummock/sstable/forward_sstable_iterator.rs +++ b/src/storage/src/hummock/sstable/forward_sstable_iterator.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use await_tree::InstrumentAwait; use risingwave_hummock_sdk::key::FullKey; +use thiserror_ext::AsReport; use super::super::{HummockResult, HummockValue}; use crate::hummock::block_stream::BlockStream; @@ -131,14 +132,20 @@ impl SstableIterator { if self.preload_stream.is_none() && idx + 1 < 
self.preload_end_block_idx { match self .sstable_store - .prefetch_blocks(self.sst.value(), idx, self.preload_end_block_idx, - self.options.cache_policy, - &mut self.stats, + .prefetch_blocks( + self.sst.value(), + idx, + self.preload_end_block_idx, + self.options.cache_policy, + &mut self.stats, ) .verbose_instrument_await("prefetch_blocks") - .await { + .await + { Ok(preload_stream) => self.preload_stream = Some(preload_stream), - Err(e) => tracing::warn!("failed to create stream for prefetch data because of {:?}, fall back to block get.", e), + Err(e) => { + tracing::warn!(error = %e.as_report(), "failed to create stream for prefetch data, fall back to block get") + } } } @@ -177,7 +184,7 @@ impl SstableIterator { } if self.preload_stream.is_none() && idx + 1 < self.preload_end_block_idx { if let Err(e) = ret { - tracing::warn!("recreate stream because the connection to remote storage has closed, reason: {:?}", e); + tracing::warn!(error = %e.as_report(), "recreate stream because the connection to remote storage has closed"); if self.preload_retry_times >= self.options.max_preload_retry_times { break; } @@ -200,7 +207,7 @@ impl SstableIterator { self.preload_stream = Some(stream); } Err(e) => { - tracing::warn!("failed to recreate stream meet IO error: {:?}", e); + tracing::warn!(error = %e.as_report(), "failed to recreate stream meet IO error"); break; } } diff --git a/src/storage/src/hummock/sstable/sstable_object_id_manager.rs b/src/storage/src/hummock/sstable/sstable_object_id_manager.rs index 21fe86dcaad7..c32867e45d1e 100644 --- a/src/storage/src/hummock/sstable/sstable_object_id_manager.rs +++ b/src/storage/src/hummock/sstable/sstable_object_id_manager.rs @@ -26,6 +26,7 @@ use risingwave_pb::hummock::GetNewSstIdsRequest; use risingwave_pb::meta::heartbeat_request::extra_info::Info; use risingwave_rpc_client::{ExtraInfoSource, GrpcCompactorProxyClient, HummockMetaClient}; use sync_point::sync_point; +use thiserror_ext::AsReport; use tokio::sync::oneshot; 
use crate::hummock::{HummockError, HummockResult}; @@ -284,8 +285,8 @@ impl GetObjectId for SharedComapctorObjectIdManager { Ok(start_id) } Err(e) => Err(HummockError::other(format!( - "Fail to get new sst id, {}", - e + "Fail to get new sst id: {}", + e.as_report() ))), } } diff --git a/src/storage/src/hummock/sstable_store.rs b/src/storage/src/hummock/sstable_store.rs index 9918274b8ee9..06bb5ee05486 100644 --- a/src/storage/src/hummock/sstable_store.rs +++ b/src/storage/src/hummock/sstable_store.rs @@ -33,6 +33,7 @@ use risingwave_object_store::object::{ ObjectError, ObjectMetadataIter, ObjectStoreRef, ObjectStreamingUploader, }; use risingwave_pb::hummock::SstableInfo; +use thiserror_ext::AsReport; use tokio::task::JoinHandle; use tokio::time::Instant; use zstd::zstd_safe::WriteBuf; @@ -286,7 +287,7 @@ impl SstableStore { pub fn delete_cache(&self, object_id: HummockSstableObjectId) { self.meta_cache.erase(object_id, &object_id); if let Err(e) = self.meta_file_cache.remove(&object_id) { - tracing::warn!("meta file cache remove error: {}", e); + tracing::warn!(error = %e.as_report(), "meta file cache remove error"); } } @@ -561,7 +562,7 @@ impl SstableStore { pub fn clear_block_cache(&self) { self.block_cache.clear(); if let Err(e) = self.data_file_cache.clear() { - tracing::warn!("data file cache clear error: {}", e); + tracing::warn!(error = %e.as_report(), "data file cache clear error"); } } @@ -569,7 +570,7 @@ impl SstableStore { pub fn clear_meta_cache(&self) { self.meta_cache.clear(); if let Err(e) = self.meta_file_cache.clear() { - tracing::warn!("meta file cache clear error: {}", e); + tracing::warn!(error = %e.as_report(), "meta file cache clear error"); } } @@ -714,8 +715,8 @@ impl SstableStore { Ok(Err(e)) => return Err(HummockError::from(e)), Err(e) => { return Err(HummockError::other(format!( - "failed to get result, this read request may be canceled: {:?}", - e + "failed to get result, this read request may be canceled: {}", + e.as_report() ))) } 
}; diff --git a/src/storage/src/hummock/store/hummock_storage.rs b/src/storage/src/hummock/store/hummock_storage.rs index 96049bd90d14..d1308e32a9ae 100644 --- a/src/storage/src/hummock/store/hummock_storage.rs +++ b/src/storage/src/hummock/store/hummock_storage.rs @@ -32,7 +32,7 @@ use risingwave_pb::hummock::SstableInfo; use risingwave_rpc_client::HummockMetaClient; use tokio::sync::mpsc::{unbounded_channel, UnboundedSender}; use tokio::sync::oneshot; -use tracing::log::error; +use tracing::error; use super::local_hummock_storage::LocalHummockStorage; use super::version::{CommittedVersion, HummockVersionReader}; @@ -69,7 +69,7 @@ impl Drop for HummockStorageShutdownGuard { let _ = self .shutdown_sender .send(HummockEvent::Shutdown) - .inspect_err(|e| error!("unable to send shutdown: {:?}", e)); + .inspect_err(|e| error!(event = ?e.0, "unable to send shutdown")); } } diff --git a/src/storage/src/hummock/store/local_hummock_storage.rs b/src/storage/src/hummock/store/local_hummock_storage.rs index 4de14e3e0de1..2bddff818d9f 100644 --- a/src/storage/src/hummock/store/local_hummock_storage.rs +++ b/src/storage/src/hummock/store/local_hummock_storage.rs @@ -401,6 +401,10 @@ impl LocalStateStore for LocalHummockStorage { fn seal_current_epoch(&mut self, next_epoch: u64, mut opts: SealCurrentEpochOptions) { assert!(!self.is_dirty()); + if let Some(new_level) = &opts.switch_op_consistency_level { + self.mem_table.op_consistency_level.update(new_level); + self.op_consistency_level.update(new_level); + } let prev_epoch = self .epoch .replace(next_epoch) diff --git a/src/storage/src/hummock/utils.rs b/src/storage/src/hummock/utils.rs index 0b4700500f40..f40e6b1aa043 100644 --- a/src/storage/src/hummock/utils.rs +++ b/src/storage/src/hummock/utils.rs @@ -578,7 +578,7 @@ pub(crate) async fn wait_for_epoch( } loop { match tokio::time::timeout(Duration::from_secs(30), receiver.changed()).await { - Err(elapsed) => { + Err(_) => { // The reason that we need to retry here is batch 
scan in // chain/rearrange_chain is waiting for an // uncommitted epoch carried by the CreateMV barrier, which @@ -589,9 +589,8 @@ pub(crate) async fn wait_for_epoch( // CN with the same distribution as the upstream MV. // See #3845 for more details. tracing::warn!( - "wait_epoch {:?} timeout when waiting for version update elapsed {:?}s", - wait_epoch, - elapsed + epoch = wait_epoch, + "wait_epoch timeout when waiting for version update", ); continue; } diff --git a/src/storage/src/hummock/vacuum.rs b/src/storage/src/hummock/vacuum.rs index 0a5bbf144541..5242a6eae078 100644 --- a/src/storage/src/hummock/vacuum.rs +++ b/src/storage/src/hummock/vacuum.rs @@ -21,6 +21,7 @@ use risingwave_hummock_sdk::HummockSstableObjectId; use risingwave_object_store::object::ObjectMetadataIter; use risingwave_pb::hummock::{FullScanTask, VacuumTask}; use risingwave_rpc_client::HummockMetaClient; +use thiserror_ext::AsReport; use super::{HummockError, HummockResult}; use crate::hummock::{SstableStore, SstableStoreRef}; @@ -48,7 +49,7 @@ impl Vacuum { tracing::info!("Finished vacuuming SSTs"); } Err(e) => { - tracing::warn!("Failed to report vacuum task: {:#?}", e); + tracing::warn!(error = %e.as_report(), "Failed to report vacuum task"); return false; } } @@ -128,7 +129,7 @@ impl Vacuum { tracing::info!("Finished full scan SSTs"); } Err(e) => { - tracing::warn!("Failed to report full scan task: {:#?}", e); + tracing::warn!(error = %e.as_report(), "Failed to report full scan task"); return false; } } diff --git a/src/storage/src/mem_table.rs b/src/storage/src/mem_table.rs index 259122e918f2..de57a3f98f6e 100644 --- a/src/storage/src/mem_table.rs +++ b/src/storage/src/mem_table.rs @@ -29,6 +29,7 @@ use risingwave_common::hash::VnodeBitmapExt; use risingwave_hummock_sdk::key::{prefixed_range_with_vnode, FullKey, TableKey, TableKeyRange}; use risingwave_hummock_sdk::table_watermark::WatermarkDirection; use thiserror::Error; +use thiserror_ext::AsReport; use tracing::error; use 
crate::error::{StorageError, StorageResult}; @@ -609,6 +610,9 @@ impl LocalStateStore for MemtableLocalState fn seal_current_epoch(&mut self, next_epoch: u64, opts: SealCurrentEpochOptions) { assert!(!self.is_dirty()); + if let Some(value_checker) = opts.switch_op_consistency_level { + self.mem_table.op_consistency_level.update(&value_checker); + } let prev_epoch = self .epoch .replace(next_epoch) @@ -649,7 +653,7 @@ impl LocalStateStore for MemtableLocalState table_id: self.table_id, }, ) { - error!(err = ?e, "failed to write delete ranges of table watermark"); + error!(error = %e.as_report(), "failed to write delete ranges of table watermark"); } } } diff --git a/src/storage/src/monitor/hummock_state_store_metrics.rs b/src/storage/src/monitor/hummock_state_store_metrics.rs index 45b72bd1eee8..6072d2676f49 100644 --- a/src/storage/src/monitor/hummock_state_store_metrics.rs +++ b/src/storage/src/monitor/hummock_state_store_metrics.rs @@ -29,6 +29,7 @@ use risingwave_common::monitor::GLOBAL_METRICS_REGISTRY; use risingwave_common::{ register_guarded_histogram_vec_with_registry, register_guarded_int_counter_vec_with_registry, }; +use thiserror_ext::AsReport; use tracing::warn; /// [`HummockStateStoreMetrics`] stores the performance and IO metrics of `XXXStore` such as @@ -516,8 +517,8 @@ pub fn monitor_cache(memory_collector: Arc) { let collector = Box::new(StateStoreCollector::new(memory_collector)); if let Err(e) = GLOBAL_METRICS_REGISTRY.register(collector) { warn!( - "unable to monitor cache. May have been registered if in all-in-one deployment: {:?}", - e + "unable to monitor cache. 
May have been registered if in all-in-one deployment: {}", + e.as_report() ); } } diff --git a/src/storage/src/monitor/monitored_store.rs b/src/storage/src/monitor/monitored_store.rs index facea1f09090..404301516344 100644 --- a/src/storage/src/monitor/monitored_store.rs +++ b/src/storage/src/monitor/monitored_store.rs @@ -22,6 +22,7 @@ use futures_async_stream::try_stream; use risingwave_common::catalog::TableId; use risingwave_hummock_sdk::key::{TableKey, TableKeyRange}; use risingwave_hummock_sdk::HummockReadEpoch; +use thiserror_ext::AsReport; use tokio::time::Instant; use tracing::error; @@ -99,7 +100,7 @@ impl MonitoredStateStore { let iter_stream = iter_stream_future .verbose_instrument_await("store_create_iter") .await - .inspect_err(|e| error!("Failed in iter: {:?}", e))?; + .inspect_err(|e| error!(error = %e.as_report(), "Failed in iter"))?; self.storage_metrics .iter_init_duration @@ -153,7 +154,7 @@ impl MonitoredStateStore { .verbose_instrument_await("store_get") .instrument(tracing::trace_span!("store_get")) .await - .inspect_err(|e| error!("Failed in get: {:?}", e))?; + .inspect_err(|e| error!(error = %e.as_report(), "Failed in get"))?; timer.observe_duration(); @@ -303,7 +304,7 @@ impl StateStore for MonitoredStateStore { self.inner .try_wait_epoch(epoch) .verbose_instrument_await("store_wait_epoch") - .inspect_err(|e| error!("Failed in wait_epoch: {:?}", e)) + .inspect_err(|e| error!(error = %e.as_report(), "Failed in wait_epoch")) } async fn sync(&self, epoch: u64) -> StorageResult { @@ -315,7 +316,7 @@ impl StateStore for MonitoredStateStore { .sync(epoch) .instrument_await("store_sync") .await - .inspect_err(|e| error!("Failed in sync: {:?}", e))?; + .inspect_err(|e| error!(error = %e.as_report(), "Failed in sync"))?; timer.observe_duration(); if sync_result.sync_size != 0 { self.storage_metrics @@ -340,7 +341,7 @@ impl StateStore for MonitoredStateStore { self.inner .clear_shared_buffer() .verbose_instrument_await("store_clear_shared_buffer") - 
.inspect_err(|e| error!("Failed in clear_shared_buffer: {:?}", e)) + .inspect_err(|e| error!(error = %e.as_report(), "Failed in clear_shared_buffer")) } async fn new_local(&self, option: NewLocalOptions) -> Self::Local { @@ -393,7 +394,7 @@ impl MonitoredStateStoreIter { while let Some((key, value)) = inner .try_next() .await - .inspect_err(|e| error!("Failed in next: {:?}", e))? + .inspect_err(|e| error!(error = %e.as_report(), "Failed in next"))? { stats.total_items += 1; stats.total_size += key.encoded_len() + value.len(); diff --git a/src/storage/src/store.rs b/src/storage/src/store.rs index 2d1bc73fffb4..27de6b85c4bb 100644 --- a/src/storage/src/store.rs +++ b/src/storage/src/store.rs @@ -14,6 +14,7 @@ use std::collections::HashMap; use std::default::Default; +use std::fmt::{Debug, Formatter}; use std::future::Future; use std::ops::Bound; use std::sync::{Arc, LazyLock}; @@ -409,6 +410,41 @@ pub enum OpConsistencyLevel { ConsistentOldValue(Arc), } +impl Debug for OpConsistencyLevel { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + OpConsistencyLevel::Inconsistent => f.write_str("OpConsistencyLevel::Inconsistent"), + OpConsistencyLevel::ConsistentOldValue(_) => { + f.write_str("OpConsistencyLevel::ConsistentOldValue") + } + } + } +} + +impl PartialEq for OpConsistencyLevel { + fn eq(&self, other: &Self) -> bool { + matches!( + (self, other), + ( + OpConsistencyLevel::Inconsistent, + OpConsistencyLevel::Inconsistent + ) | ( + OpConsistencyLevel::ConsistentOldValue(_), + OpConsistencyLevel::ConsistentOldValue(_), + ) + ) + } +} + +impl Eq for OpConsistencyLevel {} + +impl OpConsistencyLevel { + pub fn update(&mut self, new_level: &OpConsistencyLevel) { + assert_ne!(self, new_level); + *self = new_level.clone() + } +} + #[derive(Clone, Default)] pub struct NewLocalOptions { pub table_id: TableId, @@ -534,6 +570,7 @@ impl From for InitOptions { #[derive(Clone, Debug)] pub struct SealCurrentEpochOptions { pub table_watermarks: 
Option<(WatermarkDirection, Vec)>, + pub switch_op_consistency_level: Option, } impl From for TracedSealCurrentEpochOptions { @@ -548,6 +585,9 @@ impl From for TracedSealCurrentEpochOptions { .collect(), ) }), + switch_op_consistency_level: value + .switch_op_consistency_level + .map(|level| matches!(level, OpConsistencyLevel::ConsistentOldValue(_))), } } } @@ -572,24 +612,22 @@ impl From for SealCurrentEpochOptions { .collect(), ) }), + switch_op_consistency_level: value.switch_op_consistency_level.map(|enable| { + if enable { + OpConsistencyLevel::ConsistentOldValue(CHECK_BYTES_EQUAL.clone()) + } else { + OpConsistencyLevel::Inconsistent + } + }), } } } impl SealCurrentEpochOptions { - pub fn new(watermarks: Vec, direction: WatermarkDirection) -> Self { - Self { - table_watermarks: Some((direction, watermarks)), - } - } - - pub fn no_watermark() -> Self { + pub fn for_test() -> Self { Self { table_watermarks: None, + switch_op_consistency_level: None, } } - - pub fn for_test() -> Self { - Self::no_watermark() - } } diff --git a/src/stream/src/common/log_store_impl/kv_log_store/writer.rs b/src/stream/src/common/log_store_impl/kv_log_store/writer.rs index a5f002bd73b2..8d6fafc8bc31 100644 --- a/src/stream/src/common/log_store_impl/kv_log_store/writer.rs +++ b/src/stream/src/common/log_store_impl/kv_log_store/writer.rs @@ -162,7 +162,10 @@ impl LogWriter for KvLogStoreWriter { let watermark = watermark.into_iter().collect_vec(); self.state_store.seal_current_epoch( next_epoch, - SealCurrentEpochOptions::new(watermark, WatermarkDirection::Ascending), + SealCurrentEpochOptions { + table_watermarks: Some((WatermarkDirection::Ascending, watermark)), + switch_op_consistency_level: None, + }, ); self.tx.barrier(epoch, is_checkpoint, next_epoch); self.seq_id = FIRST_SEQ_ID; diff --git a/src/stream/src/common/table/state_table.rs b/src/stream/src/common/table/state_table.rs index 269e1dd0490f..265794b0d9bd 100644 --- a/src/stream/src/common/table/state_table.rs +++ 
b/src/stream/src/common/table/state_table.rs @@ -60,6 +60,7 @@ use risingwave_storage::store::{ use risingwave_storage::table::merge_sort::merge_sort; use risingwave_storage::table::{KeyedRow, TableDistribution}; use risingwave_storage::StateStore; +use thiserror_ext::AsReport; use tracing::{trace, Instrument}; use super::watermark::{WatermarkBufferByEpoch, WatermarkBufferStrategy}; @@ -149,6 +150,8 @@ pub struct StateTableInner< /// 1. Computing output_value_indices to ser/de replicated rows. /// 2. Computing output pk indices to used them for backfill state. output_indices: Vec, + + is_consistent_op: bool, } /// `StateTable` will use `BasicSerde` as default @@ -207,14 +210,14 @@ fn consistent_old_value_op(row_serde: impl ValueRowSerde) -> OpConsistencyLevel let first = match row_serde.deserialize(first) { Ok(rows) => rows, Err(e) => { - error!(err = %e, value = ?first, "fail to deserialize serialized value"); + error!(error = %e.as_report(), value = ?first, "fail to deserialize serialized value"); return false; } }; let second = match row_serde.deserialize(second) { Ok(rows) => rows, Err(e) => { - error!(err = %e, value = ?second, "fail to deserialize serialized value"); + error!(error = %e.as_report(), value = ?second, "fail to deserialize serialized value"); return false; } }; @@ -425,6 +428,7 @@ where data_types, output_indices, i2o_mapping, + is_consistent_op, } } @@ -604,6 +608,7 @@ where data_types, output_indices: vec![], i2o_mapping: ColIndexMapping::new(vec![], 0), + is_consistent_op, } } @@ -1025,7 +1030,24 @@ where } pub async fn commit(&mut self, new_epoch: EpochPair) -> StreamExecutorResult<()> { + self.commit_with_switch_consistent_op(new_epoch, None).await + } + + pub async fn commit_with_switch_consistent_op( + &mut self, + new_epoch: EpochPair, + switch_consistent_op: Option, + ) -> StreamExecutorResult<()> { assert_eq!(self.epoch(), new_epoch.prev); + let switch_op_consistency_level = switch_consistent_op.map(|enable_consistent_op| { + 
assert_ne!(self.is_consistent_op, enable_consistent_op); + self.is_consistent_op = enable_consistent_op; + if enable_consistent_op { + consistent_old_value_op(self.row_serde.clone()) + } else { + OpConsistencyLevel::Inconsistent + } + }); trace!( table_id = %self.table_id, epoch = ?self.epoch(), @@ -1036,11 +1058,16 @@ where self.watermark_buffer_strategy.tick(); if !self.is_dirty() { // If the state table is not modified, go fast path. - self.local_store - .seal_current_epoch(new_epoch.curr, SealCurrentEpochOptions::no_watermark()); + self.local_store.seal_current_epoch( + new_epoch.curr, + SealCurrentEpochOptions { + table_watermarks: None, + switch_op_consistency_level, + }, + ); return Ok(()); } else { - self.seal_current_epoch(new_epoch.curr) + self.seal_current_epoch(new_epoch.curr, switch_op_consistency_level) .instrument(tracing::info_span!("state_table_commit")) .await?; } @@ -1105,12 +1132,21 @@ where // Tick the watermark buffer here because state table is expected to be committed once // per epoch. self.watermark_buffer_strategy.tick(); - self.local_store - .seal_current_epoch(new_epoch.curr, SealCurrentEpochOptions::no_watermark()); + self.local_store.seal_current_epoch( + new_epoch.curr, + SealCurrentEpochOptions { + table_watermarks: None, + switch_op_consistency_level: None, + }, + ); } /// Write to state store. 
- async fn seal_current_epoch(&mut self, next_epoch: u64) -> StreamExecutorResult<()> { + async fn seal_current_epoch( + &mut self, + next_epoch: u64, + switch_op_consistency_level: Option, + ) -> StreamExecutorResult<()> { let watermark = self.state_clean_watermark.take(); watermark.as_ref().inspect(|watermark| { trace!(table_id = %self.table_id, watermark = ?watermark, "state cleaning"); @@ -1197,13 +1233,16 @@ where } self.local_store.flush(vec![]).await?; - let seal_opt = match seal_watermark { - Some((direction, watermark)) => { - SealCurrentEpochOptions::new(vec![watermark], direction) - } - None => SealCurrentEpochOptions::no_watermark(), - }; - self.local_store.seal_current_epoch(next_epoch, seal_opt); + let table_watermarks = + seal_watermark.map(|(direction, watermark)| (direction, vec![watermark])); + + self.local_store.seal_current_epoch( + next_epoch, + SealCurrentEpochOptions { + table_watermarks, + switch_op_consistency_level, + }, + ); Ok(()) } diff --git a/src/stream/src/executor/actor.rs b/src/stream/src/executor/actor.rs index ffc29c2d25da..19ccd4cf893b 100644 --- a/src/stream/src/executor/actor.rs +++ b/src/stream/src/executor/actor.rs @@ -27,6 +27,7 @@ use risingwave_common::util::epoch::EpochPair; use risingwave_expr::expr_context::expr_context_scope; use risingwave_expr::ExprError; use risingwave_pb::plan_common::ExprContext; +use risingwave_pb::stream_plan::PbStreamActor; use thiserror_ext::AsReport; use tokio_stream::StreamExt; use tracing::Instrument; @@ -35,12 +36,13 @@ use super::monitor::StreamingMetrics; use super::subtask::SubtaskHandle; use super::StreamConsumer; use crate::error::StreamResult; -use crate::task::{ActorId, SharedContext}; +use crate::task::{ActorId, LocalBarrierManager}; /// Shared by all operators of an actor. pub struct ActorContext { pub id: ActorId, pub fragment_id: u32, + pub mview_definition: String, // TODO(eric): these seem to be useless now? 
last_mem_val: Arc, @@ -58,6 +60,7 @@ impl ActorContext { Arc::new(Self { id, fragment_id: 0, + mview_definition: "".to_string(), cur_mem_val: Arc::new(0.into()), last_mem_val: Arc::new(0.into()), total_mem_val: Arc::new(TrAdder::new()), @@ -67,15 +70,15 @@ impl ActorContext { } pub fn create_with_metrics( - id: ActorId, - fragment_id: u32, + stream_actor: &PbStreamActor, total_mem_val: Arc>, streaming_metrics: Arc, unique_user_errors: usize, ) -> ActorContextRef { Arc::new(Self { - id, - fragment_id, + id: stream_actor.actor_id, + fragment_id: stream_actor.fragment_id, + mview_definition: stream_actor.mview_definition.clone(), cur_mem_val: Arc::new(0.into()), last_mem_val: Arc::new(0.into()), total_mem_val, @@ -132,10 +135,10 @@ pub struct Actor { /// The subtasks to execute concurrently. subtasks: Vec, - context: Arc, _metrics: Arc, - actor_context: ActorContextRef, + pub actor_context: ActorContextRef, expr_context: ExprContext, + barrier_manager: LocalBarrierManager, } impl Actor @@ -145,18 +148,18 @@ where pub fn new( consumer: C, subtasks: Vec, - context: Arc, metrics: Arc, actor_context: ActorContextRef, expr_context: ExprContext, + barrier_manager: LocalBarrierManager, ) -> Self { Self { consumer, subtasks, - context, _metrics: metrics, actor_context, expr_context, + barrier_manager, } } @@ -219,7 +222,7 @@ where .into())); // Collect barriers to local barrier manager - self.context.barrier_manager().collect(id, &barrier); + self.barrier_manager.collect(id, &barrier); // Then stop this actor if asked if barrier.is_stop(id) { diff --git a/src/stream/src/executor/dispatch.rs b/src/stream/src/executor/dispatch.rs index fd3d3903fc65..a21012c5f166 100644 --- a/src/stream/src/executor/dispatch.rs +++ b/src/stream/src/executor/dispatch.rs @@ -1209,7 +1209,7 @@ mod tests { // 2. Take downstream receivers. 
let mut rxs = [untouched, old, new, old_simple, new_simple] .into_iter() - .map(|id| (id, ctx.take_receiver(&(actor_id, id)).unwrap())) + .map(|id| (id, ctx.take_receiver((actor_id, id)).unwrap())) .collect::>(); macro_rules! try_recv { ($down_id:expr) => { diff --git a/src/stream/src/executor/exchange/input.rs b/src/stream/src/executor/exchange/input.rs index 42d2c5da95f1..5eb583a1cadd 100644 --- a/src/stream/src/executor/exchange/input.rs +++ b/src/stream/src/executor/exchange/input.rs @@ -236,7 +236,7 @@ pub(crate) fn new_input( let input = if is_local_address(&context.addr, &upstream_addr) { LocalInput::new( - context.take_receiver(&(upstream_actor_id, actor_id))?, + context.take_receiver((upstream_actor_id, actor_id))?, upstream_actor_id, ) .boxed_input() diff --git a/src/stream/src/executor/integration_tests.rs b/src/stream/src/executor/integration_tests.rs index 958f341ada3b..3ba532976b07 100644 --- a/src/stream/src/executor/integration_tests.rs +++ b/src/stream/src/executor/integration_tests.rs @@ -36,7 +36,7 @@ use crate::executor::test_utils::agg_executor::{ generate_agg_schema, new_boxed_simple_agg_executor, }; use crate::executor::{Executor, MergeExecutor, ProjectExecutor, StatelessSimpleAggExecutor}; -use crate::task::SharedContext; +use crate::task::{LocalBarrierManager, SharedContext}; /// This test creates a merger-dispatcher pair, and run a sum. Each chunk /// has 0~9 elements. 
We first insert the 10 chunks, then delete them, @@ -76,14 +76,13 @@ async fn test_merger_sum_aggr() { input: aggregator.boxed(), channel: Box::new(LocalOutput::new(233, tx)), }; - let context = SharedContext::for_test().into(); let actor = Actor::new( consumer, vec![], - context, StreamingMetrics::unused().into(), actor_ctx.clone(), expr_context.clone(), + LocalBarrierManager::for_test(), ); (actor, rx) }; @@ -128,14 +127,13 @@ async fn test_merger_sum_aggr() { ctx, metrics, ); - let context = SharedContext::for_test().into(); let actor = Actor::new( dispatcher, vec![], - context, StreamingMetrics::unused().into(), actor_ctx.clone(), expr_context.clone(), + LocalBarrierManager::for_test(), ); handles.push(tokio::spawn(actor.run())); @@ -184,14 +182,13 @@ async fn test_merger_sum_aggr() { input: projection.boxed(), data: items.clone(), }; - let context = SharedContext::for_test().into(); let actor = Actor::new( consumer, vec![], - context, StreamingMetrics::unused().into(), actor_ctx.clone(), expr_context.clone(), + LocalBarrierManager::for_test(), ); handles.push(tokio::spawn(actor.run())); diff --git a/src/stream/src/executor/over_window/general.rs b/src/stream/src/executor/over_window/general.rs index 0ba4808b9362..f9f7883b603b 100644 --- a/src/stream/src/executor/over_window/general.rs +++ b/src/stream/src/executor/over_window/general.rs @@ -40,9 +40,9 @@ use super::over_partition::{ }; use crate::cache::{new_unbounded, ManagedLruCache}; use crate::common::metrics::MetricsInfo; +use crate::common::table::state_table::StateTable; use crate::common::StreamChunkBuilder; use crate::executor::monitor::StreamingMetrics; -use crate::executor::test_utils::prelude::StateTable; use crate::executor::{ expect_first_barrier, ActorContextRef, BoxedExecutor, Executor, ExecutorInfo, Message, StreamExecutorError, StreamExecutorResult, diff --git a/src/stream/src/executor/over_window/over_partition.rs b/src/stream/src/executor/over_window/over_partition.rs index 
7a395821f603..00c4d09ab5da 100644 --- a/src/stream/src/executor/over_window/over_partition.rs +++ b/src/stream/src/executor/over_window/over_partition.rs @@ -31,7 +31,7 @@ use risingwave_storage::store::PrefetchOptions; use risingwave_storage::StateStore; use super::general::RowConverter; -use crate::executor::test_utils::prelude::StateTable; +use crate::common::table::state_table::StateTable; use crate::executor::StreamExecutorResult; pub(super) type CacheKey = Sentinelled; diff --git a/src/stream/src/from_proto/append_only_dedup.rs b/src/stream/src/from_proto/append_only_dedup.rs index cdbc86683dc4..4ae2ee24ce79 100644 --- a/src/stream/src/from_proto/append_only_dedup.rs +++ b/src/stream/src/from_proto/append_only_dedup.rs @@ -21,7 +21,7 @@ use super::ExecutorBuilder; use crate::common::table::state_table::StateTable; use crate::error::StreamResult; use crate::executor::{AppendOnlyDedupExecutor, BoxedExecutor}; -use crate::task::{ExecutorParams, LocalStreamManagerCore}; +use crate::task::ExecutorParams; pub struct AppendOnlyDedupExecutorBuilder; @@ -32,7 +32,6 @@ impl ExecutorBuilder for AppendOnlyDedupExecutorBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); let table = node.get_state_table()?; @@ -43,8 +42,8 @@ impl ExecutorBuilder for AppendOnlyDedupExecutorBuilder { state_table, params.info, params.actor_context, - stream.get_watermark_epoch(), - stream.streaming_metrics.clone(), + params.watermark_epoch, + params.executor_stats.clone(), ))) } } diff --git a/src/stream/src/from_proto/barrier_recv.rs b/src/stream/src/from_proto/barrier_recv.rs index 8d834642147f..032afde2ee1d 100644 --- a/src/stream/src/from_proto/barrier_recv.rs +++ b/src/stream/src/from_proto/barrier_recv.rs @@ -27,7 +27,6 @@ impl ExecutorBuilder for BarrierRecvExecutorBuilder { params: ExecutorParams, _node: &Self::Node, _store: impl StateStore, - 
stream: &mut LocalStreamManagerCore, ) -> StreamResult { assert!( params.input.is_empty(), @@ -35,9 +34,8 @@ impl ExecutorBuilder for BarrierRecvExecutorBuilder { ); let (sender, barrier_receiver) = unbounded_channel(); - stream - .context - .barrier_manager() + params + .local_barrier_manager .register_sender(params.actor_context.id, sender); Ok(BarrierRecvExecutor::new(params.actor_context, params.info, barrier_receiver).boxed()) diff --git a/src/stream/src/from_proto/batch_query.rs b/src/stream/src/from_proto/batch_query.rs index 30112807266c..4fc492f7c984 100644 --- a/src/stream/src/from_proto/batch_query.rs +++ b/src/stream/src/from_proto/batch_query.rs @@ -30,7 +30,6 @@ impl ExecutorBuilder for BatchQueryExecutorBuilder { params: ExecutorParams, node: &Self::Node, state_store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { if node.table_desc.is_none() { // used in sharing cdc source backfill as a dummy batch plan node @@ -57,7 +56,7 @@ impl ExecutorBuilder for BatchQueryExecutorBuilder { assert_eq!(table.schema().data_types(), params.info.schema.data_types()); let executor = - BatchQueryExecutor::new(table, stream.config.developer.chunk_size, params.info); + BatchQueryExecutor::new(table, params.env.config().developer.chunk_size, params.info); Ok(executor.boxed()) } diff --git a/src/stream/src/from_proto/cdc_filter.rs b/src/stream/src/from_proto/cdc_filter.rs index 4add7360b7db..5747e1894d4a 100644 --- a/src/stream/src/from_proto/cdc_filter.rs +++ b/src/stream/src/from_proto/cdc_filter.rs @@ -28,7 +28,6 @@ impl ExecutorBuilder for CdcFilterExecutorBuilder { params: ExecutorParams, node: &Self::Node, _store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); let search_condition = diff --git a/src/stream/src/from_proto/dml.rs b/src/stream/src/from_proto/dml.rs index e039b59016d4..d7f21fda7bf2 100644 --- a/src/stream/src/from_proto/dml.rs +++ 
b/src/stream/src/from_proto/dml.rs @@ -21,7 +21,7 @@ use super::ExecutorBuilder; use crate::error::StreamResult; use crate::executor::dml::DmlExecutor; use crate::executor::BoxedExecutor; -use crate::task::{ExecutorParams, LocalStreamManagerCore}; +use crate::task::ExecutorParams; pub struct DmlExecutorBuilder; @@ -32,7 +32,6 @@ impl ExecutorBuilder for DmlExecutorBuilder { params: ExecutorParams, node: &Self::Node, _store: impl StateStore, - _stream_manager: &mut LocalStreamManagerCore, ) -> StreamResult { let [upstream]: [_; 1] = params.input.try_into().unwrap(); let table_id = TableId::new(node.table_id); diff --git a/src/stream/src/from_proto/dynamic_filter.rs b/src/stream/src/from_proto/dynamic_filter.rs index a4bdc63f3736..babb2680a957 100644 --- a/src/stream/src/from_proto/dynamic_filter.rs +++ b/src/stream/src/from_proto/dynamic_filter.rs @@ -32,7 +32,6 @@ impl ExecutorBuilder for DynamicFilterExecutorBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [source_l, source_r]: [_; 2] = params.input.try_into().unwrap(); let key_l = node.get_left_key() as usize; diff --git a/src/stream/src/from_proto/eowc_over_window.rs b/src/stream/src/from_proto/eowc_over_window.rs index 7ba9aceaa93f..b2136f51c397 100644 --- a/src/stream/src/from_proto/eowc_over_window.rs +++ b/src/stream/src/from_proto/eowc_over_window.rs @@ -24,7 +24,7 @@ use crate::error::StreamResult; use crate::executor::{ BoxedExecutor, EowcOverWindowExecutor, EowcOverWindowExecutorArgs, Executor, }; -use crate::task::{ExecutorParams, LocalStreamManagerCore}; +use crate::task::ExecutorParams; pub struct EowcOverWindowExecutorBuilder; @@ -35,7 +35,6 @@ impl ExecutorBuilder for EowcOverWindowExecutorBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); let calls: Vec<_> = node @@ 
-67,7 +66,7 @@ impl ExecutorBuilder for EowcOverWindowExecutorBuilder { partition_key_indices, order_key_index, state_table, - watermark_epoch: stream.get_watermark_epoch(), + watermark_epoch: params.watermark_epoch, }) .boxed()) } diff --git a/src/stream/src/from_proto/expand.rs b/src/stream/src/from_proto/expand.rs index d2eb7c581afc..40bb972cdbd3 100644 --- a/src/stream/src/from_proto/expand.rs +++ b/src/stream/src/from_proto/expand.rs @@ -26,7 +26,6 @@ impl ExecutorBuilder for ExpandExecutorBuilder { params: ExecutorParams, node: &Self::Node, _store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); let column_subsets = node diff --git a/src/stream/src/from_proto/filter.rs b/src/stream/src/from_proto/filter.rs index d61cb177f946..a82b9cc65dea 100644 --- a/src/stream/src/from_proto/filter.rs +++ b/src/stream/src/from_proto/filter.rs @@ -27,7 +27,6 @@ impl ExecutorBuilder for FilterExecutorBuilder { params: ExecutorParams, node: &Self::Node, _store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); let search_condition = diff --git a/src/stream/src/from_proto/group_top_n.rs b/src/stream/src/from_proto/group_top_n.rs index 4c73d5eeeac2..a90e13d3f496 100644 --- a/src/stream/src/from_proto/group_top_n.rs +++ b/src/stream/src/from_proto/group_top_n.rs @@ -33,7 +33,6 @@ impl ExecutorBuilder for GroupTopNExecutorBuilder StreamResult { let group_by: Vec = node .get_group_key() @@ -68,7 +67,7 @@ impl ExecutorBuilder for GroupTopNExecutorBuilder StreamResult { let group_key_indices = node .get_group_key() @@ -102,14 +101,14 @@ impl ExecutorBuilder for HashAggExecutorBuilder { actor_ctx: params.actor_context, info: params.info, - extreme_cache_size: stream.config.developer.unsafe_extreme_cache_size, + extreme_cache_size: params.env.config().developer.unsafe_extreme_cache_size, agg_calls, row_count_index: 
node.get_row_count_index() as usize, storages, intermediate_state_table, distinct_dedup_tables, - watermark_epoch: stream.get_watermark_epoch(), + watermark_epoch: params.watermark_epoch, extra: HashAggExecutorExtraArgs { group_key_indices, chunk_size: params.env.config().developer.chunk_size, diff --git a/src/stream/src/from_proto/hash_join.rs b/src/stream/src/from_proto/hash_join.rs index 008ee0d2d0b1..d04db948853b 100644 --- a/src/stream/src/from_proto/hash_join.rs +++ b/src/stream/src/from_proto/hash_join.rs @@ -39,7 +39,6 @@ impl ExecutorBuilder for HashJoinExecutorBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { let is_append_only = node.is_append_only; let vnodes = Arc::new(params.vnode_bitmap.expect("vnodes not set for hash join")); @@ -150,7 +149,7 @@ impl ExecutorBuilder for HashJoinExecutorBuilder { degree_state_table_l, state_table_r, degree_state_table_r, - lru_manager: stream.get_watermark_epoch(), + lru_manager: params.watermark_epoch, is_append_only, metrics: params.executor_stats, join_type_proto: node.get_join_type()?, diff --git a/src/stream/src/from_proto/hop_window.rs b/src/stream/src/from_proto/hop_window.rs index b4dbe1fd010e..a56cbc2d8c97 100644 --- a/src/stream/src/from_proto/hop_window.rs +++ b/src/stream/src/from_proto/hop_window.rs @@ -27,7 +27,6 @@ impl ExecutorBuilder for HopWindowExecutorBuilder { params: ExecutorParams, node: &Self::Node, _store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let ExecutorParams { actor_context, diff --git a/src/stream/src/from_proto/lookup.rs b/src/stream/src/from_proto/lookup.rs index fab103294ad4..1f873f31127c 100644 --- a/src/stream/src/from_proto/lookup.rs +++ b/src/stream/src/from_proto/lookup.rs @@ -30,7 +30,6 @@ impl ExecutorBuilder for LookupExecutorBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - stream_manager: &mut LocalStreamManagerCore, ) -> 
StreamResult { let lookup = node; @@ -82,7 +81,7 @@ impl ExecutorBuilder for LookupExecutorBuilder { arrange_join_key_indices: lookup.arrange_key.iter().map(|x| *x as usize).collect(), column_mapping: lookup.column_mapping.iter().map(|x| *x as usize).collect(), storage_table, - watermark_epoch: stream_manager.get_watermark_epoch(), + watermark_epoch: params.watermark_epoch, chunk_size: params.env.config().developer.chunk_size, }))) } diff --git a/src/stream/src/from_proto/lookup_union.rs b/src/stream/src/from_proto/lookup_union.rs index 9ff765a96d3e..e9cc0ed33311 100644 --- a/src/stream/src/from_proto/lookup_union.rs +++ b/src/stream/src/from_proto/lookup_union.rs @@ -26,7 +26,6 @@ impl ExecutorBuilder for LookupUnionExecutorBuilder { params: ExecutorParams, node: &Self::Node, _store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { Ok(LookupUnionExecutor::new(params.info, params.input, node.order.clone()).boxed()) } diff --git a/src/stream/src/from_proto/merge.rs b/src/stream/src/from_proto/merge.rs index 6259aba71acc..aa0a6a9d2bb9 100644 --- a/src/stream/src/from_proto/merge.rs +++ b/src/stream/src/from_proto/merge.rs @@ -27,7 +27,6 @@ impl ExecutorBuilder for MergeExecutorBuilder { params: ExecutorParams, node: &Self::Node, _store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { let upstreams = node.get_upstream_actor_id(); let upstream_fragment_id = node.get_upstream_fragment_id(); @@ -36,8 +35,8 @@ impl ExecutorBuilder for MergeExecutorBuilder { .iter() .map(|&upstream_actor_id| { new_input( - &stream.context, - stream.streaming_metrics.clone(), + &params.shared_context, + params.executor_stats.clone(), params.actor_context.id, params.fragment_id, upstream_actor_id, @@ -64,9 +63,9 @@ impl ExecutorBuilder for MergeExecutorBuilder { params.fragment_id, upstream_fragment_id, inputs.into_iter().exactly_one().unwrap(), - stream.context.clone(), + params.shared_context.clone(), params.operator_id, -
stream.streaming_metrics.clone(), + params.executor_stats.clone(), ) .boxed()) } else { @@ -76,9 +75,9 @@ impl ExecutorBuilder for MergeExecutorBuilder { params.fragment_id, upstream_fragment_id, inputs, - stream.context.clone(), + params.shared_context.clone(), params.operator_id, - stream.streaming_metrics.clone(), + params.executor_stats.clone(), ) .boxed()) } diff --git a/src/stream/src/from_proto/mod.rs b/src/stream/src/from_proto/mod.rs index 9a9e83c0a328..747e619f149a 100644 --- a/src/stream/src/from_proto/mod.rs +++ b/src/stream/src/from_proto/mod.rs @@ -94,7 +94,7 @@ use self::watermark_filter::WatermarkFilterBuilder; use crate::error::StreamResult; use crate::executor::{BoxedExecutor, Executor, ExecutorInfo}; use crate::from_proto::values::ValuesExecutorBuilder; -use crate::task::{ExecutorParams, LocalStreamManagerCore}; +use crate::task::ExecutorParams; trait ExecutorBuilder { type Node; @@ -104,16 +104,15 @@ trait ExecutorBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> impl std::future::Future> + Send; } macro_rules! build_executor { - ($source:expr, $node:expr, $store:expr, $stream:expr, $($proto_type_name:path => $data_type:ty),* $(,)?) => { + ($source:expr, $node:expr, $store:expr, $($proto_type_name:path => $data_type:ty),* $(,)?) => { match $node.get_node_body().unwrap() { $( $proto_type_name(node) => { - <$data_type>::new_boxed_executor($source, node, $store, $stream).await + <$data_type>::new_boxed_executor($source, node, $store).await }, )* NodeBody::Exchange(_) | NodeBody::DeltaIndexJoin(_) => unreachable!() @@ -124,7 +123,6 @@ macro_rules! build_executor { /// Create an executor from protobuf [`StreamNode`]. 
pub async fn create_executor( params: ExecutorParams, - stream: &mut LocalStreamManagerCore, node: &StreamNode, store: impl StateStore, ) -> StreamResult { @@ -132,7 +130,6 @@ pub async fn create_executor( params, node, store, - stream, NodeBody::Source => SourceExecutorBuilder, NodeBody::Sink => SinkExecutorBuilder, NodeBody::Project => ProjectExecutorBuilder, diff --git a/src/stream/src/from_proto/mview.rs b/src/stream/src/from_proto/mview.rs index 153108d00a91..ecc54b20d853 100644 --- a/src/stream/src/from_proto/mview.rs +++ b/src/stream/src/from_proto/mview.rs @@ -32,7 +32,6 @@ impl ExecutorBuilder for MaterializeExecutorBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); @@ -58,9 +57,9 @@ impl ExecutorBuilder for MaterializeExecutorBuilder { params.actor_context, params.vnode_bitmap.map(Arc::new), table, - stream.get_watermark_epoch(), + params.watermark_epoch, conflict_behavior, - stream.streaming_metrics.clone(), + params.executor_stats.clone(), ) .await .boxed() @@ -86,7 +85,6 @@ impl ExecutorBuilder for ArrangeExecutorBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); @@ -112,9 +110,9 @@ impl ExecutorBuilder for ArrangeExecutorBuilder { params.actor_context, vnodes, table, - stream.get_watermark_epoch(), + params.watermark_epoch, conflict_behavior, - stream.streaming_metrics.clone(), + params.executor_stats.clone(), ) .await; diff --git a/src/stream/src/from_proto/no_op.rs b/src/stream/src/from_proto/no_op.rs index 73dacf10043c..606b584d9109 100644 --- a/src/stream/src/from_proto/no_op.rs +++ b/src/stream/src/from_proto/no_op.rs @@ -18,7 +18,7 @@ use risingwave_storage::StateStore; use super::ExecutorBuilder; use crate::error::StreamResult; use crate::executor::{BoxedExecutor, 
Executor, NoOpExecutor}; -use crate::task::{ExecutorParams, LocalStreamManagerCore}; +use crate::task::ExecutorParams; pub struct NoOpExecutorBuilder; @@ -29,7 +29,6 @@ impl ExecutorBuilder for NoOpExecutorBuilder { params: ExecutorParams, _node: &NoOpNode, _store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); Ok(NoOpExecutor::new(params.actor_context, params.info, input).boxed()) diff --git a/src/stream/src/from_proto/now.rs b/src/stream/src/from_proto/now.rs index 601c1ec3ad58..f622cdef1343 100644 --- a/src/stream/src/from_proto/now.rs +++ b/src/stream/src/from_proto/now.rs @@ -20,7 +20,7 @@ use super::ExecutorBuilder; use crate::common::table::state_table::StateTable; use crate::error::StreamResult; use crate::executor::{BoxedExecutor, NowExecutor}; -use crate::task::{ExecutorParams, LocalStreamManagerCore}; +use crate::task::ExecutorParams; pub struct NowExecutorBuilder; @@ -31,12 +31,10 @@ impl ExecutorBuilder for NowExecutorBuilder { params: ExecutorParams, node: &NowNode, store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { let (sender, barrier_receiver) = unbounded_channel(); - stream - .context - .barrier_manager() + params + .local_barrier_manager .register_sender(params.actor_context.id, sender); let state_table = diff --git a/src/stream/src/from_proto/over_window.rs b/src/stream/src/from_proto/over_window.rs index c01e01824fe1..d2fc2893b613 100644 --- a/src/stream/src/from_proto/over_window.rs +++ b/src/stream/src/from_proto/over_window.rs @@ -24,7 +24,7 @@ use super::ExecutorBuilder; use crate::common::table::state_table::StateTable; use crate::error::StreamResult; use crate::executor::{BoxedExecutor, Executor, OverWindowExecutor, OverWindowExecutorArgs}; -use crate::task::{ExecutorParams, LocalStreamManagerCore}; +use crate::task::ExecutorParams; pub struct OverWindowExecutorBuilder; @@ -35,7 +35,6 @@ impl ExecutorBuilder for 
OverWindowExecutorBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); let calls: Vec<_> = node @@ -73,7 +72,7 @@ impl ExecutorBuilder for OverWindowExecutorBuilder { order_key_order_types, state_table, - watermark_epoch: stream.get_watermark_epoch(), + watermark_epoch: params.watermark_epoch, metrics: params.executor_stats, chunk_size: params.env.config().developer.chunk_size, diff --git a/src/stream/src/from_proto/project.rs b/src/stream/src/from_proto/project.rs index 9fd1176daffd..6fc9ada39aed 100644 --- a/src/stream/src/from_proto/project.rs +++ b/src/stream/src/from_proto/project.rs @@ -30,7 +30,6 @@ impl ExecutorBuilder for ProjectExecutorBuilder { params: ExecutorParams, node: &Self::Node, _store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); let project_exprs: Vec<_> = node diff --git a/src/stream/src/from_proto/project_set.rs b/src/stream/src/from_proto/project_set.rs index b4879e725535..b98e9e467ccf 100644 --- a/src/stream/src/from_proto/project_set.rs +++ b/src/stream/src/from_proto/project_set.rs @@ -29,7 +29,6 @@ impl ExecutorBuilder for ProjectSetExecutorBuilder { params: ExecutorParams, node: &Self::Node, _store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); let select_list: Vec<_> = node diff --git a/src/stream/src/from_proto/row_id_gen.rs b/src/stream/src/from_proto/row_id_gen.rs index 4996ac14268f..1333a99aebfa 100644 --- a/src/stream/src/from_proto/row_id_gen.rs +++ b/src/stream/src/from_proto/row_id_gen.rs @@ -19,7 +19,7 @@ use super::ExecutorBuilder; use crate::error::StreamResult; use crate::executor::row_id_gen::RowIdGenExecutor; use crate::executor::BoxedExecutor; -use crate::task::{ExecutorParams, LocalStreamManagerCore}; +use 
crate::task::ExecutorParams; pub struct RowIdGenExecutorBuilder; @@ -30,7 +30,6 @@ impl ExecutorBuilder for RowIdGenExecutorBuilder { params: ExecutorParams, node: &Self::Node, _store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [upstream]: [_; 1] = params.input.try_into().unwrap(); tracing::debug!("row id gen executor: {:?}", params.vnode_bitmap); diff --git a/src/stream/src/from_proto/simple_agg.rs b/src/stream/src/from_proto/simple_agg.rs index 1bca5e18c67d..209a08daf6a2 100644 --- a/src/stream/src/from_proto/simple_agg.rs +++ b/src/stream/src/from_proto/simple_agg.rs @@ -34,7 +34,6 @@ impl ExecutorBuilder for SimpleAggExecutorBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); let agg_calls: Vec = node @@ -63,14 +62,14 @@ impl ExecutorBuilder for SimpleAggExecutorBuilder { actor_ctx: params.actor_context, info: params.info, - extreme_cache_size: stream.config.developer.unsafe_extreme_cache_size, + extreme_cache_size: params.env.config().developer.unsafe_extreme_cache_size, agg_calls, row_count_index: node.get_row_count_index() as usize, storages, intermediate_state_table, distinct_dedup_tables, - watermark_epoch: stream.get_watermark_epoch(), + watermark_epoch: params.watermark_epoch, extra: SimpleAggExecutorExtraArgs {}, })? 
.boxed()) diff --git a/src/stream/src/from_proto/sink.rs b/src/stream/src/from_proto/sink.rs index c1951ed4f709..fb05faf90dd8 100644 --- a/src/stream/src/from_proto/sink.rs +++ b/src/stream/src/from_proto/sink.rs @@ -39,7 +39,6 @@ impl ExecutorBuilder for SinkExecutorBuilder { params: ExecutorParams, node: &Self::Node, state_store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input_executor]: [_; 1] = params.input.try_into().unwrap(); @@ -106,7 +105,7 @@ impl ExecutorBuilder for SinkExecutorBuilder { let sink_id_str = format!("{}", sink_id.sink_id); - let sink_metrics = stream.streaming_metrics.new_sink_metrics( + let sink_metrics = params.executor_stats.new_sink_metrics( &params.info.identity, sink_id_str.as_str(), connector, diff --git a/src/stream/src/from_proto/sort.rs b/src/stream/src/from_proto/sort.rs index e19a657e227d..dcb59f77f954 100644 --- a/src/stream/src/from_proto/sort.rs +++ b/src/stream/src/from_proto/sort.rs @@ -29,7 +29,6 @@ impl ExecutorBuilder for SortExecutorBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); let vnodes = Arc::new(params.vnode_bitmap.expect("vnodes not set for sort")); diff --git a/src/stream/src/from_proto/source/fs_fetch.rs b/src/stream/src/from_proto/source/fs_fetch.rs index 8d8cb78a80b1..98d024fbc0e6 100644 --- a/src/stream/src/from_proto/source/fs_fetch.rs +++ b/src/stream/src/from_proto/source/fs_fetch.rs @@ -29,7 +29,7 @@ use crate::executor::{ StreamSourceCore, }; use crate::from_proto::ExecutorBuilder; -use crate::task::{ExecutorParams, LocalStreamManagerCore}; +use crate::task::ExecutorParams; pub struct FsFetchExecutorBuilder; @@ -40,7 +40,6 @@ impl ExecutorBuilder for FsFetchExecutorBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [upstream]: [_; 1] =
params.input.try_into().unwrap(); diff --git a/src/stream/src/from_proto/source/trad_source.rs b/src/stream/src/from_proto/source/trad_source.rs index 5b93585aea4b..b8a87ddf86a8 100644 --- a/src/stream/src/from_proto/source/trad_source.rs +++ b/src/stream/src/from_proto/source/trad_source.rs @@ -41,12 +41,10 @@ impl ExecutorBuilder for SourceExecutorBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { let (sender, barrier_receiver) = unbounded_channel(); - stream - .context - .barrier_manager() + params + .local_barrier_manager .register_sender(params.actor_context.id, sender); let system_params = params.env.system_params_manager_ref().get_params(); diff --git a/src/stream/src/from_proto/stateless_simple_agg.rs b/src/stream/src/from_proto/stateless_simple_agg.rs index c0b5dcb3a426..87617b42f83e 100644 --- a/src/stream/src/from_proto/stateless_simple_agg.rs +++ b/src/stream/src/from_proto/stateless_simple_agg.rs @@ -27,7 +27,6 @@ impl ExecutorBuilder for StatelessSimpleAggExecutorBuilder { params: ExecutorParams, node: &Self::Node, _store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); let agg_calls: Vec = node diff --git a/src/stream/src/from_proto/stream_cdc_scan.rs b/src/stream/src/from_proto/stream_cdc_scan.rs index 27ca849986fc..e6d8525eacde 100644 --- a/src/stream/src/from_proto/stream_cdc_scan.rs +++ b/src/stream/src/from_proto/stream_cdc_scan.rs @@ -23,7 +23,7 @@ use risingwave_pb::stream_plan::StreamCdcScanNode; use super::*; use crate::common::table::state_table::StateTable; -use crate::executor::{CdcBackfillExecutor, ExternalStorageTable}; +use crate::executor::{CdcBackfillExecutor, ExternalStorageTable, FlowControlExecutor}; pub struct StreamCdcScanExecutorBuilder; @@ -34,7 +34,6 @@ impl ExecutorBuilder for StreamCdcScanExecutorBuilder { params: ExecutorParams, node: &Self::Node, 
state_store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [upstream]: [_; 1] = params.input.try_into().unwrap(); @@ -88,8 +87,14 @@ impl ExecutorBuilder for StreamCdcScanExecutorBuilder { let state_table = StateTable::from_table_catalog(node.get_state_table()?, state_store, vnodes).await; - // TODO(kwannoel): Should we apply flow control here as well? - Ok(CdcBackfillExecutor::new( + // adjust backfill chunk size if rate limit is set. + let chunk_size = params.env.config().developer.chunk_size; + let backfill_chunk_size = node + .rate_limit + .map(|x| std::cmp::min(x as usize, chunk_size)) + .unwrap_or(chunk_size); + + let executor = CdcBackfillExecutor::new( params.actor_context.clone(), params.info, external_table, @@ -98,7 +103,14 @@ impl ExecutorBuilder for StreamCdcScanExecutorBuilder { None, params.executor_stats, state_table, - params.env.config().developer.chunk_size, + backfill_chunk_size, + ) + .boxed(); + + Ok(FlowControlExecutor::new( + executor, + params.actor_context, + node.rate_limit.map(|x| x as _), ) .boxed()) } diff --git a/src/stream/src/from_proto/stream_scan.rs b/src/stream/src/from_proto/stream_scan.rs index a8868725d192..c3f15802afc8 100644 --- a/src/stream/src/from_proto/stream_scan.rs +++ b/src/stream/src/from_proto/stream_scan.rs @@ -37,12 +37,11 @@ impl ExecutorBuilder for StreamScanExecutorBuilder { params: ExecutorParams, node: &Self::Node, state_store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [upstream, snapshot]: [_; 2] = params.input.try_into().unwrap(); // For reporting the progress. 
- let progress = stream - .context + let progress = params + .local_barrier_manager .register_create_mview_progress(params.actor_context.id); let output_indices = node @@ -90,7 +89,7 @@ impl ExecutorBuilder for StreamScanExecutorBuilder { state_table, output_indices, progress, - stream.streaming_metrics.clone(), + params.executor_stats.clone(), params.env.config().developer.chunk_size, node.rate_limit.map(|x| x as _), ) @@ -133,7 +132,7 @@ impl ExecutorBuilder for StreamScanExecutorBuilder { state_table, output_indices, progress, - stream.streaming_metrics.clone(), + params.executor_stats.clone(), params.env.config().developer.chunk_size, node.rate_limit.map(|x| x as _), ) diff --git a/src/stream/src/from_proto/temporal_join.rs b/src/stream/src/from_proto/temporal_join.rs index f57fbbdac1f1..7c91b8c1d261 100644 --- a/src/stream/src/from_proto/temporal_join.rs +++ b/src/stream/src/from_proto/temporal_join.rs @@ -35,7 +35,6 @@ impl ExecutorBuilder for TemporalJoinExecutorBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { let table_desc: &StorageTableDesc = node.get_table_desc()?; let table = { @@ -113,7 +112,7 @@ impl ExecutorBuilder for TemporalJoinExecutorBuilder { output_indices, table_output_indices, table_stream_key_indices, - watermark_epoch: stream.get_watermark_epoch(), + watermark_epoch: params.watermark_epoch, chunk_size: params.env.config().developer.chunk_size, metrics: params.executor_stats, join_type_proto: node.get_join_type()?, diff --git a/src/stream/src/from_proto/top_n.rs b/src/stream/src/from_proto/top_n.rs index a4bcf51277cf..5c74f9f97e0e 100644 --- a/src/stream/src/from_proto/top_n.rs +++ b/src/stream/src/from_proto/top_n.rs @@ -30,7 +30,6 @@ impl ExecutorBuilder for TopNExecutorBuilder StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); diff --git a/src/stream/src/from_proto/union.rs b/src/stream/src/from_proto/union.rs index 
485c0bd0a35e..e11ea8f3f2d2 100644 --- a/src/stream/src/from_proto/union.rs +++ b/src/stream/src/from_proto/union.rs @@ -26,7 +26,6 @@ impl ExecutorBuilder for UnionExecutorBuilder { params: ExecutorParams, _node: &Self::Node, _store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { Ok(UnionExecutor::new(params.info, params.input).boxed()) } diff --git a/src/stream/src/from_proto/values.rs b/src/stream/src/from_proto/values.rs index a45393e200a5..b3ed75e4f090 100644 --- a/src/stream/src/from_proto/values.rs +++ b/src/stream/src/from_proto/values.rs @@ -21,7 +21,7 @@ use tokio::sync::mpsc::unbounded_channel; use super::ExecutorBuilder; use crate::error::StreamResult; use crate::executor::{BoxedExecutor, ValuesExecutor}; -use crate::task::{ExecutorParams, LocalStreamManagerCore}; +use crate::task::ExecutorParams; /// Build a `ValuesExecutor` for stream. As is a leaf, current workaround registers a `sender` for /// this executor. May refractor with `BarrierRecvExecutor` in the near future. 
@@ -34,15 +34,13 @@ impl ExecutorBuilder for ValuesExecutorBuilder { params: ExecutorParams, node: &ValuesNode, _store: impl StateStore, - stream: &mut LocalStreamManagerCore, ) -> StreamResult { let (sender, barrier_receiver) = unbounded_channel(); - stream - .context - .barrier_manager() + params + .local_barrier_manager .register_sender(params.actor_context.id, sender); - let progress = stream - .context + let progress = params + .local_barrier_manager .register_create_mview_progress(params.actor_context.id); let rows = node .get_tuples() diff --git a/src/stream/src/from_proto/watermark_filter.rs b/src/stream/src/from_proto/watermark_filter.rs index 44618e812fd8..cc3382bcc65f 100644 --- a/src/stream/src/from_proto/watermark_filter.rs +++ b/src/stream/src/from_proto/watermark_filter.rs @@ -34,7 +34,6 @@ impl ExecutorBuilder for WatermarkFilterBuilder { params: ExecutorParams, node: &Self::Node, store: impl StateStore, - _stream: &mut LocalStreamManagerCore, ) -> StreamResult { let [input]: [_; 1] = params.input.try_into().unwrap(); let watermark_descs = node.get_watermark_descs().clone(); diff --git a/src/stream/src/task/barrier_manager/progress.rs b/src/stream/src/task/barrier_manager/progress.rs index 2b83050a57d9..2a84a0d01d51 100644 --- a/src/stream/src/task/barrier_manager/progress.rs +++ b/src/stream/src/task/barrier_manager/progress.rs @@ -15,7 +15,7 @@ use super::LocalBarrierManager; use crate::task::barrier_manager::LocalBarrierEvent::ReportCreateProgress; use crate::task::barrier_manager::LocalBarrierWorker; -use crate::task::{ActorId, SharedContext}; +use crate::task::ActorId; type ConsumedEpoch = u64; type ConsumedRows = u64; @@ -161,7 +161,7 @@ impl CreateMviewProgress { } } -impl SharedContext { +impl LocalBarrierManager { /// Create a struct for reporting the progress of creating mview. The backfill executors should /// report the progress of barrier rearranging continuously using this. 
The updated progress /// will be collected by the local barrier manager and reported to the meta service in this @@ -174,6 +174,6 @@ impl SharedContext { backfill_actor_id: ActorId, ) -> CreateMviewProgress { trace!("register create mview progress: {}", backfill_actor_id); - CreateMviewProgress::new(self.barrier_manager.clone(), backfill_actor_id) + CreateMviewProgress::new(self.clone(), backfill_actor_id) } } diff --git a/src/stream/src/task/mod.rs b/src/stream/src/task/mod.rs index b1f1da526bcc..61ce575e5b18 100644 --- a/src/stream/src/task/mod.rs +++ b/src/stream/src/task/mod.rs @@ -13,7 +13,6 @@ // limitations under the License. use std::collections::HashMap; -use std::sync::Arc; use anyhow::anyhow; use parking_lot::{MappedMutexGuard, Mutex, MutexGuard, RwLock}; @@ -31,11 +30,8 @@ mod stream_manager; pub use barrier_manager::*; pub use env::*; -use risingwave_storage::StateStoreImpl; pub use stream_manager::*; -use crate::executor::monitor::StreamingMetrics; - pub type ConsumableChannelPair = (Option, Option); pub type ActorId = u32; pub type FragmentId = u32; @@ -79,8 +75,6 @@ pub struct SharedContext { // disconnected. 
pub(crate) compute_client_pool: ComputeClientPool, - pub(crate) barrier_manager: LocalBarrierManager, - pub(crate) config: StreamingConfig, } @@ -93,18 +87,12 @@ impl std::fmt::Debug for SharedContext { } impl SharedContext { - pub fn new( - addr: HostAddr, - state_store: StateStoreImpl, - config: &StreamingConfig, - streaming_metrics: Arc, - ) -> Self { + pub fn new(addr: HostAddr, config: &StreamingConfig) -> Self { Self { channel_map: Default::default(), actor_infos: Default::default(), addr, compute_client_pool: ComputeClientPool::default(), - barrier_manager: LocalBarrierManager::new(state_store, streaming_metrics), config: config.clone(), } } @@ -118,7 +106,6 @@ impl SharedContext { actor_infos: Default::default(), addr: LOCAL_TEST_ADDR.clone(), compute_client_pool: ComputeClientPool::default(), - barrier_manager: LocalBarrierManager::for_test(), config: StreamingConfig { developer: StreamingDeveloperConfig { exchange_initial_permits: permit::for_test::INITIAL_PERMITS, @@ -131,10 +118,6 @@ impl SharedContext { } } - pub fn barrier_manager(&self) -> &LocalBarrierManager { - &self.barrier_manager - } - /// Get the channel pair for the given actor ids. If the channel pair does not exist, create one /// with the configured permits. 
fn get_or_insert_channels( @@ -160,22 +143,13 @@ impl SharedContext { .ok_or_else(|| anyhow!("sender for {ids:?} has already been taken").into()) } - pub fn take_receiver(&self, ids: &UpDownActorIds) -> StreamResult { - self.get_or_insert_channels(*ids) + pub fn take_receiver(&self, ids: UpDownActorIds) -> StreamResult { + self.get_or_insert_channels(ids) .1 .take() .ok_or_else(|| anyhow!("receiver for {ids:?} has already been taken").into()) } - pub fn retain_channel(&self, mut f: F) - where - F: FnMut(&(u32, u32)) -> bool, - { - self.channel_map - .lock() - .retain(|up_down_ids, _| f(up_down_ids)); - } - pub fn clear_channels(&self) { self.channel_map.lock().clear() } @@ -187,6 +161,20 @@ impl SharedContext { .cloned() .ok_or_else(|| anyhow!("actor {} not found in info table", actor_id).into()) } + + pub fn config(&self) -> &StreamingConfig { + &self.config + } + + pub fn drop_actors(&self, actors: &[ActorId]) { + self.channel_map + .lock() + .retain(|(up_id, _), _| !actors.contains(up_id)); + let mut actor_infos = self.actor_infos.write(); + for actor_id in actors { + actor_infos.remove(actor_id); + } + } } /// Generate a globally unique executor id. 
diff --git a/src/stream/src/task/stream_manager.rs b/src/stream/src/task/stream_manager.rs index 2cf37360f52b..46f6d4fe5a43 100644 --- a/src/stream/src/task/stream_manager.rs +++ b/src/stream/src/task/stream_manager.rs @@ -24,6 +24,7 @@ use async_recursion::async_recursion; use futures::FutureExt; use hytra::TrAdder; use itertools::Itertools; +use parking_lot::Mutex; use risingwave_common::bail; use risingwave_common::buffer::Bitmap; use risingwave_common::catalog::{Field, Schema}; @@ -34,21 +35,19 @@ use risingwave_pb::common::ActorInfo; use risingwave_pb::stream_plan; use risingwave_pb::stream_plan::barrier::BarrierKind; use risingwave_pb::stream_plan::stream_node::NodeBody; -use risingwave_pb::stream_plan::StreamNode; +use risingwave_pb::stream_plan::{StreamActor, StreamNode}; use risingwave_storage::monitor::HummockTraceFutureExt; use risingwave_storage::{dispatch_state_store, StateStore, StateStoreImpl}; use thiserror_ext::AsReport; -use tokio::sync::Mutex; use tokio::task::JoinHandle; use super::{unique_executor_id, unique_operator_id, BarrierCompleteResult}; use crate::error::StreamResult; -use crate::executor::exchange::permit::Receiver; use crate::executor::monitor::StreamingMetrics; use crate::executor::subtask::SubtaskHandle; use crate::executor::*; use crate::from_proto::create_executor; -use crate::task::{ActorId, FragmentId, SharedContext, StreamEnvironment, UpDownActorIds}; +use crate::task::{ActorId, FragmentId, LocalBarrierManager, SharedContext, StreamEnvironment}; #[cfg(test)] pub static LOCAL_TEST_ADDR: std::sync::LazyLock = @@ -67,41 +66,30 @@ pub struct LocalStreamManagerCore { /// termination. handles: HashMap, - pub(crate) context: Arc, - /// Stores all actor information, taken after actor built. actors: HashMap, /// Stores all actor tokio runtime monitoring tasks. 
actor_monitor_tasks: HashMap, - - /// The state store implement - state_store: StateStoreImpl, - - /// Metrics of the stream manager - pub(crate) streaming_metrics: Arc, - - /// Config of streaming engine - pub(crate) config: StreamingConfig, - - /// Manages the await-trees of all actors. - await_tree_reg: Option>, - - /// Watermark epoch number. - watermark_epoch: AtomicU64Ref, - - total_mem_val: Arc>, } /// `LocalStreamManager` manages all stream executors in this project. pub struct LocalStreamManager { core: Mutex, - // Maintain a copy of the core to reduce async locks state_store: StateStoreImpl, context: Arc, + streaming_metrics: Arc, total_mem_val: Arc>, + + /// Watermark epoch number. + watermark_epoch: AtomicU64Ref, + + local_barrier_manager: LocalBarrierManager, + + /// Manages the await-trees of all actors. + await_tree_reg: Option>>, } /// Report expression evaluation errors to the actor context. @@ -152,6 +140,13 @@ pub struct ExecutorParams { /// Used for reporting expression evaluation errors. 
pub eval_error_report: ActorEvalErrorReport, + + /// `watermark_epoch` field in `MemoryManager` + pub watermark_epoch: AtomicU64Ref, + + pub shared_context: Arc, + + pub local_barrier_manager: LocalBarrierManager, } impl Debug for ExecutorParams { @@ -168,34 +163,29 @@ impl Debug for ExecutorParams { } impl LocalStreamManager { - fn with_core(core: LocalStreamManagerCore) -> Self { - Self { - state_store: core.state_store.clone(), - context: core.context.clone(), - total_mem_val: core.total_mem_val.clone(), - core: Mutex::new(core), - } - } - pub fn new( addr: HostAddr, state_store: StateStoreImpl, streaming_metrics: Arc, config: StreamingConfig, await_tree_config: Option, + watermark_epoch: AtomicU64Ref, ) -> Self { - Self::with_core(LocalStreamManagerCore::new( - addr, + let local_barrier_manager = + LocalBarrierManager::new(state_store.clone(), streaming_metrics.clone()); + let context = Arc::new(SharedContext::new(addr, &config)); + let core = LocalStreamManagerCore::new(context.config.actor_runtime_worker_threads_num); + Self { state_store, + context, + total_mem_val: Arc::new(TrAdder::new()), + core: Mutex::new(core), streaming_metrics, - config, - await_tree_config, - )) - } - - #[cfg(test)] - pub fn for_test() -> Self { - Self::with_core(LocalStreamManagerCore::for_test()) + watermark_epoch, + local_barrier_manager, + await_tree_reg: await_tree_config + .map(|config| Mutex::new(await_tree::Registry::new(config))), + } } /// Print the traces of all actors periodically, used for debugging only. 
@@ -203,13 +193,13 @@ impl LocalStreamManager { tokio::spawn(async move { loop { tokio::time::sleep(std::time::Duration::from_millis(5000)).await; - let mut core = self.core.lock().await; let mut o = std::io::stdout().lock(); - for (k, trace) in core + for (k, trace) in self .await_tree_reg - .as_mut() + .as_ref() .expect("async stack trace not enabled") + .lock() .iter() { writeln!(o, ">> Actor {}\n\n{}", k, trace).ok(); @@ -219,17 +209,16 @@ impl LocalStreamManager { } /// Get await-tree contexts for all actors. - pub async fn get_actor_traces(&self) -> HashMap { - let core = self.core.lock().await; - match &core.await_tree_reg { - Some(mgr) => mgr.iter().map(|(k, v)| (*k, v)).collect(), + pub fn get_actor_traces(&self) -> HashMap { + match &self.await_tree_reg.as_ref() { + Some(mgr) => mgr.lock().iter().map(|(k, v)| (*k, v)).collect(), None => Default::default(), } } /// Get all existing actor ids. - pub async fn all_actor_ids(&self) -> HashSet { - self.core.lock().await.handles.keys().cloned().collect() + pub fn all_actor_ids(&self) -> HashSet { + self.core.lock().handles.keys().cloned().collect() } /// Broadcast a barrier to all senders. Save a receiver in barrier manager @@ -240,12 +229,10 @@ impl LocalStreamManager { actor_ids_to_collect: impl IntoIterator, ) -> StreamResult<()> { if barrier.kind == BarrierKind::Initial { - let core = self.core.lock().await; - core.get_watermark_epoch() + self.watermark_epoch .store(barrier.epoch.curr, std::sync::atomic::Ordering::SeqCst); } - let barrier_manager = self.context.barrier_manager(); - barrier_manager + self.local_barrier_manager .send_barrier(barrier, actor_ids_to_send, actor_ids_to_collect) .await?; Ok(()) @@ -253,14 +240,13 @@ impl LocalStreamManager { /// Reset the state of the barrier manager. pub fn reset_barrier_manager(&self) { - self.context.barrier_manager().reset(); + self.local_barrier_manager.reset(); } /// Use `epoch` to find collect rx. And wait for all actor to be collected before /// returning. 
pub async fn collect_barrier(&self, epoch: u64) -> StreamResult { - self.context - .barrier_manager() + self.local_barrier_manager .await_epoch_completed(epoch) .await } @@ -272,8 +258,9 @@ impl LocalStreamManager { } /// Drop the resources of the given actors. - pub async fn drop_actors(&self, actors: &[ActorId]) -> StreamResult<()> { - let mut core = self.core.lock().await; + pub fn drop_actors(&self, actors: &[ActorId]) -> StreamResult<()> { + self.context.drop_actors(actors); + let mut core = self.core.lock(); for &id in actors { core.drop_actor(id); } @@ -283,7 +270,22 @@ impl LocalStreamManager { /// Force stop all actors on this worker, and then drop their resources. pub async fn stop_all_actors(&self) -> StreamResult<()> { - self.core.lock().await.stop_all_actors().await; + let actor_handles = self.core.lock().drain_actor_handles(); + for (actor_id, handle) in &actor_handles { + tracing::debug!("force stopping actor {}", actor_id); + handle.abort(); + } + for (actor_id, handle) in actor_handles { + tracing::debug!("join actor {}", actor_id); + let result = handle.await; + assert!(result.is_ok() || result.unwrap_err().is_cancelled()); + } + self.context.clear_channels(); + self.context.actor_infos.write().clear(); + self.core.lock().clear_state(); + if let Some(m) = self.await_tree_reg.as_ref() { + m.lock().clear(); + } self.reset_barrier_manager(); // Clear shared buffer in storage to release memory self.clear_storage_buffer().await; @@ -291,23 +293,11 @@ impl LocalStreamManager { Ok(()) } - pub async fn take_receiver(&self, ids: UpDownActorIds) -> StreamResult { - let core = self.core.lock().await; - core.context.take_receiver(&ids) - } - - pub async fn update_actors(&self, actors: &[stream_plan::StreamActor]) -> StreamResult<()> { - let mut core = self.core.lock().await; + pub fn update_actors(&self, actors: &[stream_plan::StreamActor]) -> StreamResult<()> { + let mut core = self.core.lock(); core.update_actors(actors) } - /// This function could only be 
called once during the lifecycle of `LocalStreamManager` for - /// now. - pub async fn update_actor_info(&self, actor_infos: &[ActorInfo]) -> StreamResult<()> { - let mut core = self.core.lock().await; - core.update_actor_info(actor_infos) - } - /// This function could only be called once during the lifecycle of `LocalStreamManager` for /// now. pub async fn build_actors( @@ -315,20 +305,29 @@ impl LocalStreamManager { actors: &[ActorId], env: StreamEnvironment, ) -> StreamResult<()> { - let mut core = self.core.lock().await; - core.build_actors(actors, env).await - } - - pub async fn config(&self) -> StreamingConfig { - let core = self.core.lock().await; - core.config.clone() + let actors = { + let mut core = self.core.lock(); + actors + .iter() + .map(|actor_id| { + core.actors + .remove(actor_id) + .ok_or_else(|| anyhow!("No such actor with actor id:{}", actor_id)) + }) + .try_collect()? + }; + let actors = self.create_actors(actors, env).await?; + self.core.lock().spawn_actors( + actors, + &self.streaming_metrics, + &self.local_barrier_manager, + self.await_tree_reg.as_ref(), + ); + Ok(()) } - /// After memory manager is created, it will store the watermark epoch in stream manager, so - /// stream executor can get it to build managed cache. 
- pub async fn set_watermark_epoch(&self, watermark_epoch: AtomicU64Ref) { - let mut guard = self.core.lock().await; - guard.watermark_epoch = watermark_epoch; + pub fn context(&self) -> &Arc { + &self.context } pub fn total_mem_usage(&self) -> usize { @@ -337,38 +336,10 @@ impl LocalStreamManager { } impl LocalStreamManagerCore { - fn new( - addr: HostAddr, - state_store: StateStoreImpl, - streaming_metrics: Arc, - config: StreamingConfig, - await_tree_config: Option, - ) -> Self { - let context = SharedContext::new( - addr, - state_store.clone(), - &config, - streaming_metrics.clone(), - ); - Self::new_inner( - state_store, - context, - streaming_metrics, - config, - await_tree_config, - ) - } - - fn new_inner( - state_store: StateStoreImpl, - context: SharedContext, - streaming_metrics: Arc, - config: StreamingConfig, - await_tree_config: Option, - ) -> Self { + fn new(actor_runtime_worker_threads_num: Option) -> Self { let runtime = { let mut builder = tokio::runtime::Builder::new_multi_thread(); - if let Some(worker_threads_num) = config.actor_runtime_worker_threads_num { + if let Some(worker_threads_num) = actor_runtime_worker_threads_num { builder.worker_threads(worker_threads_num); } builder @@ -381,34 +352,16 @@ impl LocalStreamManagerCore { Self { runtime: runtime.into(), handles: HashMap::new(), - context: Arc::new(context), actors: HashMap::new(), actor_monitor_tasks: HashMap::new(), - state_store, - streaming_metrics, - config, - await_tree_reg: await_tree_config.map(await_tree::Registry::new), - watermark_epoch: Arc::new(AtomicU64::new(0)), - total_mem_val: Arc::new(TrAdder::new()), } } +} - #[cfg(test)] - fn for_test() -> Self { - use risingwave_storage::monitor::MonitoredStorageMetrics; - let streaming_metrics = Arc::new(StreamingMetrics::unused()); - Self::new_inner( - StateStoreImpl::shared_in_memory_store(Arc::new(MonitoredStorageMetrics::unused())), - SharedContext::for_test(), - streaming_metrics, - StreamingConfig::default(), - None, - ) - } - 
+impl LocalStreamManager { /// Create dispatchers with downstream information registered before fn create_dispatcher( - &mut self, + &self, input: BoxedExecutor, dispatchers: &[stream_plan::Dispatcher], actor_id: ActorId, @@ -433,7 +386,7 @@ impl LocalStreamManagerCore { #[allow(clippy::too_many_arguments)] #[async_recursion] async fn create_nodes_inner( - &mut self, + &self, fragment_id: FragmentId, node: &stream_plan::StreamNode, env: StreamEnvironment, @@ -517,9 +470,12 @@ impl LocalStreamManagerCore { actor_context: actor_context.clone(), vnode_bitmap, eval_error_report, + watermark_epoch: self.watermark_epoch.clone(), + shared_context: self.context.clone(), + local_barrier_manager: self.local_barrier_manager.clone(), }; - let executor = create_executor(executor_params, self, node, store).await?; + let executor = create_executor(executor_params, node, store).await?; assert_eq!( executor.pk_indices(), &pk_indices, @@ -537,7 +493,7 @@ impl LocalStreamManagerCore { let executor = WrapperExecutor::new( executor, actor_context.clone(), - self.config.developer.enable_executor_row_count, + env.config().developer.enable_executor_row_count, ) .boxed(); @@ -559,7 +515,7 @@ impl LocalStreamManagerCore { /// Create a chain(tree) of nodes and return the head executor. 
async fn create_nodes( - &mut self, + &self, fragment_id: FragmentId, node: &stream_plan::StreamNode, env: StreamEnvironment, @@ -568,7 +524,7 @@ impl LocalStreamManagerCore { ) -> StreamResult<(BoxedExecutor, Vec)> { let mut subtasks = vec![]; - let executor = dispatch_state_store!(self.state_store.clone(), store, { + let executor = dispatch_state_store!(env.state_store(), store, { self.create_nodes_inner( fragment_id, node, @@ -585,23 +541,19 @@ impl LocalStreamManagerCore { Ok((executor, subtasks)) } - async fn build_actors( - &mut self, - actors: &[ActorId], + async fn create_actors( + &self, + actors: Vec, env: StreamEnvironment, - ) -> StreamResult<()> { - for &actor_id in actors { - let actor = self - .actors - .remove(&actor_id) - .ok_or_else(|| anyhow!("No such actor with actor id:{}", actor_id))?; - let mview_definition = &actor.mview_definition; + ) -> StreamResult>> { + let mut ret = Vec::with_capacity(actors.len()); + for actor in actors { + let actor_id = actor.actor_id; let actor_context = ActorContext::create_with_metrics( - actor_id, - actor.fragment_id, + &actor, self.total_mem_val.clone(), self.streaming_metrics.clone(), - self.config.unique_user_stream_errors, + env.config().unique_user_stream_errors, ); let vnode_bitmap = actor.vnode_bitmap.as_ref().map(|b| b.into()); let expr_context = actor.expr_context.clone().unwrap(); @@ -623,30 +575,46 @@ impl LocalStreamManagerCore { let actor = Actor::new( dispatcher, subtasks, - self.context.clone(), self.streaming_metrics.clone(), actor_context.clone(), expr_context, + self.local_barrier_manager.clone(), ); + ret.push(actor); + } + Ok(ret) + } +} + +impl LocalStreamManagerCore { + fn spawn_actors( + &mut self, + actors: Vec>, + streaming_metrics: &Arc, + barrier_manager: &LocalBarrierManager, + await_tree_reg: Option<&Mutex>>, + ) { + for actor in actors { let monitor = tokio_metrics::TaskMonitor::new(); + let actor_context = actor.actor_context.clone(); + let actor_id = actor_context.id; let handle = 
{ - let context = self.context.clone(); - let actor = async move { - if let Err(err) = actor.run().await { + let trace_span = format!("Actor {actor_id}: `{}`", actor_context.mview_definition); + let barrier_manager = barrier_manager.clone(); + let actor = actor.run().map(move |result| { + if let Err(err) = result { // TODO: check error type and panic if it's unexpected. // Intentionally use `?` on the report to also include the backtrace. tracing::error!(actor_id, error = ?err.as_report(), "actor exit with error"); - context.barrier_manager().notify_failure(actor_id, err); + barrier_manager.notify_failure(actor_id, err); } - }; - let traced = match &mut self.await_tree_reg { + }); + let traced = match await_tree_reg { Some(m) => m - .register( - actor_id, - format!("Actor {actor_id}: `{}`", mview_definition), - ) + .lock() + .register(actor_id, trace_span) .instrument(actor) .left_future(), None => actor.right_future(), @@ -654,7 +622,7 @@ impl LocalStreamManagerCore { let instrumented = monitor.instrument(traced); #[cfg(enable_task_local_alloc)] { - let metrics = self.streaming_metrics.clone(); + let metrics = streaming_metrics.clone(); let actor_id_str = actor_id.to_string(); let fragment_id_str = actor_context.fragment_id.to_string(); let allocation_stated = task_stats_alloc::allocation_stat( @@ -678,10 +646,10 @@ impl LocalStreamManagerCore { }; self.handles.insert(actor_id, handle); - if self.streaming_metrics.level >= MetricLevel::Debug { + if streaming_metrics.level >= MetricLevel::Debug { tracing::info!("Tokio metrics are enabled because metrics_level >= Debug"); let actor_id_str = actor_id.to_string(); - let metrics = self.streaming_metrics.clone(); + let metrics = streaming_metrics.clone(); let actor_monitor_task = self.runtime.spawn(async move { loop { let task_metrics = monitor.cumulative(); @@ -736,8 +704,6 @@ impl LocalStreamManagerCore { .insert(actor_id, actor_monitor_task); } } - - Ok(()) } pub fn take_all_handles(&mut self) -> StreamResult> { @@ 
-757,8 +723,12 @@ impl LocalStreamManagerCore { }) .try_collect() } +} - fn update_actor_info(&mut self, new_actor_infos: &[ActorInfo]) -> StreamResult<()> { +impl LocalStreamManager { + /// This function could only be called once during the lifecycle of `LocalStreamManager` for + /// now. + pub fn update_actor_info(&self, new_actor_infos: &[ActorInfo]) -> StreamResult<()> { let mut actor_infos = self.context.actor_infos.write(); for actor in new_actor_infos { let ret = actor_infos.insert(actor.get_actor_id(), actor.clone()); @@ -773,15 +743,15 @@ impl LocalStreamManagerCore { } Ok(()) } +} +impl LocalStreamManagerCore { /// `drop_actor` is invoked by meta node via RPC once the stop barrier arrives at the /// sink. All the actors in the actors should stop themselves before this method is invoked. fn drop_actor(&mut self, actor_id: ActorId) { - self.context.retain_channel(|&(up_id, _)| up_id != actor_id); self.actor_monitor_tasks .remove(&actor_id) .inspect(|handle| handle.abort()); - self.context.actor_infos.write().remove(&actor_id); self.actors.remove(&actor_id); // Task should have already stopped when this method is invoked. There might be some @@ -790,25 +760,15 @@ impl LocalStreamManagerCore { self.handles.remove(&actor_id); } + fn drain_actor_handles(&mut self) -> Vec<(ActorId, ActorHandle)> { + self.handles.drain().collect() + } + /// `stop_all_actors` is invoked by meta node via RPC for recovery purpose. Different from the /// `drop_actor`, the execution of the actors will be aborted. 
- async fn stop_all_actors(&mut self) { - for (actor_id, handle) in &self.handles { - tracing::debug!("force stopping actor {}", actor_id); - handle.abort(); - } - for (actor_id, handle) in self.handles.drain() { - tracing::debug!("join actor {}", actor_id); - let result = handle.await; - assert!(result.is_ok() || result.unwrap_err().is_cancelled()); - } + fn clear_state(&mut self) { self.actors.clear(); - self.context.clear_channels(); - if let Some(m) = self.await_tree_reg.as_mut() { - m.clear(); - } self.actor_monitor_tasks.clear(); - self.context.actor_infos.write().clear(); } fn update_actors(&mut self, actors: &[stream_plan::StreamActor]) -> StreamResult<()> { @@ -820,11 +780,6 @@ impl LocalStreamManagerCore { Ok(()) } - - /// When executor need to create cache, it will call this needs the watermark epoch for evict. - pub fn get_watermark_epoch(&self) -> AtomicU64Ref { - self.watermark_epoch.clone() - } } #[cfg(test)] diff --git a/src/test_runner/src/test_runner.rs b/src/test_runner/src/test_runner.rs index 38481880b25f..14db70d35118 100644 --- a/src/test_runner/src/test_runner.rs +++ b/src/test_runner/src/test_runner.rs @@ -82,7 +82,7 @@ pub fn run_test_inner(cases: &[&TestDescAndFn], hook: impl TestHook + 'static + test_main(&args, cases, None) } -thread_local!(static FS: RefCell>> = RefCell::new(None)); +thread_local!(static FS: RefCell>> = const { RefCell::new(None) }); #[derive(Clone)] struct FailPointHook; diff --git a/src/tests/sqlsmith/src/lib.rs b/src/tests/sqlsmith/src/lib.rs index 2d8c23a52b74..34ab90cb497a 100644 --- a/src/tests/sqlsmith/src/lib.rs +++ b/src/tests/sqlsmith/src/lib.rs @@ -274,6 +274,7 @@ CREATE TABLE t3(v1 int, v2 bool, v3 smallint); options: [], }, ], + wildcard_idx: None, constraints: [], with_options: [], source_schema: None, @@ -319,6 +320,7 @@ CREATE TABLE t3(v1 int, v2 bool, v3 smallint); options: [], }, ], + wildcard_idx: None, constraints: [], with_options: [], source_schema: None, @@ -375,6 +377,7 @@ CREATE TABLE t3(v1 
int, v2 bool, v3 smallint); options: [], }, ], + wildcard_idx: None, constraints: [], with_options: [], source_schema: None, @@ -507,6 +510,7 @@ CREATE TABLE t4(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY, v3 bool PRIMARY KEY ], }, ], + wildcard_idx: None, constraints: [], with_options: [], source_schema: None, @@ -559,6 +563,7 @@ CREATE TABLE t4(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY, v3 bool PRIMARY KEY ], }, ], + wildcard_idx: None, constraints: [], with_options: [], source_schema: None, @@ -618,6 +623,7 @@ CREATE TABLE t4(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY, v3 bool PRIMARY KEY ], }, ], + wildcard_idx: None, constraints: [], with_options: [], source_schema: None, @@ -695,6 +701,7 @@ CREATE TABLE t4(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY, v3 bool PRIMARY KEY ], }, ], + wildcard_idx: None, constraints: [], with_options: [], source_schema: None, diff --git a/src/tests/sqlsmith/src/sql_gen/relation.rs b/src/tests/sqlsmith/src/sql_gen/relation.rs index 05d67d96c422..a5c6b7d27545 100644 --- a/src/tests/sqlsmith/src/sql_gen/relation.rs +++ b/src/tests/sqlsmith/src/sql_gen/relation.rs @@ -219,11 +219,8 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { right_table: Table, ) -> Option { // We always generate an equi join, to avoid stream nested loop join. 
- let Some((base_join_on_expr, remaining_equi_columns)) = - self.gen_single_equi_join_expr(left_columns, right_columns) - else { - return None; - }; + let (base_join_on_expr, remaining_equi_columns) = + self.gen_single_equi_join_expr(left_columns, right_columns)?; // Add more expressions let extra_expr = match self.rng.gen_range(1..=100) { @@ -263,11 +260,8 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { right_columns: Vec, right_table: Table, ) -> Option { - let Some(join_constraint) = - self.gen_join_constraint(left_columns, left_table, right_columns, right_table) - else { - return None; - }; + let join_constraint = + self.gen_join_constraint(left_columns, left_table, right_columns, right_table)?; // NOTE: INNER JOIN works fine, usually does not encounter `StreamNestedLoopJoin` much. // If many failures due to `StreamNestedLoopJoin`, try disable the others. @@ -290,14 +284,12 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { let left_columns = left_table.get_qualified_columns(); let (right_factor, right_table) = self.gen_table_factor(); let right_columns = right_table.get_qualified_columns(); - let Some(join_operator) = self.gen_join_operator( + let join_operator = self.gen_join_operator( left_columns, left_table.clone(), right_columns, right_table.clone(), - ) else { - return None; - }; + )?; let right_factor_with_join = Join { relation: right_factor, diff --git a/src/tests/sqlsmith/src/sql_gen/types.rs b/src/tests/sqlsmith/src/sql_gen/types.rs index b713689218a6..b4f9d01d294a 100644 --- a/src/tests/sqlsmith/src/sql_gen/types.rs +++ b/src/tests/sqlsmith/src/sql_gen/types.rs @@ -267,9 +267,7 @@ pub(crate) static BINARY_INEQUALITY_OP_TABLE: LazyLock< .filter_map(|func| { let lhs = func.inputs_type[0].as_exact().clone(); let rhs = func.inputs_type[1].as_exact().clone(); - let Some(op) = expr_type_to_inequality_op(func.name.as_scalar()) else { - return None; - }; + let op = expr_type_to_inequality_op(func.name.as_scalar())?; Some(((lhs, rhs), op)) }) .for_each(|(args, op)| 
funcs.entry(args).or_default().push(op)); diff --git a/src/utils/pgwire/src/pg_message.rs b/src/utils/pgwire/src/pg_message.rs index e6211c9a6569..c69fd6ca5b37 100644 --- a/src/utils/pgwire/src/pg_message.rs +++ b/src/utils/pgwire/src/pg_message.rs @@ -16,6 +16,7 @@ use std::collections::HashMap; use std::ffi::CStr; use std::io::{Error, ErrorKind, IoSlice, Result, Write}; +use anyhow::anyhow; use byteorder::{BigEndian, ByteOrder, NetworkEndian}; /// Part of code learned from . use bytes::{Buf, BufMut, Bytes, BytesMut}; @@ -58,7 +59,7 @@ impl FeStartupMessage { Ok(v) => Ok(v.trim_end_matches('\0')), Err(err) => Err(Error::new( ErrorKind::InvalidInput, - format!("Input end error: {}", err), + anyhow!(err).context("Input end error"), )), }?; let mut map = HashMap::new(); @@ -242,12 +243,12 @@ impl FeQueryMessage { Ok(cstr) => cstr.to_str().map_err(|err| { Error::new( ErrorKind::InvalidInput, - format!("Invalid UTF-8 sequence: {}", err), + anyhow!(err).context("Invalid UTF-8 sequence"), ) }), Err(err) => Err(Error::new( ErrorKind::InvalidInput, - format!("Input end error: {}", err), + anyhow!(err).context("Input end error"), )), } } diff --git a/src/utils/pgwire/src/pg_protocol.rs b/src/utils/pgwire/src/pg_protocol.rs index 87eed8e2241f..83da9f5dc058 100644 --- a/src/utils/pgwire/src/pg_protocol.rs +++ b/src/utils/pgwire/src/pg_protocol.rs @@ -564,7 +564,9 @@ where ) -> PsqlResult<()> { // Parse sql. 
let stmts = Parser::parse_sql(&sql) - .inspect_err(|e| tracing::error!("failed to parse sql:\n{}:\n{}", sql, e)) + .inspect_err( + |e| tracing::error!(sql = &*sql, error = %e.as_report(), "failed to parse sql"), + ) .map_err(|err| PsqlError::SimpleQueryError(err.into()))?; if stmts.is_empty() { self.stream.write_no_flush(&BeMessage::EmptyQueryResponse)?; @@ -684,7 +686,9 @@ where let stmt = { let stmts = Parser::parse_sql(sql) - .inspect_err(|e| tracing::error!("failed to parse sql:\n{}:\n{}", sql, e)) + .inspect_err( + |e| tracing::error!(sql, error = %e.as_report(), "failed to parse sql"), + ) .map_err(|err| PsqlError::ExtendedPrepareError(err.into()))?; if stmts.len() > 1 { @@ -1039,7 +1043,7 @@ where let ssl = openssl::ssl::Ssl::new(ssl_ctx).unwrap(); let mut stream = tokio_openssl::SslStream::new(ssl, stream).unwrap(); if let Err(e) = Pin::new(&mut stream).accept().await { - tracing::warn!("Unable to set up an ssl connection, reason: {}", e); + tracing::warn!(error = %e.as_report(), "Unable to set up an ssl connection"); let _ = stream.shutdown().await; return Err(e.into()); } @@ -1081,7 +1085,7 @@ where Conn::Unencrypted(s) => s.write_no_flush(message), Conn::Ssl(s) => s.write_no_flush(message), } - .inspect_err(|error| tracing::error!(%error, "flush error")) + .inspect_err(|error| tracing::error!(error = %error.as_report(), "flush error")) } async fn write(&mut self, message: &BeMessage<'_>) -> io::Result<()> { @@ -1096,7 +1100,7 @@ where Conn::Unencrypted(s) => s.flush().await, Conn::Ssl(s) => s.flush().await, } - .inspect_err(|error| tracing::error!(%error, "flush error")) + .inspect_err(|error| tracing::error!(error = %error.as_report(), "flush error")) } async fn ssl(&mut self, ssl_ctx: &SslContextRef) -> PsqlResult>> { diff --git a/src/utils/runtime/src/logger.rs b/src/utils/runtime/src/logger.rs index cb27840d7530..6eab94af6c48 100644 --- a/src/utils/runtime/src/logger.rs +++ b/src/utils/runtime/src/logger.rs @@ -202,8 +202,7 @@ pub fn 
init_risingwave_logger(settings: LoggerSettings) { // Configure levels for external crates. filter = filter .with_target("foyer", Level::WARN) - .with_target("aws_sdk_ec2", Level::INFO) - .with_target("aws_sdk_s3", Level::INFO) + .with_target("aws", Level::INFO) .with_target("aws_config", Level::WARN) .with_target("aws_endpoint", Level::WARN) .with_target("aws_credential_types::cache::lazy_caching", Level::WARN) diff --git a/src/workspace-hack/Cargo.toml b/src/workspace-hack/Cargo.toml index fe84476bea3c..743e290899f1 100644 --- a/src/workspace-hack/Cargo.toml +++ b/src/workspace-hack/Cargo.toml @@ -64,9 +64,9 @@ hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13", features hashbrown-5ef9efb8ec2df382 = { package = "hashbrown", version = "0.12", features = ["nightly", "raw"] } hmac = { version = "0.12", default-features = false, features = ["reset"] } hyper = { version = "0.14", features = ["full"] } -indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["serde", "std"] } +indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["serde"] } indexmap-f595c2ba2a3f28df = { package = "indexmap", version = "2", features = ["serde"] } -itertools = { version = "0.11" } +itertools = { version = "0.10" } jni = { version = "0.21", features = ["invocation"] } lazy_static = { version = "1", default-features = false, features = ["spin_no_std"] } lexical-core = { version = "0.8", features = ["format"] } @@ -170,7 +170,7 @@ frunk_core = { version = "0.4", default-features = false, features = ["std"] } generic-array = { version = "0.14", default-features = false, features = ["more_lengths", "zeroize"] } hashbrown-582f2526e08bb6a0 = { package = "hashbrown", version = "0.14", features = ["nightly", "raw"] } indexmap-f595c2ba2a3f28df = { package = "indexmap", version = "2", features = ["serde"] } -itertools = { version = "0.11" } +itertools = { version = "0.10" } 
lazy_static = { version = "1", default-features = false, features = ["spin_no_std"] } libc = { version = "0.2", features = ["extra_traits"] } log = { version = "0.4", default-features = false, features = ["kv_unstable", "std"] }