From 14d50a9ac91c57817a0fbd15912a340ea81c6d71 Mon Sep 17 00:00:00 2001 From: xxchan Date: Thu, 19 Sep 2024 20:20:35 +0800 Subject: [PATCH 1/3] refactor: refactor arrow conversion --- Cargo.lock | 383 +++++++++--------- src/batch/Cargo.toml | 3 - src/common/Cargo.toml | 12 +- src/common/src/array/arrow/arrow_52.rs | 47 +++ src/common/src/array/arrow/arrow_deltalake.rs | 2 +- src/common/src/array/arrow/arrow_iceberg.rs | 47 +-- src/common/src/array/arrow/arrow_impl.rs | 2 +- src/common/src/array/arrow/arrow_udf.rs | 12 +- src/common/src/array/arrow/mod.rs | 17 +- src/connector/Cargo.toml | 6 - src/connector/src/error.rs | 5 +- src/connector/src/parser/parquet_parser.rs | 4 +- .../src/sink/file_sink/opendal_sink.rs | 2 +- src/connector/src/sink/iceberg/mod.rs | 6 +- .../prometheus/monitored_base_file_writer.rs | 4 +- .../prometheus/monitored_partition_writer.rs | 3 +- .../monitored_position_delete_writer.rs | 1 + .../prometheus/monitored_write_writer.rs | 4 +- .../src/source/pulsar/source/reader.rs | 2 +- src/expr/core/Cargo.toml | 2 - src/expr/core/src/aggregate/user_defined.rs | 10 +- src/expr/core/src/expr/expr_udf.rs | 2 +- src/expr/core/src/sig/udf.rs | 2 +- .../core/src/table_function/user_defined.rs | 2 +- src/expr/impl/Cargo.toml | 3 - src/expr/impl/src/scalar/external/iceberg.rs | 2 +- src/expr/impl/src/udf/mod.rs | 2 +- src/expr/impl/src/udf/quickjs.rs | 2 +- src/frontend/Cargo.toml | 2 - src/frontend/src/handler/create_sink.rs | 2 +- src/frontend/src/handler/create_source.rs | 2 +- 31 files changed, 303 insertions(+), 292 deletions(-) create mode 100644 src/common/src/array/arrow/arrow_52.rs diff --git a/Cargo.lock b/Cargo.lock index fcce5f2c9db4b..b434a322edf67 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -609,22 +609,23 @@ dependencies = [ [[package]] name = "arrow-flight" -version = "50.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d7f215461ad6346f2e4cc853e377d4e076d533e1ed78d327debe83023e3601f" +checksum = "cdd624aafd1f34710a1d6ed44ea0e9b06f7b75adc4277c53bac4a2d23229030b" dependencies = [ - "arrow-array 50.0.0", - "arrow-buffer 50.0.0", - "arrow-cast 50.0.0", - "arrow-ipc 50.0.0", - "arrow-schema 50.0.0", - "base64 0.21.7", + "arrow-array 52.0.0", + "arrow-buffer 52.0.0", + "arrow-cast 52.0.0", + "arrow-ipc 52.0.0", + "arrow-schema 52.0.0", + "base64 0.22.0", "bytes", "futures", "paste", - "prost 0.12.1", + "prost 0.12.6", + "prost-types 0.12.6", "tokio", - "tonic 0.10.2", + "tonic 0.11.0", ] [[package]] @@ -893,65 +894,64 @@ dependencies = [ [[package]] name = "arrow-udf-flight" -version = "0.1.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4adb3a066bd22fb520bc3d040d9d59ee54f320c21faeb6df815ea20445c80c54" +checksum = "82fbd8c05d6c16cccfc762bc865ff40fdb73cad6776a7ec14b65973f83e19074" dependencies = [ - "arrow-array 50.0.0", + "arrow-array 52.0.0", "arrow-flight", - "arrow-schema 50.0.0", - "arrow-select 50.0.0", + "arrow-schema 52.0.0", + "arrow-select 52.0.0", "futures-util", "thiserror", "tokio", - "tonic 0.10.2", + "tonic 0.11.0", "tracing", ] [[package]] name = "arrow-udf-js" -version = "0.3.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76cb6d108605c5489fff1ef9c520656946ad05ed0de3ea6d26d56bcb34bdb8c5" +checksum = "5e09de178aed1119c14dab54def3bd9d383dfdb84f1af467bb0fbf0b64a88b04" dependencies = [ "anyhow", - "arrow-array 50.0.0", - "arrow-buffer 50.0.0", - "arrow-schema 50.0.0", + "arrow-array 52.0.0", + "arrow-buffer 52.0.0", + "arrow-schema 52.0.0", "atomic-time", "rquickjs", ] [[package]] name = "arrow-udf-python" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4506efc6fbc200c083add2a7ed4e3616a859941a745e922320ae7051d90d12ec" +checksum = "efd364e0016acc32928c7e00a768f5fa6339ae8a243052fd3d354639d57b08aa" dependencies = [ "anyhow", - "arrow-array 50.0.0", - "arrow-buffer 50.0.0", - "arrow-ipc 50.0.0", - "arrow-schema 50.0.0", - "lazy_static", + "arrow-array 52.0.0", + "arrow-buffer 52.0.0", + "arrow-ipc 52.0.0", + "arrow-schema 52.0.0", "pyo3", "pyo3-build-config", ] [[package]] name = "arrow-udf-wasm" -version = "0.2.2" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb829e25925161d93617d4b053bae03fe51e708f2cce088d85df856011d4f369" +checksum = "8e322a311a82351c980de6ac3e4a74fcd483d3591096411242208632e1b7acab" dependencies = [ "anyhow", - "arrow-array 50.0.0", - "arrow-ipc 50.0.0", - "arrow-schema 50.0.0", + "arrow-array 52.0.0", + "arrow-ipc 52.0.0", + "arrow-schema 52.0.0", "async-trait", "base64 0.22.0", "genawaiter", - "lazy_static", + "once_cell", "tempfile", "wasi-common", "wasmtime", @@ -2828,6 +2828,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "cobs" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" + [[package]] name = "colorchoice" version = "1.0.0" @@ -3039,18 +3045,18 @@ dependencies = [ [[package]] name = "cranelift-bforest" -version = "0.107.0" +version = "0.109.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79b27922a6879b5b5361d0a084cb0b1941bf109a98540addcb932da13b68bed4" +checksum = "0b6b33d7e757a887989eb18b35712b2a67d96171ec3149d1bfb657b29b7b367c" dependencies = [ "cranelift-entity", ] [[package]] name = "cranelift-codegen" -version = "0.107.0" +version = "0.109.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "304c455b28bf56372729acb356afbb55d622f2b0f2f7837aa5e57c138acaac4d" +checksum = "b9acf15cb22be42d07c3b57d7856329cb228b7315d385346149df2566ad5e4aa" dependencies = [ "bumpalo", "cranelift-bforest", @@ -3063,39 +3069,40 @@ dependencies = [ "hashbrown 0.14.3", "log", "regalloc2", + "rustc-hash", "smallvec", "target-lexicon", ] [[package]] name = "cranelift-codegen-meta" -version = "0.107.0" +version = "0.109.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1653c56b99591d07f67c5ca7f9f25888948af3f4b97186bff838d687d666f613" +checksum = "e934d301392b73b3f8b0540391fb82465a0f179a3cee7c726482ac4727efcc97" dependencies = [ "cranelift-codegen-shared", ] [[package]] name = "cranelift-codegen-shared" -version = "0.107.0" +version = "0.109.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5b6a9cf6b6eb820ee3f973a0db313c05dc12d370f37b4fe9630286e1672573f" +checksum = "8afb2a2566b3d54b854dfb288b3b187f6d3d17d6f762c92898207eba302931da" [[package]] name = "cranelift-control" -version = "0.107.0" +version = "0.109.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9d06e6bf30075fb6bed9e034ec046475093392eea1aff90eb5c44c4a033d19a" +checksum = "0100f33b704cdacd01ad66ff41f8c5030d57cbff078e2a4e49ab1822591299fa" dependencies = [ "arbitrary", ] [[package]] name = "cranelift-entity" -version = "0.107.0" +version = "0.109.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29be04f931b73cdb9694874a295027471817f26f26d2f0ebe5454153176b6e3a" +checksum = "a8cfdc315e5d18997093e040a8d234bea1ac1e118a716d3e30f40d449e78207b" dependencies = [ "serde", "serde_derive", @@ -3103,9 +3110,9 @@ dependencies = [ [[package]] name = "cranelift-frontend" -version = "0.107.0" +version = "0.109.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a07fd7393041d7faa2f37426f5dc7fc04003b70988810e8c063beefeff1cd8f9" +checksum = "0f74b84f16af2e982b0c0c72233503d9d55cbfe3865dbe807ca28dc6642a28b5" dependencies = [ "cranelift-codegen", "log", @@ -3115,15 +3122,15 @@ dependencies = [ [[package]] name = "cranelift-isle" -version = "0.107.0" +version = "0.109.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f341d7938caa6dff8149dac05bb2b53fc680323826b83b4cf175ab9f5139a3c9" +checksum = "adf306d3dde705fb94bd48082f01d38c4ededc74293a4c007805f610bf08bc6e" [[package]] name = "cranelift-native" -version = "0.107.0" +version = "0.109.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82af6066e6448d26eeabb7aa26a43f7ff79f8217b06bade4ee6ef230aecc8880" +checksum = "1ea0ebdef7aff4a79bcbc8b6495f31315f16b3bf311152f472eaa8d679352581" dependencies = [ "cranelift-codegen", "libc", @@ -3132,9 +3139,9 @@ dependencies = [ [[package]] name = "cranelift-wasm" -version = "0.107.0" +version = "0.109.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2766fab7284a914a7f17f90ebe865c86453225fb8637ac31f123f5028fee69cd" +checksum = "d549108a1942065cdbac3bb96c2952afa0e1b9a3beff4b08c4308ac72257576d" dependencies = [ "cranelift-codegen", "cranelift-entity", @@ -3798,7 +3805,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "object_store", - "prost 0.12.1", + "prost 0.12.6", ] [[package]] @@ -4324,6 +4331,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + [[package]] name = "encode_unicode" version = "0.3.6" @@ -5319,7 +5338,7 @@ dependencies = [ "http 0.2.9", "thiserror", "tokio", - "tonic 0.10.2", + "tonic 0.11.0", "tower", "tracing", "trust-dns-resolver 0.23.2", @@ -5588,6 +5607,7 @@ checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ "ahash 0.8.11", "allocator-api2", + "serde", ] [[package]] @@ -7829,9 +7849,9 @@ dependencies = [ [[package]] name = "object" -version = "0.33.0" +version = "0.36.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8dd6c0cdf9429bce006e1362bfce61fa1bfd8c898a643ed8d2b471934701d3d" +checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" dependencies = [ "crc32fast", "hashbrown 0.14.3", @@ -8051,7 +8071,7 @@ dependencies = [ "opentelemetry", "opentelemetry-proto", "opentelemetry_sdk", - "prost 0.12.1", + "prost 0.12.6", "thiserror", "tokio", "tonic 0.11.0", @@ -8065,7 +8085,7 @@ checksum = "984806e6cf27f2b49282e2a05e288f30594f3dbc74eb7a6e99422bc48ed78162" dependencies = [ "opentelemetry", "opentelemetry_sdk", - "prost 0.12.1", + "prost 0.12.6", "tonic 0.11.0", ] @@ -8749,6 +8769,18 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" +[[package]] +name = "postcard" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f7f0a8d620d71c457dd1d47df76bb18960378da56af4527aaa10f515eee732e" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "serde", +] + [[package]] name = "postgres-derive" version = "0.4.5" @@ -9091,12 +9123,12 @@ dependencies = [ [[package]] name = "prost" -version = "0.12.1" +version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4fdd22f3b9c31b53c060df4a0613a1c7f062d4115a2b984dd15b1858f7e340d" +checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" dependencies = [ "bytes", - "prost-derive 0.12.1", + "prost-derive 0.12.6", ] [[package]] @@ -9145,8 +9177,8 @@ dependencies = [ "once_cell", "petgraph", "prettyplease 0.2.15", - "prost 0.12.1", - "prost-types 0.12.1", + "prost 0.12.6", + "prost-types 0.12.6", "regex", "syn 2.0.66", "tempfile", @@ -9189,12 +9221,12 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.12.1" +version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "265baba7fabd416cf5078179f7d2cbeca4ce7a9041111900675ea7c4cb8a4c32" +checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.12.1", "proc-macro2", "quote", "syn 2.0.66", @@ -9249,11 +9281,11 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.12.1" +version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e081b29f63d83a4bc75cfc9f3fe424f9156cf92d8a4f0c9407cce9a1b67327cf" +checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" dependencies = [ - "prost 0.12.1", + "prost 0.12.6", ] [[package]] @@ -10104,9 +10136,6 @@ name = "risingwave_batch" version = "2.1.0-alpha" dependencies = [ "anyhow", - "arrow-array 50.0.0", - "arrow-array 52.0.0", - "arrow-schema 50.0.0", "assert_matches", "async-recursion", "async-trait", @@ -10260,16 +10289,12 @@ dependencies = [ "anyhow", "arc-swap", "arrow-array 48.0.1", - "arrow-array 50.0.0", "arrow-array 52.0.0", "arrow-buffer 48.0.1", - "arrow-buffer 50.0.0", "arrow-buffer 52.0.0", "arrow-cast 48.0.1", - "arrow-cast 50.0.0", "arrow-cast 52.0.0", "arrow-schema 48.0.1", - "arrow-schema 50.0.0", "arrow-schema 52.0.0", "async-trait", "auto_enums", @@ -10596,12 +10621,6 @@ version = "2.1.0-alpha" dependencies = [ "anyhow", "apache-avro 0.16.0", - "arrow-array 50.0.0", - "arrow-array 52.0.0", - "arrow-row 50.0.0", - "arrow-schema 50.0.0", - "arrow-schema 52.0.0", - "arrow-select 50.0.0", "assert_matches", "async-compression", "async-nats", @@ -10862,8 +10881,6 @@ name = "risingwave_expr" version = "2.1.0-alpha" dependencies = [ "anyhow", - "arrow-array 50.0.0", - "arrow-schema 50.0.0", "async-trait", "auto_impl", "await-tree", @@ -10904,10 +10921,7 @@ version = "2.1.0-alpha" dependencies = [ "aho-corasick", "anyhow", - "arrow-array 50.0.0", "arrow-flight", - "arrow-schema 50.0.0", - "arrow-schema 52.0.0", "arrow-udf-flight", "arrow-udf-js", "arrow-udf-python", @@ -10972,8 +10986,6 @@ version = "2.1.0-alpha" dependencies = [ "anyhow", "arc-swap", - "arrow-schema 50.0.0", - "arrow-schema 52.0.0", "assert_matches", "async-recursion", "async-trait", @@ -14404,11 +14416,8 @@ dependencies = [ "hyper-timeout 0.4.1", "percent-encoding", "pin-project", - "prost 0.12.1", - "rustls 0.21.11", - "rustls-pemfile 1.0.4", + "prost 0.12.6", "tokio", - "tokio-rustls 0.24.1", "tokio-stream 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", "tower", "tower-layer", @@ -14434,8 +14443,11 @@ dependencies = [ "hyper-timeout 0.4.1", "percent-encoding", "pin-project", - "prost 0.12.1", + "prost 0.12.6", + "rustls-pemfile 2.1.1", + "rustls-pki-types", "tokio", + "tokio-rustls 0.25.0", "tokio-stream 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", "tower", "tower-layer", @@ -15158,9 +15170,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasi-common" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63255d85e10627b07325d7cf4e5fe5a40fa4ff183569a0a67931be26d50ede07" +checksum = "b86fd41e1e26ff6af9451c6a332a5ce5f5283ca51e87d875cdd9a05305598ee3" dependencies = [ "anyhow", "bitflags 2.6.0", @@ -15256,18 +15268,18 @@ checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "wasm-encoder" -version = "0.202.0" +version = "0.209.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfd106365a7f5f7aa3c1916a98cbb3ad477f5ff96ddb130285a91c6e7429e67a" +checksum = "7b4a05336882dae732ce6bd48b7e11fe597293cb72c13da4f35d7d5f8d53b2a7" dependencies = [ "leb128", ] [[package]] name = "wasm-encoder" -version = "0.206.0" +version = "0.217.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d759312e1137f199096d80a70be685899cd7d3d09c572836bb2e9b69b4dc3b1e" +checksum = "7b88b0814c9a2b323a9b46c687e726996c255ac8b64aa237dd11c81ed4854760" dependencies = [ "leb128", ] @@ -15300,20 +15312,23 @@ dependencies = [ [[package]] name = "wasmparser" -version = "0.202.0" +version = "0.209.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6998515d3cf3f8b980ef7c11b29a9b1017d4cf86b99ae93b546992df9931413" +checksum = "07035cc9a9b41e62d3bb3a3815a66ab87c993c06fe1cf6b2a3f2a18499d937db" dependencies = [ + "ahash 0.8.11", "bitflags 2.6.0", + "hashbrown 0.14.3", "indexmap 2.2.6", "semver 1.0.18", + "serde", ] [[package]] name = "wasmprinter" -version = "0.202.0" +version = "0.209.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab1cc9508685eef9502e787f4d4123745f5651a1e29aec047645d3cac1e2da7a" +checksum = "ceca8ae6eaa8c7c87b33c25c53bdf299f8c2a764aee1179402ff7652ef3a6859" dependencies = [ "anyhow", "wasmparser", @@ -15321,35 +15336,45 @@ dependencies = [ [[package]] name = "wasmtime" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5990663c28d81015ddbb02a068ac1bf396a4ea296eba7125b2dfc7c00cb52e" +checksum = "786d8b5e7a4d54917c5ebe555b9667337e5f93383f49bddaaeec2eba68093b45" dependencies = [ "addr2line", "anyhow", "async-trait", - "bincode 1.3.3", "bumpalo", + "cc", "cfg-if", "encoding_rs", "fxprof-processed-profile", "gimli", + "hashbrown 0.14.3", "indexmap 2.2.6", "ittapi", "libc", + "libm", "log", - "object 0.33.0", + "mach2", + "memfd", + "memoffset", + "object 0.36.4", "once_cell", "paste", + "postcard", + "psm", "rayon", "rustix 0.38.31", "semver 1.0.18", "serde", "serde_derive", "serde_json", + "smallvec", + "sptr", "target-lexicon", - "wasm-encoder 0.202.0", + "wasm-encoder 0.209.1", "wasmparser", + "wasmtime-asm-macros", "wasmtime-cache", "wasmtime-component-macro", "wasmtime-component-util", @@ -15358,8 +15383,8 @@ dependencies = [ "wasmtime-fiber", "wasmtime-jit-debug", "wasmtime-jit-icache-coherence", - "wasmtime-runtime", "wasmtime-slab", + "wasmtime-versioned-export-macros", "wasmtime-winch", "wat", "windows-sys 0.52.0", @@ -15367,24 +15392,24 @@ dependencies = [ [[package]] name = "wasmtime-asm-macros" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "625ee94c72004f3ea0228989c9506596e469517d7d0ed66f7300d1067bdf1ca9" +checksum = "d697d99c341d4a9ffb72f3af7a02124d233eeb59aee010f36d88e97cca553d5e" dependencies = [ "cfg-if", ] [[package]] name = "wasmtime-cache" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98534bf28de232299e83eab33984a7a6c40c69534d6bd0ea216150b63d41a83a" +checksum = "916610f9ae9a6c22deb25bba2e6247ba9f00b093d30620875203b91328a1adfa" dependencies = [ "anyhow", "base64 0.21.7", - "bincode 1.3.3", "directories-next", "log", + "postcard", "rustix 0.38.31", "serde", "serde_derive", @@ -15396,9 +15421,9 @@ dependencies = [ [[package]] name = "wasmtime-component-macro" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64f84414a25ee3a624c8b77550f3fe7b5d8145bd3405ca58886ee6900abb6dc2" +checksum = "b29b462b068e73b5b27fae092a27f47e5937cabf6b26be2779c978698a52feca" dependencies = [ "anyhow", "proc-macro2", @@ -15411,15 +15436,15 @@ dependencies = [ [[package]] name = "wasmtime-component-util" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78580bdb4e04c7da3bf98088559ca1d29382668536e4d5c7f2f966d79c390307" +checksum = "f9d2912c53d9054984b380dfbd7579f9c3681b2a73b903a56bd71a1c4f175f1e" [[package]] name = "wasmtime-cranelift" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b60df0ee08c6a536c765f69e9e8205273435b66d02dd401e938769a2622a6c1a" +checksum = "a3975deafea000457ba84355c7c0fce0372937204f77026510b7b454f28a3a65" dependencies = [ "anyhow", "cfg-if", @@ -15431,7 +15456,7 @@ dependencies = [ "cranelift-wasm", "gimli", "log", - "object 0.33.0", + "object 0.36.4", "target-lexicon", "thiserror", "wasmparser", @@ -15441,24 +15466,23 @@ dependencies = [ [[package]] name = "wasmtime-environ" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ffc1613db69ee47c96738861534f9a405e422a5aa00224fbf5d410b03fb445" +checksum = "f444e900e848b884d8a8a2949b6f5b92af642a3e663ff8fbe78731143a55be61" dependencies = [ "anyhow", - "bincode 1.3.3", "cpp_demangle", "cranelift-entity", "gimli", "indexmap 2.2.6", "log", - "object 0.33.0", + "object 0.36.4", + "postcard", "rustc-demangle", "serde", "serde_derive", "target-lexicon", - "thiserror", - "wasm-encoder 0.202.0", + "wasm-encoder 0.209.1", "wasmparser", "wasmprinter", "wasmtime-component-util", @@ -15467,9 +15491,9 @@ dependencies = [ [[package]] name = "wasmtime-fiber" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f043514a23792761c5765f8ba61a4aa7d67f260c0c37494caabceb41d8ae81de" +checksum = "4ded58eb2d1bf0dcd2182d0ccd7055c4b10b50d711514f1d73f61515d0fa829d" dependencies = [ "anyhow", "cc", @@ -15482,11 +15506,11 @@ dependencies = [ [[package]] name = "wasmtime-jit-debug" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c0ca2ad8f5d2b37f507ef1c935687a690e84e9f325f5a2af9639440b43c1f0e" +checksum = "9bc54198c6720f098210a85efb3ba8c078d1de4d373cdb6778850a66ae088d11" dependencies = [ - "object 0.33.0", + "object 0.36.4", "once_cell", "rustix 0.38.31", "wasmtime-versioned-export-macros", @@ -15494,69 +15518,40 @@ dependencies = [ [[package]] name = "wasmtime-jit-icache-coherence" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a9f93a3289057b26dc75eb84d6e60d7694f7d169c7c09597495de6e016a13ff" -dependencies = [ - "cfg-if", - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "wasmtime-runtime" -version = "20.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6332a2b0af4224c3ea57c857ad39acd2780ccc2b0c99ba1baa01864d90d7c94" +checksum = "5afe2f0499542f9a4bcfa1b55bfdda803b6ade4e7c93c6b99e0f39dba44b0a91" dependencies = [ "anyhow", - "cc", "cfg-if", - "encoding_rs", - "indexmap 2.2.6", "libc", - "log", - "mach2", - "memfd", - "memoffset", - "paste", - "psm", - "rustix 0.38.31", - "sptr", - "wasm-encoder 0.202.0", - "wasmtime-asm-macros", - "wasmtime-environ", - "wasmtime-fiber", - "wasmtime-jit-debug", - "wasmtime-slab", - "wasmtime-versioned-export-macros", "windows-sys 0.52.0", ] [[package]] name = "wasmtime-slab" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b3655075824a374c536a2b2cc9283bb765fcdf3d58b58587862c48571ad81ef" +checksum = "0a7de1f2bec5bbb35d532e61c85c049dc84ae671df60492f90b954ecf21169e7" [[package]] name = "wasmtime-types" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b98cf64a242b0b9257604181ca28b28a5fcaa4c9ea1d396f76d1d2d1c5b40eef" +checksum = "412463e9000e14cf6856be48628d2213c20c153e29ffc22b036980c892ea6964" dependencies = [ "cranelift-entity", "serde", "serde_derive", - "thiserror", + "smallvec", "wasmparser", ] [[package]] name = "wasmtime-versioned-export-macros" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8561d9e2920db2a175213d557d71c2ac7695831ab472bbfafb9060cd1034684f" +checksum = "de5a9bc4f44ceeb168e9e8e3be4e0b4beb9095b468479663a9e24c667e36826f" dependencies = [ "proc-macro2", "quote", @@ -15565,14 +15560,14 @@ dependencies = [ [[package]] name = "wasmtime-winch" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06b573d14ac846a0fb8c541d8fca6a64acf9a1d176176982472274ab1d2fa5d" +checksum = "ed4db238a0241df2d15f79ad17b3a37a27f2ea6cb885894d81b42ae107544466" dependencies = [ "anyhow", "cranelift-codegen", "gimli", - "object 0.33.0", + "object 0.36.4", "target-lexicon", "wasmparser", "wasmtime-cranelift", @@ -15582,9 +15577,9 @@ dependencies = [ [[package]] name = "wasmtime-wit-bindgen" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595bc7bb3b0ff4aa00fab718c323ea552c3034d77abc821a35112552f2ea487a" +checksum = "70dc077306b38288262e5ba01d4b21532a6987416cdc0aedf04bb06c22a68fdc" dependencies = [ "anyhow", "heck 0.4.1", @@ -15603,24 +15598,24 @@ dependencies = [ [[package]] name = "wast" -version = "206.0.0" +version = "217.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68586953ee4960b1f5d84ebf26df3b628b17e6173bc088e0acfbce431469795a" +checksum = "79004ecebded92d3c710d4841383368c7f04b63d0992ddd6b0c7d5029b7629b7" dependencies = [ "bumpalo", "leb128", "memchr", "unicode-width", - "wasm-encoder 0.206.0", + "wasm-encoder 0.217.0", ] [[package]] name = "wat" -version = "1.206.0" +version = "1.217.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da4c6f2606276c6e991aebf441b2fc92c517807393f039992a3e0ad873efe4ad" +checksum = "c126271c3d92ca0f7c63e4e462e40c69cca52fd4245fcda730d1cf558fb55088" dependencies = [ - "wast 206.0.0", + "wast 217.0.0", ] [[package]] @@ -15695,9 +15690,9 @@ checksum = "653f141f39ec16bba3c5abe400a0c60da7468261cc2cbf36805022876bc721a8" [[package]] name = "wiggle" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b6552dda951239e219c329e5a768393664e8d120c5e0818487ac2633f173b1f" +checksum = "29830e5d01c182d24b94092c697aa7ab0ee97d22e78a2bf40ca91eae6ebca5c2" dependencies = [ "anyhow", "async-trait", @@ -15710,9 +15705,9 @@ dependencies = [ [[package]] name = "wiggle-generate" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da64cb31e0bfe8b1d2d13956ef9fd5c77545756a1a6ef0e6cfd44e8f1f207aed" +checksum = "557567f2793508760cd855f7659b7a0b9dc4dbc451f53f1415d6943a15311ade" dependencies = [ "anyhow", "heck 0.4.1", @@ -15725,9 +15720,9 @@ dependencies = [ [[package]] name = "wiggle-macro" -version = "20.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "900b2416ef2ff2903ded6cf55d4a941fed601bf56a8c4874856d7a77c1891994" +checksum = "cc26129a8aea20b62c961d1b9ab4a3c3b56b10042ed85d004f8678af0f21ba6e" dependencies = [ "proc-macro2", "quote", @@ -15768,9 +15763,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "winch-codegen" -version = "0.18.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb23450977f9d4a23c02439cf6899340b2d68887b19465c5682740d9cc37d52e" +checksum = "85c6915884e731b2db0d8cf08cb64474cb69221a161675fd3c135f91febc3daa" dependencies = [ "anyhow", "cranelift-codegen", @@ -16078,9 +16073,9 @@ dependencies = [ [[package]] name = "wit-parser" -version = "0.202.0" +version = "0.209.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744237b488352f4f27bca05a10acb79474415951c450e52ebd0da784c1df2bcc" +checksum = "3e79b9e3c0b6bb589dec46317e645851e0db2734c44e2be5e251b03ff4a51269" dependencies = [ "anyhow", "id-arena", diff --git a/src/batch/Cargo.toml b/src/batch/Cargo.toml index 403eb864229d3..3970f4eab4324 100644 --- a/src/batch/Cargo.toml +++ b/src/batch/Cargo.toml @@ -15,9 +15,6 @@ normal = ["workspace-hack"] [dependencies] anyhow = "1" -arrow-array = { workspace = true } -arrow-array-iceberg = { workspace = true } -arrow-schema = { workspace = true } assert_matches = "1" async-recursion = "1" async-trait = "0.1" diff --git a/src/common/Cargo.toml b/src/common/Cargo.toml index cab8ef2129f0f..fdc2b6369f26f 100644 --- a/src/common/Cargo.toml +++ b/src/common/Cargo.toml @@ -17,18 +17,14 @@ normal = ["workspace-hack"] ahash = "0.8" anyhow = "1" arc-swap = "1" -arrow-array = { workspace = true } +arrow-52-array = { workspace = true } +arrow-52-buffer = { workspace = true } +arrow-52-cast = { workspace = true } +arrow-52-schema = { workspace = true } arrow-array-deltalake = { workspace = true } -arrow-array-iceberg = { workspace = true } -arrow-buffer = { workspace = true } arrow-buffer-deltalake = { workspace = true } -arrow-buffer-iceberg = { workspace = true } -arrow-cast = { workspace = true } arrow-cast-deltalake = { workspace = true } -arrow-cast-iceberg = { workspace = true } -arrow-schema = { workspace = true } arrow-schema-deltalake = { workspace = true } -arrow-schema-iceberg = { workspace = true } async-trait = "0.1" auto_enums = { workspace = true } auto_impl = "1" diff --git a/src/common/src/array/arrow/arrow_52.rs b/src/common/src/array/arrow/arrow_52.rs new file mode 100644 index 0000000000000..6a5a4555e38cb --- /dev/null +++ b/src/common/src/array/arrow/arrow_52.rs @@ -0,0 +1,47 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[path = "./arrow_impl.rs"] +mod arrow_impl; +use arrow_buffer::IntervalMonthDayNano as ArrowIntervalType; +pub use arrow_impl::{FromArrow, ToArrow}; +use { + arrow_52_array as arrow_array, arrow_52_buffer as arrow_buffer, arrow_52_cast as arrow_cast, + arrow_52_schema as arrow_schema, +}; + +use crate::array::Interval; + +impl super::ArrowIntervalTypeTrait for ArrowIntervalType { + fn to_interval(self) -> Interval { + // XXX: the arrow-rs decoding is incorrect + // let (months, days, ns) = arrow_array::types::IntervalMonthDayNanoType::to_parts(value); + Interval::from_month_day_usec(self.months, self.days, self.nanoseconds / 1000) + } + + fn from_interval(value: Interval) -> Self { + // XXX: the arrow-rs encoding is incorrect + // arrow_array::types::IntervalMonthDayNanoType::make_value( + // self.months(), + // self.days(), + // // TODO: this may overflow and we need `try_into` + // self.usecs() * 1000, + // ) + Self { + months: value.months(), + days: value.days(), + nanoseconds: value.usecs() * 1000, + } + } +} diff --git a/src/common/src/array/arrow/arrow_deltalake.rs b/src/common/src/array/arrow/arrow_deltalake.rs index 7338532e082d6..5584f89206bf2 100644 --- a/src/common/src/array/arrow/arrow_deltalake.rs +++ b/src/common/src/array/arrow/arrow_deltalake.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use arrow_array::ArrayRef; use num_traits::abs; -use { +pub use { arrow_array_deltalake as arrow_array, arrow_buffer_deltalake as arrow_buffer, arrow_cast_deltalake as arrow_cast, arrow_schema_deltalake as arrow_schema, }; diff --git a/src/common/src/array/arrow/arrow_iceberg.rs b/src/common/src/array/arrow/arrow_iceberg.rs index 80c0a3dab1667..315b23a4807d5 100644 --- a/src/common/src/array/arrow/arrow_iceberg.rs +++ b/src/common/src/array/arrow/arrow_iceberg.rs @@ -17,46 +17,16 @@ use std::collections::HashMap; use std::ops::{Div, Mul}; use std::sync::Arc; -use arrow_array_iceberg::{self as arrow_array, ArrayRef}; -use arrow_buffer_iceberg::IntervalMonthDayNano as ArrowIntervalType; +use arrow_array::ArrayRef; use num_traits::abs; -use { - arrow_buffer_iceberg as arrow_buffer, arrow_cast_iceberg as arrow_cast, - arrow_schema_iceberg as arrow_schema, +pub use { + arrow_52_array as arrow_array, arrow_52_buffer as arrow_buffer, arrow_52_cast as arrow_cast, + arrow_52_schema as arrow_schema, }; +pub use super::arrow_52::{FromArrow, ToArrow}; use crate::array::{Array, ArrayError, ArrayImpl, DataChunk, DataType, DecimalArray}; -use crate::types::{Interval, StructType}; - -impl ArrowIntervalTypeTrait for ArrowIntervalType { - fn to_interval(self) -> Interval { - // XXX: the arrow-rs decoding is incorrect - // let (months, days, ns) = arrow_array::types::IntervalMonthDayNanoType::to_parts(value); - Interval::from_month_day_usec(self.months, self.days, self.nanoseconds / 1000) - } - - fn from_interval(value: Interval) -> Self { - // XXX: the arrow-rs encoding is incorrect - // arrow_array::types::IntervalMonthDayNanoType::make_value( - // self.months(), - // self.days(), - // // TODO: this may overflow and we need `try_into` - // self.usecs() * 1000, - // ) - Self { - months: value.months(), - days: value.days(), - nanoseconds: value.usecs() * 1000, - } - } -} - -#[path = "./arrow_impl.rs"] -mod arrow_impl; - -use arrow_impl::{FromArrow, ToArrow}; - -use crate::array::arrow::ArrowIntervalTypeTrait; +use crate::types::StructType; pub struct IcebergArrowConvert; @@ -261,10 +231,9 @@ impl ToArrow for IcebergCreateTableArrowConvert { mod test { use std::sync::Arc; - use arrow_array_iceberg::{ArrayRef, Decimal128Array}; - use arrow_schema_iceberg::DataType; - + use super::arrow_array::{ArrayRef, Decimal128Array}; use super::arrow_impl::ToArrow; + use super::arrow_schema::DataType; use super::IcebergArrowConvert; use crate::array::{Decimal, DecimalArray}; diff --git a/src/common/src/array/arrow/arrow_impl.rs b/src/common/src/array/arrow/arrow_impl.rs index 8fa3e2abb6b5f..3095461a2ebc5 100644 --- a/src/common/src/array/arrow/arrow_impl.rs +++ b/src/common/src/array/arrow/arrow_impl.rs @@ -42,8 +42,8 @@ use std::fmt::Write; +use arrow_array::array; use arrow_array::cast::AsArray; -use arrow_array_iceberg::array; use arrow_buffer::OffsetBuffer; use chrono::{DateTime, NaiveDateTime, NaiveTime}; use itertools::Itertools; diff --git a/src/common/src/array/arrow/arrow_udf.rs b/src/common/src/array/arrow/arrow_udf.rs index a5296ca21cab8..e59687a05be23 100644 --- a/src/common/src/array/arrow/arrow_udf.rs +++ b/src/common/src/array/arrow/arrow_udf.rs @@ -20,16 +20,14 @@ use std::sync::Arc; -pub use arrow_impl::{FromArrow, ToArrow}; -use {arrow_array, arrow_buffer, arrow_cast, arrow_schema}; -type ArrowIntervalType = i128; +pub use { + arrow_52_array as arrow_array, arrow_52_buffer as arrow_buffer, arrow_52_cast as arrow_cast, + arrow_52_schema as arrow_schema, +}; +pub use super::arrow_52::{FromArrow, ToArrow}; use crate::array::{ArrayError, ArrayImpl, DataType, DecimalArray, JsonbArray}; -#[expect(clippy::duplicate_mod)] -#[path = "./arrow_impl.rs"] -mod arrow_impl; - /// Arrow conversion for UDF. #[derive(Default, Debug)] pub struct UdfArrowConvert { diff --git a/src/common/src/array/arrow/mod.rs b/src/common/src/array/arrow/mod.rs index d519d62f9935a..a43d306f926a3 100644 --- a/src/common/src/array/arrow/mod.rs +++ b/src/common/src/array/arrow/mod.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod arrow_52; mod arrow_deltalake; mod arrow_iceberg; mod arrow_udf; @@ -19,7 +20,21 @@ mod arrow_udf; pub use arrow_deltalake::DeltaLakeConvert; pub use arrow_iceberg::{IcebergArrowConvert, IcebergCreateTableArrowConvert}; pub use arrow_udf::{FromArrow, ToArrow, UdfArrowConvert}; - +pub use reexport::*; +mod reexport { + pub use super::arrow_deltalake::{ + arrow_array as arrow_array_deltalake, arrow_buffer as arrow_buffer_deltalake, + arrow_cast as arrow_cast_deltalake, arrow_schema as arrow_schema_deltalake, + }; + pub use super::arrow_iceberg::{ + arrow_array as arrow_array_iceberg, arrow_buffer as arrow_buffer_iceberg, + arrow_cast as arrow_cast_iceberg, arrow_schema as arrow_schema_iceberg, + }; + pub use super::arrow_udf::{ + arrow_array as arrow_array_udf, arrow_buffer as arrow_buffer_udf, + arrow_cast as arrow_cast_udf, arrow_schema as arrow_schema_udf, + }; +} use crate::types::Interval; trait ArrowIntervalTypeTrait { diff --git a/src/connector/Cargo.toml b/src/connector/Cargo.toml index 2535847c98fe4..14eed053b25cf 100644 --- a/src/connector/Cargo.toml +++ b/src/connector/Cargo.toml @@ -16,12 +16,6 @@ normal = ["workspace-hack"] [dependencies] anyhow = "1" apache-avro = { workspace = true } -arrow-array = { workspace = true } -arrow-array-iceberg = { workspace = true } -arrow-row = { workspace = true } -arrow-schema = { workspace = true } -arrow-schema-iceberg = { workspace = true } -arrow-select = { workspace = true } assert_matches = "1" async-compression = { version = "0.4.5", features = ["gzip", "tokio"] } async-nats = "0.35" diff --git a/src/connector/src/error.rs b/src/connector/src/error.rs index 9dbd17ae58a1d..cbd25bbae75b6 100644 --- a/src/connector/src/error.rs +++ b/src/connector/src/error.rs @@ -64,8 +64,9 @@ def_anyhow_newtype! { icelake::Error => "Iceberg error", iceberg::Error => "IcebergV2 error", redis::RedisError => "Redis error", - arrow_schema::ArrowError => "Arrow error", - arrow_schema_iceberg::ArrowError => "Arrow error", + // currently, the following two are the same type + // risingwave_common::array::arrow::arrow_schema_udf::ArrowError => "Arrow error", + risingwave_common::array::arrow::arrow_schema_iceberg::ArrowError => "Arrow error", google_cloud_pubsub::client::google_cloud_auth::error::Error => "Google Cloud error", rumqttc::tokio_rustls::rustls::Error => "TLS error", rumqttc::v5::ClientError => "MQTT error", diff --git a/src/connector/src/parser/parquet_parser.rs b/src/connector/src/parser/parquet_parser.rs index 4f1e720bc47fb..db2ace3d2b6dd 100644 --- a/src/connector/src/parser/parquet_parser.rs +++ b/src/connector/src/parser/parquet_parser.rs @@ -14,10 +14,10 @@ use std::future::IntoFuture; use std::sync::Arc; -use arrow_array_iceberg::RecordBatch; use deltalake::parquet::arrow::async_reader::AsyncFileReader; use futures_async_stream::try_stream; -use risingwave_common::array::arrow::IcebergArrowConvert; +use risingwave_common::array::arrow::arrow_array_iceberg::RecordBatch; +use risingwave_common::array::arrow::{arrow_schema_iceberg, IcebergArrowConvert}; use risingwave_common::array::{ArrayBuilderImpl, DataChunk, StreamChunk}; use risingwave_common::bail; use risingwave_common::types::{Datum, ScalarImpl}; diff --git a/src/connector/src/sink/file_sink/opendal_sink.rs b/src/connector/src/sink/file_sink/opendal_sink.rs index 65ec46f494345..d3771d9122d6e 100644 --- a/src/connector/src/sink/file_sink/opendal_sink.rs +++ b/src/connector/src/sink/file_sink/opendal_sink.rs @@ -17,11 +17,11 @@ use std::marker::PhantomData; use std::sync::Arc; use anyhow::anyhow; -use arrow_schema_iceberg::SchemaRef; use async_trait::async_trait; use opendal::{FuturesAsyncWriter, Operator, Writer as OpendalWriter}; use parquet::arrow::AsyncArrowWriter; use parquet::file::properties::WriterProperties; +use risingwave_common::array::arrow::arrow_schema_iceberg::{self, SchemaRef}; use risingwave_common::array::arrow::IcebergArrowConvert; use risingwave_common::array::StreamChunk; use risingwave_common::catalog::Schema; diff --git a/src/connector/src/sink/iceberg/mod.rs b/src/connector/src/sink/iceberg/mod.rs index 9e87694539f0c..6609f63578e0b 100644 --- a/src/connector/src/sink/iceberg/mod.rs +++ b/src/connector/src/sink/iceberg/mod.rs @@ -24,9 +24,6 @@ use std::ops::Deref; use std::sync::Arc; use anyhow::{anyhow, Context}; -use arrow_schema_iceberg::{ - DataType as ArrowDataType, Field as ArrowField, Fields, Schema as ArrowSchema, SchemaRef, -}; use async_trait::async_trait; use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY}; use iceberg::spec::TableMetadata; @@ -46,6 +43,9 @@ use icelake::transaction::Transaction; use icelake::types::{data_file_from_json, data_file_to_json, Any, DataFile}; use icelake::{Table, TableIdentifier}; use itertools::Itertools; +use risingwave_common::array::arrow::arrow_schema_iceberg::{ + self, DataType as ArrowDataType, Field as ArrowField, Fields, Schema as ArrowSchema, SchemaRef, +}; use risingwave_common::array::arrow::{IcebergArrowConvert, IcebergCreateTableArrowConvert}; use risingwave_common::array::{Op, StreamChunk}; use risingwave_common::bail; diff --git a/src/connector/src/sink/iceberg/prometheus/monitored_base_file_writer.rs b/src/connector/src/sink/iceberg/prometheus/monitored_base_file_writer.rs index c29608f0fd63b..a45eaf5a401fd 100644 --- a/src/connector/src/sink/iceberg/prometheus/monitored_base_file_writer.rs +++ b/src/connector/src/sink/iceberg/prometheus/monitored_base_file_writer.rs @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use arrow_array_iceberg::RecordBatch; -use arrow_schema_iceberg::SchemaRef; use icelake::io_v2::{ BaseFileWriter, BaseFileWriterBuilder, BaseFileWriterMetrics, CurrentFileStatus, FileWriter, FileWriterBuilder, }; use icelake::Result; +use risingwave_common::array::arrow::arrow_array_iceberg::RecordBatch; +use risingwave_common::array::arrow::arrow_schema_iceberg::SchemaRef; use risingwave_common::metrics::LabelGuardedIntGauge; #[derive(Clone)] diff --git a/src/connector/src/sink/iceberg/prometheus/monitored_partition_writer.rs b/src/connector/src/sink/iceberg/prometheus/monitored_partition_writer.rs index 463b1f3c9dbd4..c5fb3bcc906b5 100644 --- a/src/connector/src/sink/iceberg/prometheus/monitored_partition_writer.rs +++ b/src/connector/src/sink/iceberg/prometheus/monitored_partition_writer.rs @@ -12,12 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use arrow_schema_iceberg::SchemaRef; use icelake::io_v2::{ FanoutPartitionedWriter, FanoutPartitionedWriterBuilder, FanoutPartitionedWriterMetrics, IcebergWriter, IcebergWriterBuilder, }; use icelake::Result; +use risingwave_common::array::arrow::arrow_array_iceberg; +use risingwave_common::array::arrow::arrow_schema_iceberg::SchemaRef; use risingwave_common::metrics::LabelGuardedIntGauge; #[derive(Clone)] diff --git a/src/connector/src/sink/iceberg/prometheus/monitored_position_delete_writer.rs b/src/connector/src/sink/iceberg/prometheus/monitored_position_delete_writer.rs index 8be6eb7018b13..f68a1b6032135 100644 --- a/src/connector/src/sink/iceberg/prometheus/monitored_position_delete_writer.rs +++ b/src/connector/src/sink/iceberg/prometheus/monitored_position_delete_writer.rs @@ -17,6 +17,7 @@ use icelake::io_v2::{ PositionDeleteMetrics, PositionDeleteWriter, PositionDeleteWriterBuilder, }; use icelake::Result; +use risingwave_common::array::arrow::arrow_schema_iceberg; use risingwave_common::metrics::LabelGuardedIntGauge; #[derive(Clone)] diff --git a/src/connector/src/sink/iceberg/prometheus/monitored_write_writer.rs b/src/connector/src/sink/iceberg/prometheus/monitored_write_writer.rs index aebb5939ff143..634e9ac968f89 100644 --- a/src/connector/src/sink/iceberg/prometheus/monitored_write_writer.rs +++ b/src/connector/src/sink/iceberg/prometheus/monitored_write_writer.rs @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -use arrow_array_iceberg::RecordBatch; -use arrow_schema_iceberg::SchemaRef; use async_trait::async_trait; use icelake::io_v2::{IcebergWriter, IcebergWriterBuilder}; use icelake::Result; +use risingwave_common::array::arrow::arrow_array_iceberg::RecordBatch; +use risingwave_common::array::arrow::arrow_schema_iceberg::SchemaRef; use risingwave_common::metrics::{LabelGuardedHistogram, LabelGuardedIntCounter}; #[derive(Clone)] diff --git a/src/connector/src/source/pulsar/source/reader.rs b/src/connector/src/source/pulsar/source/reader.rs index 20f6872474e88..e377970e38214 100644 --- a/src/connector/src/source/pulsar/source/reader.rs +++ b/src/connector/src/source/pulsar/source/reader.rs @@ -15,7 +15,6 @@ use std::collections::HashMap; use anyhow::Context; -use arrow_array_iceberg::{Int32Array, Int64Array, RecordBatch}; use async_trait::async_trait; use futures::StreamExt; use futures_async_stream::try_stream; @@ -27,6 +26,7 @@ use itertools::Itertools; use pulsar::consumer::InitialPosition; use pulsar::message::proto::MessageIdData; use pulsar::{Consumer, ConsumerBuilder, ConsumerOptions, Pulsar, SubType, TokioExecutor}; +use risingwave_common::array::arrow::arrow_array_iceberg::{Int32Array, Int64Array, RecordBatch}; use risingwave_common::array::arrow::IcebergArrowConvert; use risingwave_common::array::StreamChunk; use risingwave_common::catalog::ROWID_PREFIX; diff --git a/src/expr/core/Cargo.toml b/src/expr/core/Cargo.toml index cbff3a5ff2e28..538192523b046 100644 --- a/src/expr/core/Cargo.toml +++ b/src/expr/core/Cargo.toml @@ -17,8 +17,6 @@ normal = ["workspace-hack", "ctor"] [dependencies] anyhow = "1" -arrow-array = { workspace = true } -arrow-schema = { workspace = true } async-trait = "0.1" auto_impl = "1" await-tree = { workspace = true } diff --git a/src/expr/core/src/aggregate/user_defined.rs b/src/expr/core/src/aggregate/user_defined.rs index a3897896a3a67..e63169da4aefb 100644 --- a/src/expr/core/src/aggregate/user_defined.rs +++ b/src/expr/core/src/aggregate/user_defined.rs @@ -15,8 +15,8 @@ use std::sync::Arc; use anyhow::Context; -use arrow_array::ArrayRef; -use arrow_schema::{Field, Fields, Schema, SchemaRef}; +use risingwave_common::array::arrow::arrow_array_udf::ArrayRef; +use risingwave_common::array::arrow::arrow_schema_udf::{Field, Fields, Schema, SchemaRef}; use risingwave_common::array::arrow::{FromArrow, ToArrow, UdfArrowConvert}; use risingwave_common::array::Op; use risingwave_common::bitmap::Bitmap; @@ -154,7 +154,11 @@ pub fn new_user_defined( Ok(Box::new(UserDefinedAggregateFunction { return_field: arrow_convert.to_arrow_field("", return_type)?, - state_field: Field::new("state", arrow_schema::DataType::Binary, true), + state_field: Field::new( + "state", + risingwave_common::array::arrow::arrow_schema_udf::DataType::Binary, + true, + ), return_type: return_type.clone(), arg_schema, runtime, diff --git a/src/expr/core/src/expr/expr_udf.rs b/src/expr/core/src/expr/expr_udf.rs index c5bce6a2944df..71a8cc4ce4460 100644 --- a/src/expr/core/src/expr/expr_udf.rs +++ b/src/expr/core/src/expr/expr_udf.rs @@ -16,9 +16,9 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, LazyLock}; use anyhow::Context; -use arrow_schema::{Fields, Schema, SchemaRef}; use await_tree::InstrumentAwait; use prometheus::{exponential_buckets, Registry}; +use risingwave_common::array::arrow::arrow_schema_udf::{Fields, Schema, SchemaRef}; use risingwave_common::array::arrow::{FromArrow, ToArrow, UdfArrowConvert}; use risingwave_common::array::{Array, ArrayRef, DataChunk}; use risingwave_common::metrics::*; diff --git a/src/expr/core/src/sig/udf.rs b/src/expr/core/src/sig/udf.rs index 9a253a78051e9..047879b9192b8 100644 --- a/src/expr/core/src/sig/udf.rs +++ b/src/expr/core/src/sig/udf.rs @@ -19,9 +19,9 @@ //! See expr/impl/src/udf for the implementations. use anyhow::{bail, Context, Result}; -use arrow_array::{ArrayRef, BooleanArray, RecordBatch}; use enum_as_inner::EnumAsInner; use futures::stream::BoxStream; +use risingwave_common::array::arrow::arrow_array_udf::{ArrayRef, BooleanArray, RecordBatch}; use risingwave_common::types::DataType; /// The global registry of UDF implementations. diff --git a/src/expr/core/src/table_function/user_defined.rs b/src/expr/core/src/table_function/user_defined.rs index 919c258299cde..83841f355b1a9 100644 --- a/src/expr/core/src/table_function/user_defined.rs +++ b/src/expr/core/src/table_function/user_defined.rs @@ -15,7 +15,7 @@ use std::sync::Arc; use anyhow::Context; -use arrow_schema::{Fields, Schema, SchemaRef}; +use risingwave_common::array::arrow::arrow_schema_udf::{Fields, Schema, SchemaRef}; use risingwave_common::array::arrow::{FromArrow, ToArrow, UdfArrowConvert}; use risingwave_common::array::I32Array; use risingwave_common::bail; diff --git a/src/expr/impl/Cargo.toml b/src/expr/impl/Cargo.toml index c0e506889ef77..f65556f8a4711 100644 --- a/src/expr/impl/Cargo.toml +++ b/src/expr/impl/Cargo.toml @@ -24,10 +24,7 @@ wasm-udf = ["arrow-udf-wasm", "zstd"] [dependencies] aho-corasick = "1" anyhow = "1" -arrow-array = { workspace = true } arrow-flight = { workspace = true, optional = true } -arrow-schema = { workspace = true } -arrow-schema-iceberg = { workspace = true } arrow-udf-flight = { workspace = true, optional = true } arrow-udf-js = { workspace = true, optional = true } arrow-udf-python = { workspace = true, optional = true } diff --git a/src/expr/impl/src/scalar/external/iceberg.rs b/src/expr/impl/src/scalar/external/iceberg.rs index 5fbc9b003305a..c6881c5228e43 100644 --- a/src/expr/impl/src/scalar/external/iceberg.rs +++ b/src/expr/impl/src/scalar/external/iceberg.rs @@ -22,7 +22,7 @@ use anyhow::anyhow; use icelake::types::{ create_transform_function, Any as IcelakeDataType, BoxedTransformFunction, Transform, }; -use risingwave_common::array::arrow::IcebergArrowConvert; +use risingwave_common::array::arrow::{arrow_schema_iceberg, IcebergArrowConvert}; use risingwave_common::array::{ArrayRef, DataChunk}; use risingwave_common::ensure; use risingwave_common::row::OwnedRow; diff --git a/src/expr/impl/src/udf/mod.rs b/src/expr/impl/src/udf/mod.rs index 599fe2cb5198f..1977b937b1b4f 100644 --- a/src/expr/impl/src/udf/mod.rs +++ b/src/expr/impl/src/udf/mod.rs @@ -16,8 +16,8 @@ // common imports for submodules use anyhow::{Context as _, Result}; -use arrow_array::{ArrayRef, BooleanArray, RecordBatch}; use futures_util::stream::BoxStream; +use risingwave_common::array::arrow::arrow_array_udf::{ArrayRef, BooleanArray, RecordBatch}; use risingwave_expr::sig::{ CreateFunctionOptions, CreateFunctionOutput, UdfImpl, UdfImplDescriptor, UDF_IMPLS, }; diff --git a/src/expr/impl/src/udf/quickjs.rs b/src/expr/impl/src/udf/quickjs.rs index 7faa4dec8ae9f..eb0dee6db9bc4 100644 --- a/src/expr/impl/src/udf/quickjs.rs +++ b/src/expr/impl/src/udf/quickjs.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -use arrow_schema::{DataType, Field}; use arrow_udf_js::{CallMode, Runtime}; use futures_util::StreamExt; +use risingwave_common::array::arrow::arrow_schema_udf::{DataType, Field}; use risingwave_common::array::arrow::{ToArrow, UdfArrowConvert}; use super::*; diff --git a/src/frontend/Cargo.toml b/src/frontend/Cargo.toml index 3a95eab660b09..8116dc3369ace 100644 --- a/src/frontend/Cargo.toml +++ b/src/frontend/Cargo.toml @@ -17,8 +17,6 @@ normal = ["workspace-hack"] [dependencies] anyhow = "1" arc-swap = "1" -arrow-schema = { workspace = true } -arrow-schema-iceberg = { workspace = true } async-recursion = "1.1.0" async-trait = "0.1" auto_enums = { workspace = true } diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index 9f4f2f63975f1..f87fced6b02a2 100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -16,11 +16,11 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::sync::{Arc, LazyLock}; use anyhow::Context; -use arrow_schema_iceberg::DataType as ArrowDataType; use either::Either; use itertools::Itertools; use maplit::{convert_args, hashmap}; use pgwire::pg_response::{PgResponse, StatementType}; +use risingwave_common::array::arrow::arrow_schema_iceberg::DataType as ArrowDataType; use risingwave_common::array::arrow::IcebergArrowConvert; use risingwave_common::catalog::{ ColumnCatalog, ConnectionId, DatabaseId, Schema, SchemaId, TableId, UserId, diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index f1535fa769b28..71a3b67cd8dd6 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -21,7 +21,7 @@ use either::Either; use itertools::Itertools; use maplit::{convert_args, hashmap}; use pgwire::pg_response::{PgResponse, StatementType}; -use risingwave_common::array::arrow::IcebergArrowConvert; +use risingwave_common::array::arrow::{arrow_schema_iceberg, IcebergArrowConvert}; use risingwave_common::bail_not_implemented; use risingwave_common::catalog::{ debug_assert_column_ids_distinct, ColumnCatalog, ColumnDesc, ColumnId, Schema, TableId, From 8d5e660b6cd634ea575c07ebfe42d458d46653ec Mon Sep 17 00:00:00 2001 From: xxchan Date: Thu, 19 Sep 2024 20:37:11 +0800 Subject: [PATCH 2/3] revert upgrade --- Cargo.lock | 367 +++++++++--------- Cargo.toml | 18 +- src/common/Cargo.toml | 20 +- src/common/src/array/arrow/arrow_48.rs | 23 ++ src/common/src/array/arrow/arrow_50.rs | 23 ++ src/common/src/array/arrow/arrow_52.rs | 5 +- src/common/src/array/arrow/arrow_deltalake.rs | 14 +- src/common/src/array/arrow/arrow_iceberg.rs | 11 +- src/common/src/array/arrow/arrow_udf.rs | 7 +- src/common/src/array/arrow/mod.rs | 17 +- src/connector/src/error.rs | 2 - src/expr/core/src/aggregate/user_defined.rs | 2 +- src/expr/core/src/expr/expr_udf.rs | 2 +- .../core/src/table_function/user_defined.rs | 2 +- src/expr/impl/src/udf/quickjs.rs | 2 +- 15 files changed, 270 insertions(+), 245 deletions(-) create mode 100644 src/common/src/array/arrow/arrow_48.rs create mode 100644 src/common/src/array/arrow/arrow_50.rs diff --git a/Cargo.lock b/Cargo.lock index b434a322edf67..bd98f519bc586 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -609,23 +609,22 @@ dependencies = [ [[package]] name = "arrow-flight" -version = "52.0.0" +version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdd624aafd1f34710a1d6ed44ea0e9b06f7b75adc4277c53bac4a2d23229030b" +checksum = "1d7f215461ad6346f2e4cc853e377d4e076d533e1ed78d327debe83023e3601f" dependencies = [ - "arrow-array 52.0.0", - "arrow-buffer 52.0.0", - "arrow-cast 52.0.0", - "arrow-ipc 52.0.0", - "arrow-schema 52.0.0", - "base64 0.22.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-cast 50.0.0", + "arrow-ipc 50.0.0", + "arrow-schema 50.0.0", + "base64 0.21.7", "bytes", "futures", "paste", - "prost 0.12.6", - "prost-types 0.12.6", + "prost 0.12.1", "tokio", - "tonic 0.11.0", + "tonic 0.10.2", ] [[package]] @@ -894,64 +893,65 @@ dependencies = [ [[package]] name = "arrow-udf-flight" -version = "0.3.0" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82fbd8c05d6c16cccfc762bc865ff40fdb73cad6776a7ec14b65973f83e19074" +checksum = "4adb3a066bd22fb520bc3d040d9d59ee54f320c21faeb6df815ea20445c80c54" dependencies = [ - "arrow-array 52.0.0", + "arrow-array 50.0.0", "arrow-flight", - "arrow-schema 52.0.0", - "arrow-select 52.0.0", + "arrow-schema 50.0.0", + "arrow-select 50.0.0", "futures-util", "thiserror", "tokio", - "tonic 0.11.0", + "tonic 0.10.2", "tracing", ] [[package]] name = "arrow-udf-js" -version = "0.4.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e09de178aed1119c14dab54def3bd9d383dfdb84f1af467bb0fbf0b64a88b04" +checksum = "76cb6d108605c5489fff1ef9c520656946ad05ed0de3ea6d26d56bcb34bdb8c5" dependencies = [ "anyhow", - "arrow-array 52.0.0", - "arrow-buffer 52.0.0", - "arrow-schema 52.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-schema 50.0.0", "atomic-time", "rquickjs", ] [[package]] name = "arrow-udf-python" -version = "0.3.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd364e0016acc32928c7e00a768f5fa6339ae8a243052fd3d354639d57b08aa" +checksum = "4506efc6fbc200c083add2a7ed4e3616a859941a745e922320ae7051d90d12ec" dependencies = [ "anyhow", - "arrow-array 52.0.0", - "arrow-buffer 52.0.0", - "arrow-ipc 52.0.0", - "arrow-schema 52.0.0", + "arrow-array 50.0.0", + "arrow-buffer 50.0.0", + "arrow-ipc 50.0.0", + "arrow-schema 50.0.0", + "lazy_static", "pyo3", "pyo3-build-config", ] [[package]] name = "arrow-udf-wasm" -version = "0.3.0" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e322a311a82351c980de6ac3e4a74fcd483d3591096411242208632e1b7acab" +checksum = "eb829e25925161d93617d4b053bae03fe51e708f2cce088d85df856011d4f369" dependencies = [ "anyhow", - "arrow-array 52.0.0", - "arrow-ipc 52.0.0", - "arrow-schema 52.0.0", + "arrow-array 50.0.0", + "arrow-ipc 50.0.0", + "arrow-schema 50.0.0", "async-trait", "base64 0.22.0", "genawaiter", - "once_cell", + "lazy_static", "tempfile", "wasi-common", "wasmtime", @@ -2828,12 +2828,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "cobs" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" - [[package]] name = "colorchoice" version = "1.0.0" @@ -3045,18 +3039,18 @@ dependencies = [ [[package]] name = "cranelift-bforest" -version = "0.109.0" +version = "0.107.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b6b33d7e757a887989eb18b35712b2a67d96171ec3149d1bfb657b29b7b367c" +checksum = "79b27922a6879b5b5361d0a084cb0b1941bf109a98540addcb932da13b68bed4" dependencies = [ "cranelift-entity", ] [[package]] name = "cranelift-codegen" -version = "0.109.0" +version = "0.107.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9acf15cb22be42d07c3b57d7856329cb228b7315d385346149df2566ad5e4aa" +checksum = "304c455b28bf56372729acb356afbb55d622f2b0f2f7837aa5e57c138acaac4d" dependencies = [ "bumpalo", "cranelift-bforest", @@ -3069,40 +3063,39 @@ dependencies = [ "hashbrown 0.14.3", "log", "regalloc2", - "rustc-hash", "smallvec", "target-lexicon", ] [[package]] name = "cranelift-codegen-meta" -version = "0.109.0" +version = "0.107.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e934d301392b73b3f8b0540391fb82465a0f179a3cee7c726482ac4727efcc97" +checksum = "1653c56b99591d07f67c5ca7f9f25888948af3f4b97186bff838d687d666f613" dependencies = [ "cranelift-codegen-shared", ] [[package]] name = "cranelift-codegen-shared" -version = "0.109.0" +version = "0.107.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb2a2566b3d54b854dfb288b3b187f6d3d17d6f762c92898207eba302931da" +checksum = "f5b6a9cf6b6eb820ee3f973a0db313c05dc12d370f37b4fe9630286e1672573f" [[package]] name = "cranelift-control" -version = "0.109.0" +version = "0.107.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0100f33b704cdacd01ad66ff41f8c5030d57cbff078e2a4e49ab1822591299fa" +checksum = "d9d06e6bf30075fb6bed9e034ec046475093392eea1aff90eb5c44c4a033d19a" dependencies = [ "arbitrary", ] [[package]] name = "cranelift-entity" -version = "0.109.0" +version = "0.107.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8cfdc315e5d18997093e040a8d234bea1ac1e118a716d3e30f40d449e78207b" +checksum = "29be04f931b73cdb9694874a295027471817f26f26d2f0ebe5454153176b6e3a" dependencies = [ "serde", "serde_derive", @@ -3110,9 +3103,9 @@ dependencies = [ [[package]] name = "cranelift-frontend" -version = "0.109.0" +version = "0.107.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f74b84f16af2e982b0c0c72233503d9d55cbfe3865dbe807ca28dc6642a28b5" +checksum = "a07fd7393041d7faa2f37426f5dc7fc04003b70988810e8c063beefeff1cd8f9" dependencies = [ "cranelift-codegen", "log", @@ -3122,15 +3115,15 @@ dependencies = [ [[package]] name = "cranelift-isle" -version = "0.109.0" +version = "0.107.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adf306d3dde705fb94bd48082f01d38c4ededc74293a4c007805f610bf08bc6e" +checksum = "f341d7938caa6dff8149dac05bb2b53fc680323826b83b4cf175ab9f5139a3c9" [[package]] name = "cranelift-native" -version = "0.109.0" +version = "0.107.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ea0ebdef7aff4a79bcbc8b6495f31315f16b3bf311152f472eaa8d679352581" +checksum = "82af6066e6448d26eeabb7aa26a43f7ff79f8217b06bade4ee6ef230aecc8880" dependencies = [ "cranelift-codegen", "libc", @@ -3139,9 +3132,9 @@ dependencies = [ [[package]] name = "cranelift-wasm" -version = "0.109.0" +version = "0.107.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d549108a1942065cdbac3bb96c2952afa0e1b9a3beff4b08c4308ac72257576d" +checksum = "2766fab7284a914a7f17f90ebe865c86453225fb8637ac31f123f5028fee69cd" dependencies = [ "cranelift-codegen", "cranelift-entity", @@ -3805,7 +3798,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "object_store", - "prost 0.12.6", + "prost 0.12.1", ] [[package]] @@ -4331,18 +4324,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "embedded-io" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" - -[[package]] -name = "embedded-io" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" - [[package]] name = "encode_unicode" version = "0.3.6" @@ -5338,7 +5319,7 @@ dependencies = [ "http 0.2.9", "thiserror", "tokio", - "tonic 0.11.0", + "tonic 0.10.2", "tower", "tracing", "trust-dns-resolver 0.23.2", @@ -5607,7 +5588,6 @@ checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ "ahash 0.8.11", "allocator-api2", - "serde", ] [[package]] @@ -7849,9 +7829,9 @@ dependencies = [ [[package]] name = "object" -version = "0.36.4" +version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" +checksum = "d8dd6c0cdf9429bce006e1362bfce61fa1bfd8c898a643ed8d2b471934701d3d" dependencies = [ "crc32fast", "hashbrown 0.14.3", @@ -8071,7 +8051,7 @@ dependencies = [ "opentelemetry", "opentelemetry-proto", "opentelemetry_sdk", - "prost 0.12.6", + "prost 0.12.1", "thiserror", "tokio", "tonic 0.11.0", @@ -8085,7 +8065,7 @@ checksum = "984806e6cf27f2b49282e2a05e288f30594f3dbc74eb7a6e99422bc48ed78162" dependencies = [ "opentelemetry", "opentelemetry_sdk", - "prost 0.12.6", + "prost 0.12.1", "tonic 0.11.0", ] @@ -8769,18 +8749,6 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" -[[package]] -name = "postcard" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f7f0a8d620d71c457dd1d47df76bb18960378da56af4527aaa10f515eee732e" -dependencies = [ - "cobs", - "embedded-io 0.4.0", - "embedded-io 0.6.1", - "serde", -] - [[package]] name = "postgres-derive" version = "0.4.5" @@ -9123,12 +9091,12 @@ dependencies = [ [[package]] name = "prost" -version = "0.12.6" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" +checksum = "f4fdd22f3b9c31b53c060df4a0613a1c7f062d4115a2b984dd15b1858f7e340d" dependencies = [ "bytes", - "prost-derive 0.12.6", + "prost-derive 0.12.1", ] [[package]] @@ -9177,8 +9145,8 @@ dependencies = [ "once_cell", "petgraph", "prettyplease 0.2.15", - "prost 0.12.6", - "prost-types 0.12.6", + "prost 0.12.1", + "prost-types 0.12.1", "regex", "syn 2.0.66", "tempfile", @@ -9221,12 +9189,12 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.12.6" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" +checksum = "265baba7fabd416cf5078179f7d2cbeca4ce7a9041111900675ea7c4cb8a4c32" dependencies = [ "anyhow", - "itertools 0.12.1", + "itertools 0.11.0", "proc-macro2", "quote", "syn 2.0.66", @@ -9281,11 +9249,11 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.12.6" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" +checksum = "e081b29f63d83a4bc75cfc9f3fe424f9156cf92d8a4f0c9407cce9a1b67327cf" dependencies = [ - "prost 0.12.6", + "prost 0.12.1", ] [[package]] @@ -10289,12 +10257,16 @@ dependencies = [ "anyhow", "arc-swap", "arrow-array 48.0.1", + "arrow-array 50.0.0", "arrow-array 52.0.0", "arrow-buffer 48.0.1", + "arrow-buffer 50.0.0", "arrow-buffer 52.0.0", "arrow-cast 48.0.1", + "arrow-cast 50.0.0", "arrow-cast 52.0.0", "arrow-schema 48.0.1", + "arrow-schema 50.0.0", "arrow-schema 52.0.0", "async-trait", "auto_enums", @@ -14416,8 +14388,11 @@ dependencies = [ "hyper-timeout 0.4.1", "percent-encoding", "pin-project", - "prost 0.12.6", + "prost 0.12.1", + "rustls 0.21.11", + "rustls-pemfile 1.0.4", "tokio", + "tokio-rustls 0.24.1", "tokio-stream 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", "tower", "tower-layer", @@ -14443,11 +14418,8 @@ dependencies = [ "hyper-timeout 0.4.1", "percent-encoding", "pin-project", - "prost 0.12.6", - "rustls-pemfile 2.1.1", - "rustls-pki-types", + "prost 0.12.1", "tokio", - "tokio-rustls 0.25.0", "tokio-stream 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", "tower", "tower-layer", @@ -15170,9 +15142,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasi-common" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b86fd41e1e26ff6af9451c6a332a5ce5f5283ca51e87d875cdd9a05305598ee3" +checksum = "63255d85e10627b07325d7cf4e5fe5a40fa4ff183569a0a67931be26d50ede07" dependencies = [ "anyhow", "bitflags 2.6.0", @@ -15268,18 +15240,18 @@ checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "wasm-encoder" -version = "0.209.1" +version = "0.202.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4a05336882dae732ce6bd48b7e11fe597293cb72c13da4f35d7d5f8d53b2a7" +checksum = "bfd106365a7f5f7aa3c1916a98cbb3ad477f5ff96ddb130285a91c6e7429e67a" dependencies = [ "leb128", ] [[package]] name = "wasm-encoder" -version = "0.217.0" +version = "0.206.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b88b0814c9a2b323a9b46c687e726996c255ac8b64aa237dd11c81ed4854760" +checksum = "d759312e1137f199096d80a70be685899cd7d3d09c572836bb2e9b69b4dc3b1e" dependencies = [ "leb128", ] @@ -15312,23 +15284,20 @@ dependencies = [ [[package]] name = "wasmparser" -version = "0.209.1" +version = "0.202.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07035cc9a9b41e62d3bb3a3815a66ab87c993c06fe1cf6b2a3f2a18499d937db" +checksum = "d6998515d3cf3f8b980ef7c11b29a9b1017d4cf86b99ae93b546992df9931413" dependencies = [ - "ahash 0.8.11", "bitflags 2.6.0", - "hashbrown 0.14.3", "indexmap 2.2.6", "semver 1.0.18", - "serde", ] [[package]] name = "wasmprinter" -version = "0.209.1" +version = "0.202.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ceca8ae6eaa8c7c87b33c25c53bdf299f8c2a764aee1179402ff7652ef3a6859" +checksum = "ab1cc9508685eef9502e787f4d4123745f5651a1e29aec047645d3cac1e2da7a" dependencies = [ "anyhow", "wasmparser", @@ -15336,45 +15305,35 @@ dependencies = [ [[package]] name = "wasmtime" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "786d8b5e7a4d54917c5ebe555b9667337e5f93383f49bddaaeec2eba68093b45" +checksum = "5a5990663c28d81015ddbb02a068ac1bf396a4ea296eba7125b2dfc7c00cb52e" dependencies = [ "addr2line", "anyhow", "async-trait", + "bincode 1.3.3", "bumpalo", - "cc", "cfg-if", "encoding_rs", "fxprof-processed-profile", "gimli", - "hashbrown 0.14.3", "indexmap 2.2.6", "ittapi", "libc", - "libm", "log", - "mach2", - "memfd", - "memoffset", - "object 0.36.4", + "object 0.33.0", "once_cell", "paste", - "postcard", - "psm", "rayon", "rustix 0.38.31", "semver 1.0.18", "serde", "serde_derive", "serde_json", - "smallvec", - "sptr", "target-lexicon", - "wasm-encoder 0.209.1", + "wasm-encoder 0.202.0", "wasmparser", - "wasmtime-asm-macros", "wasmtime-cache", "wasmtime-component-macro", "wasmtime-component-util", @@ -15383,8 +15342,8 @@ dependencies = [ "wasmtime-fiber", "wasmtime-jit-debug", "wasmtime-jit-icache-coherence", + "wasmtime-runtime", "wasmtime-slab", - "wasmtime-versioned-export-macros", "wasmtime-winch", "wat", "windows-sys 0.52.0", @@ -15392,24 +15351,24 @@ dependencies = [ [[package]] name = "wasmtime-asm-macros" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d697d99c341d4a9ffb72f3af7a02124d233eeb59aee010f36d88e97cca553d5e" +checksum = "625ee94c72004f3ea0228989c9506596e469517d7d0ed66f7300d1067bdf1ca9" dependencies = [ "cfg-if", ] [[package]] name = "wasmtime-cache" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "916610f9ae9a6c22deb25bba2e6247ba9f00b093d30620875203b91328a1adfa" +checksum = "98534bf28de232299e83eab33984a7a6c40c69534d6bd0ea216150b63d41a83a" dependencies = [ "anyhow", "base64 0.21.7", + "bincode 1.3.3", "directories-next", "log", - "postcard", "rustix 0.38.31", "serde", "serde_derive", @@ -15421,9 +15380,9 @@ dependencies = [ [[package]] name = "wasmtime-component-macro" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b29b462b068e73b5b27fae092a27f47e5937cabf6b26be2779c978698a52feca" +checksum = "64f84414a25ee3a624c8b77550f3fe7b5d8145bd3405ca58886ee6900abb6dc2" dependencies = [ "anyhow", "proc-macro2", @@ -15436,15 +15395,15 @@ dependencies = [ [[package]] name = "wasmtime-component-util" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d2912c53d9054984b380dfbd7579f9c3681b2a73b903a56bd71a1c4f175f1e" +checksum = "78580bdb4e04c7da3bf98088559ca1d29382668536e4d5c7f2f966d79c390307" [[package]] name = "wasmtime-cranelift" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3975deafea000457ba84355c7c0fce0372937204f77026510b7b454f28a3a65" +checksum = "b60df0ee08c6a536c765f69e9e8205273435b66d02dd401e938769a2622a6c1a" dependencies = [ "anyhow", "cfg-if", @@ -15456,7 +15415,7 @@ dependencies = [ "cranelift-wasm", "gimli", "log", - "object 0.36.4", + "object 0.33.0", "target-lexicon", "thiserror", "wasmparser", @@ -15466,23 +15425,24 @@ dependencies = [ [[package]] name = "wasmtime-environ" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f444e900e848b884d8a8a2949b6f5b92af642a3e663ff8fbe78731143a55be61" +checksum = "64ffc1613db69ee47c96738861534f9a405e422a5aa00224fbf5d410b03fb445" dependencies = [ "anyhow", + "bincode 1.3.3", "cpp_demangle", "cranelift-entity", "gimli", "indexmap 2.2.6", "log", - "object 0.36.4", - "postcard", + "object 0.33.0", "rustc-demangle", "serde", "serde_derive", "target-lexicon", - "wasm-encoder 0.209.1", + "thiserror", + "wasm-encoder 0.202.0", "wasmparser", "wasmprinter", "wasmtime-component-util", @@ -15491,9 +15451,9 @@ dependencies = [ [[package]] name = "wasmtime-fiber" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ded58eb2d1bf0dcd2182d0ccd7055c4b10b50d711514f1d73f61515d0fa829d" +checksum = "f043514a23792761c5765f8ba61a4aa7d67f260c0c37494caabceb41d8ae81de" dependencies = [ "anyhow", "cc", @@ -15506,11 +15466,11 @@ dependencies = [ [[package]] name = "wasmtime-jit-debug" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bc54198c6720f098210a85efb3ba8c078d1de4d373cdb6778850a66ae088d11" +checksum = "9c0ca2ad8f5d2b37f507ef1c935687a690e84e9f325f5a2af9639440b43c1f0e" dependencies = [ - "object 0.36.4", + "object 0.33.0", "once_cell", "rustix 0.38.31", "wasmtime-versioned-export-macros", @@ -15518,40 +15478,69 @@ dependencies = [ [[package]] name = "wasmtime-jit-icache-coherence" -version = "22.0.0" +version = "20.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a9f93a3289057b26dc75eb84d6e60d7694f7d169c7c09597495de6e016a13ff" +dependencies = [ + "cfg-if", + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "wasmtime-runtime" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5afe2f0499542f9a4bcfa1b55bfdda803b6ade4e7c93c6b99e0f39dba44b0a91" +checksum = "c6332a2b0af4224c3ea57c857ad39acd2780ccc2b0c99ba1baa01864d90d7c94" dependencies = [ "anyhow", + "cc", "cfg-if", + "encoding_rs", + "indexmap 2.2.6", "libc", + "log", + "mach2", + "memfd", + "memoffset", + "paste", + "psm", + "rustix 0.38.31", + "sptr", + "wasm-encoder 0.202.0", + "wasmtime-asm-macros", + "wasmtime-environ", + "wasmtime-fiber", + "wasmtime-jit-debug", + "wasmtime-slab", + "wasmtime-versioned-export-macros", "windows-sys 0.52.0", ] [[package]] name = "wasmtime-slab" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a7de1f2bec5bbb35d532e61c85c049dc84ae671df60492f90b954ecf21169e7" +checksum = "8b3655075824a374c536a2b2cc9283bb765fcdf3d58b58587862c48571ad81ef" [[package]] name = "wasmtime-types" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "412463e9000e14cf6856be48628d2213c20c153e29ffc22b036980c892ea6964" +checksum = "b98cf64a242b0b9257604181ca28b28a5fcaa4c9ea1d396f76d1d2d1c5b40eef" dependencies = [ "cranelift-entity", "serde", "serde_derive", - "smallvec", + "thiserror", "wasmparser", ] [[package]] name = "wasmtime-versioned-export-macros" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de5a9bc4f44ceeb168e9e8e3be4e0b4beb9095b468479663a9e24c667e36826f" +checksum = "8561d9e2920db2a175213d557d71c2ac7695831ab472bbfafb9060cd1034684f" dependencies = [ "proc-macro2", "quote", @@ -15560,14 +15549,14 @@ dependencies = [ [[package]] name = "wasmtime-winch" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed4db238a0241df2d15f79ad17b3a37a27f2ea6cb885894d81b42ae107544466" +checksum = "a06b573d14ac846a0fb8c541d8fca6a64acf9a1d176176982472274ab1d2fa5d" dependencies = [ "anyhow", "cranelift-codegen", "gimli", - "object 0.36.4", + "object 0.33.0", "target-lexicon", "wasmparser", "wasmtime-cranelift", @@ -15577,9 +15566,9 @@ dependencies = [ [[package]] name = "wasmtime-wit-bindgen" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc077306b38288262e5ba01d4b21532a6987416cdc0aedf04bb06c22a68fdc" +checksum = "595bc7bb3b0ff4aa00fab718c323ea552c3034d77abc821a35112552f2ea487a" dependencies = [ "anyhow", "heck 0.4.1", @@ -15598,24 +15587,24 @@ dependencies = [ [[package]] name = "wast" -version = "217.0.0" +version = "206.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79004ecebded92d3c710d4841383368c7f04b63d0992ddd6b0c7d5029b7629b7" +checksum = "68586953ee4960b1f5d84ebf26df3b628b17e6173bc088e0acfbce431469795a" dependencies = [ "bumpalo", "leb128", "memchr", "unicode-width", - "wasm-encoder 0.217.0", + "wasm-encoder 0.206.0", ] [[package]] name = "wat" -version = "1.217.0" +version = "1.206.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c126271c3d92ca0f7c63e4e462e40c69cca52fd4245fcda730d1cf558fb55088" +checksum = "da4c6f2606276c6e991aebf441b2fc92c517807393f039992a3e0ad873efe4ad" dependencies = [ - "wast 217.0.0", + "wast 206.0.0", ] [[package]] @@ -15690,9 +15679,9 @@ checksum = "653f141f39ec16bba3c5abe400a0c60da7468261cc2cbf36805022876bc721a8" [[package]] name = "wiggle" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29830e5d01c182d24b94092c697aa7ab0ee97d22e78a2bf40ca91eae6ebca5c2" +checksum = "1b6552dda951239e219c329e5a768393664e8d120c5e0818487ac2633f173b1f" dependencies = [ "anyhow", "async-trait", @@ -15705,9 +15694,9 @@ dependencies = [ [[package]] name = "wiggle-generate" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "557567f2793508760cd855f7659b7a0b9dc4dbc451f53f1415d6943a15311ade" +checksum = "da64cb31e0bfe8b1d2d13956ef9fd5c77545756a1a6ef0e6cfd44e8f1f207aed" dependencies = [ "anyhow", "heck 0.4.1", @@ -15720,9 +15709,9 @@ dependencies = [ [[package]] name = "wiggle-macro" -version = "22.0.0" +version = "20.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc26129a8aea20b62c961d1b9ab4a3c3b56b10042ed85d004f8678af0f21ba6e" +checksum = "900b2416ef2ff2903ded6cf55d4a941fed601bf56a8c4874856d7a77c1891994" dependencies = [ "proc-macro2", "quote", @@ -15763,9 +15752,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "winch-codegen" -version = "0.20.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85c6915884e731b2db0d8cf08cb64474cb69221a161675fd3c135f91febc3daa" +checksum = "fb23450977f9d4a23c02439cf6899340b2d68887b19465c5682740d9cc37d52e" dependencies = [ "anyhow", "cranelift-codegen", @@ -16073,9 +16062,9 @@ dependencies = [ [[package]] name = "wit-parser" -version = "0.209.1" +version = "0.202.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e79b9e3c0b6bb589dec46317e645851e0db2734c44e2be5e251b03ff4a51269" +checksum = "744237b488352f4f27bca05a10acb79474415951c450e52ebd0da784c1df2bcc" dependencies = [ "anyhow", "id-arena", diff --git a/Cargo.toml b/Cargo.toml index c34b414ad0a7b..01123152d56da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -140,32 +140,18 @@ prost-build = { version = "0.13" } icelake = { git = "https://github.com/risingwavelabs/icelake.git", rev = "3f4724158acee37a4785f56670a1427993a58739", features = [ "prometheus", ] } -arrow-array-iceberg = { package = "arrow-array", version = "52" } -arrow-schema-iceberg = { package = "arrow-schema", version = "52" } -arrow-buffer-iceberg = { package = "arrow-buffer", version = "52" } -arrow-cast-iceberg = { package = "arrow-cast", version = "52" } + # branch dev iceberg = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "84bf51c9d0d5886e4ee306ca4f383f029e1767a4" } iceberg-catalog-rest = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "84bf51c9d0d5886e4ee306ca4f383f029e1767a4" } iceberg-catalog-glue = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "84bf51c9d0d5886e4ee306ca4f383f029e1767a4" } opendal = "0.47" -arrow-array = "50" -arrow-arith = "50" -arrow-cast = "50" -arrow-schema = "50" -arrow-buffer = "50" +# used only by arrow-udf-flight arrow-flight = "50" -arrow-select = "50" -arrow-ord = "50" -arrow-row = "50" arrow-udf-js = "0.3.1" arrow-udf-wasm = { version = "0.2.2", features = ["build"] } arrow-udf-python = "0.2" arrow-udf-flight = "0.1" -arrow-array-deltalake = { package = "arrow-array", version = "48.0.1" } -arrow-buffer-deltalake = { package = "arrow-buffer", version = "48.0.1" } -arrow-cast-deltalake = { package = "arrow-cast", version = "48.0.1" } -arrow-schema-deltalake = { package = "arrow-schema", version = "48.0.1" } clap = { version = "4", features = ["cargo", "derive", "env"] } # Use a forked version which removes the dependencies on dynamo db to reduce # compile time and binary size. diff --git a/src/common/Cargo.toml b/src/common/Cargo.toml index fdc2b6369f26f..58a4b9870dfaf 100644 --- a/src/common/Cargo.toml +++ b/src/common/Cargo.toml @@ -17,14 +17,18 @@ normal = ["workspace-hack"] ahash = "0.8" anyhow = "1" arc-swap = "1" -arrow-52-array = { workspace = true } -arrow-52-buffer = { workspace = true } -arrow-52-cast = { workspace = true } -arrow-52-schema = { workspace = true } -arrow-array-deltalake = { workspace = true } -arrow-buffer-deltalake = { workspace = true } -arrow-cast-deltalake = { workspace = true } -arrow-schema-deltalake = { workspace = true } +arrow-48-array = { package = "arrow-array", version = "48" } +arrow-48-buffer = { package = "arrow-buffer", version = "48" } +arrow-48-cast = { package = "arrow-cast", version = "48" } +arrow-48-schema = { package = "arrow-schema", version = "48" } +arrow-50-array = { package = "arrow-array", version = "50" } +arrow-50-buffer = { package = "arrow-buffer", version = "50" } +arrow-50-cast = { package = "arrow-cast", version = "50" } +arrow-50-schema = { package = "arrow-schema", version = "50" } +arrow-52-array = { package = "arrow-array", version = "52" } +arrow-52-buffer = { package = "arrow-buffer", version = "52" } +arrow-52-cast = { package = "arrow-cast", version = "52" } +arrow-52-schema = { package = "arrow-schema", version = "52" } async-trait = "0.1" auto_enums = { workspace = true } auto_impl = "1" diff --git a/src/common/src/array/arrow/arrow_48.rs b/src/common/src/array/arrow/arrow_48.rs new file mode 100644 index 0000000000000..6f7333b0e70c1 --- /dev/null +++ b/src/common/src/array/arrow/arrow_48.rs @@ -0,0 +1,23 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[allow(clippy::duplicate_mod)] +#[path = "./arrow_impl.rs"] +mod arrow_impl; +type ArrowIntervalType = i128; +pub use arrow_impl::{FromArrow, ToArrow}; +pub use { + arrow_48_array as arrow_array, arrow_48_buffer as arrow_buffer, arrow_48_cast as arrow_cast, + arrow_48_schema as arrow_schema, +}; diff --git a/src/common/src/array/arrow/arrow_50.rs b/src/common/src/array/arrow/arrow_50.rs new file mode 100644 index 0000000000000..17b5328806ce6 --- /dev/null +++ b/src/common/src/array/arrow/arrow_50.rs @@ -0,0 +1,23 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[allow(clippy::duplicate_mod)] +#[path = "./arrow_impl.rs"] +mod arrow_impl; +type ArrowIntervalType = i128; +pub use arrow_impl::{FromArrow, ToArrow}; +pub use { + arrow_50_array as arrow_array, arrow_50_buffer as arrow_buffer, arrow_50_cast as arrow_cast, + arrow_50_schema as arrow_schema, +}; diff --git a/src/common/src/array/arrow/arrow_52.rs b/src/common/src/array/arrow/arrow_52.rs index 6a5a4555e38cb..1590366e1281b 100644 --- a/src/common/src/array/arrow/arrow_52.rs +++ b/src/common/src/array/arrow/arrow_52.rs @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +#[allow(clippy::duplicate_mod)] #[path = "./arrow_impl.rs"] mod arrow_impl; -use arrow_buffer::IntervalMonthDayNano as ArrowIntervalType; +type ArrowIntervalType = arrow_buffer::IntervalMonthDayNano; pub use arrow_impl::{FromArrow, ToArrow}; -use { +pub use { arrow_52_array as arrow_array, arrow_52_buffer as arrow_buffer, arrow_52_cast as arrow_cast, arrow_52_schema as arrow_schema, }; diff --git a/src/common/src/array/arrow/arrow_deltalake.rs b/src/common/src/array/arrow/arrow_deltalake.rs index 5584f89206bf2..7e3684233745a 100644 --- a/src/common/src/array/arrow/arrow_deltalake.rs +++ b/src/common/src/array/arrow/arrow_deltalake.rs @@ -22,17 +22,11 @@ use std::sync::Arc; use arrow_array::ArrayRef; use num_traits::abs; -pub use { - arrow_array_deltalake as arrow_array, arrow_buffer_deltalake as arrow_buffer, - arrow_cast_deltalake as arrow_cast, arrow_schema_deltalake as arrow_schema, -}; -type ArrowIntervalType = i128; -use self::arrow_impl::ToArrow; +pub use super::arrow_48::{ + arrow_array, arrow_buffer, arrow_cast, arrow_schema, FromArrow, ToArrow, +}; use crate::array::{Array, ArrayError, DataChunk, Decimal, DecimalArray}; -#[expect(clippy::duplicate_mod)] -#[path = "./arrow_impl.rs"] -mod arrow_impl; pub struct DeltaLakeConvert; @@ -103,8 +97,8 @@ mod test { use arrow_array::cast::AsArray; use arrow_array::ArrayRef; use arrow_schema::Field; - use {arrow_array_deltalake as arrow_array, arrow_schema_deltalake as arrow_schema}; + use super::*; use crate::array::arrow::arrow_deltalake::DeltaLakeConvert; use crate::array::{ArrayImpl, Decimal, DecimalArray, ListArray, ListValue}; use crate::bitmap::Bitmap; diff --git a/src/common/src/array/arrow/arrow_iceberg.rs b/src/common/src/array/arrow/arrow_iceberg.rs index 315b23a4807d5..845dcc8a0f6f9 100644 --- a/src/common/src/array/arrow/arrow_iceberg.rs +++ b/src/common/src/array/arrow/arrow_iceberg.rs @@ -19,12 +19,10 @@ use std::sync::Arc; use arrow_array::ArrayRef; use num_traits::abs; -pub use { - arrow_52_array as arrow_array, arrow_52_buffer as arrow_buffer, arrow_52_cast as arrow_cast, - arrow_52_schema as arrow_schema, -}; -pub use super::arrow_52::{FromArrow, ToArrow}; +pub use super::arrow_52::{ + arrow_array, arrow_buffer, arrow_cast, arrow_schema, FromArrow, ToArrow, +}; use crate::array::{Array, ArrayError, ArrayImpl, DataChunk, DataType, DecimalArray}; use crate::types::StructType; @@ -232,9 +230,8 @@ mod test { use std::sync::Arc; use super::arrow_array::{ArrayRef, Decimal128Array}; - use super::arrow_impl::ToArrow; use super::arrow_schema::DataType; - use super::IcebergArrowConvert; + use super::*; use crate::array::{Decimal, DecimalArray}; #[test] diff --git a/src/common/src/array/arrow/arrow_udf.rs b/src/common/src/array/arrow/arrow_udf.rs index e59687a05be23..0a2e85785c772 100644 --- a/src/common/src/array/arrow/arrow_udf.rs +++ b/src/common/src/array/arrow/arrow_udf.rs @@ -20,12 +20,9 @@ use std::sync::Arc; -pub use { - arrow_52_array as arrow_array, arrow_52_buffer as arrow_buffer, arrow_52_cast as arrow_cast, - arrow_52_schema as arrow_schema, +pub use super::arrow_50::{ + arrow_array, arrow_buffer, arrow_cast, arrow_schema, FromArrow, ToArrow, }; - -pub use super::arrow_52::{FromArrow, ToArrow}; use crate::array::{ArrayError, ArrayImpl, DataType, DecimalArray, JsonbArray}; /// Arrow conversion for UDF. diff --git a/src/common/src/array/arrow/mod.rs b/src/common/src/array/arrow/mod.rs index a43d306f926a3..ade14a5948d2f 100644 --- a/src/common/src/array/arrow/mod.rs +++ b/src/common/src/array/arrow/mod.rs @@ -12,31 +12,44 @@ // See the License for the specific language governing permissions and // limitations under the License. +// These mods imports arrow_impl.rs to provide FromArrow, ToArrow traits for corresponding arrow versions, +// and the default From/To implementations. +mod arrow_48; +mod arrow_50; mod arrow_52; +// These mods import mods above and may override some methods. mod arrow_deltalake; mod arrow_iceberg; mod arrow_udf; pub use arrow_deltalake::DeltaLakeConvert; pub use arrow_iceberg::{IcebergArrowConvert, IcebergCreateTableArrowConvert}; -pub use arrow_udf::{FromArrow, ToArrow, UdfArrowConvert}; +pub use arrow_udf::UdfArrowConvert; pub use reexport::*; +/// For other RisingWave crates, they can directly use arrow re-exported here, without adding +/// `arrow` dependencies in their `Cargo.toml`. And they don't need to care about the version. mod reexport { pub use super::arrow_deltalake::{ arrow_array as arrow_array_deltalake, arrow_buffer as arrow_buffer_deltalake, arrow_cast as arrow_cast_deltalake, arrow_schema as arrow_schema_deltalake, + FromArrow as DeltaLakeFromArrow, ToArrow as DeltaLakeToArrow, }; pub use super::arrow_iceberg::{ arrow_array as arrow_array_iceberg, arrow_buffer as arrow_buffer_iceberg, arrow_cast as arrow_cast_iceberg, arrow_schema as arrow_schema_iceberg, + FromArrow as IcebergFromArrow, ToArrow as IcebergToArrow, }; pub use super::arrow_udf::{ arrow_array as arrow_array_udf, arrow_buffer as arrow_buffer_udf, - arrow_cast as arrow_cast_udf, arrow_schema as arrow_schema_udf, + arrow_cast as arrow_cast_udf, arrow_schema as arrow_schema_udf, FromArrow as UdfFromArrow, + ToArrow as UdfToArrow, }; } use crate::types::Interval; +/// Arrow 52 changed the interval type from `i128` to `arrow_buffer::IntervalMonthDayNano`, so +/// we introduced this trait to customize the conversion in `arrow_impl.rs`. +/// We may delete this after all arrow versions are upgraded. trait ArrowIntervalTypeTrait { fn to_interval(self) -> Interval; fn from_interval(value: Interval) -> Self; diff --git a/src/connector/src/error.rs b/src/connector/src/error.rs index cbd25bbae75b6..e8b7a134b60f1 100644 --- a/src/connector/src/error.rs +++ b/src/connector/src/error.rs @@ -64,8 +64,6 @@ def_anyhow_newtype! { icelake::Error => "Iceberg error", iceberg::Error => "IcebergV2 error", redis::RedisError => "Redis error", - // currently, the following two are the same type - // risingwave_common::array::arrow::arrow_schema_udf::ArrowError => "Arrow error", risingwave_common::array::arrow::arrow_schema_iceberg::ArrowError => "Arrow error", google_cloud_pubsub::client::google_cloud_auth::error::Error => "Google Cloud error", rumqttc::tokio_rustls::rustls::Error => "TLS error", diff --git a/src/expr/core/src/aggregate/user_defined.rs b/src/expr/core/src/aggregate/user_defined.rs index e63169da4aefb..2f4fdc5f9f9c5 100644 --- a/src/expr/core/src/aggregate/user_defined.rs +++ b/src/expr/core/src/aggregate/user_defined.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use anyhow::Context; use risingwave_common::array::arrow::arrow_array_udf::ArrayRef; use risingwave_common::array::arrow::arrow_schema_udf::{Field, Fields, Schema, SchemaRef}; -use risingwave_common::array::arrow::{FromArrow, ToArrow, UdfArrowConvert}; +use risingwave_common::array::arrow::{UdfArrowConvert, UdfFromArrow, UdfToArrow}; use risingwave_common::array::Op; use risingwave_common::bitmap::Bitmap; use risingwave_pb::expr::PbUserDefinedFunctionMetadata; diff --git a/src/expr/core/src/expr/expr_udf.rs b/src/expr/core/src/expr/expr_udf.rs index 71a8cc4ce4460..6ae27dabb2458 100644 --- a/src/expr/core/src/expr/expr_udf.rs +++ b/src/expr/core/src/expr/expr_udf.rs @@ -19,7 +19,7 @@ use anyhow::Context; use await_tree::InstrumentAwait; use prometheus::{exponential_buckets, Registry}; use risingwave_common::array::arrow::arrow_schema_udf::{Fields, Schema, SchemaRef}; -use risingwave_common::array::arrow::{FromArrow, ToArrow, UdfArrowConvert}; +use risingwave_common::array::arrow::{UdfArrowConvert, UdfFromArrow, UdfToArrow}; use risingwave_common::array::{Array, ArrayRef, DataChunk}; use risingwave_common::metrics::*; use risingwave_common::monitor::GLOBAL_METRICS_REGISTRY; diff --git a/src/expr/core/src/table_function/user_defined.rs b/src/expr/core/src/table_function/user_defined.rs index 83841f355b1a9..b490e9b023af1 100644 --- a/src/expr/core/src/table_function/user_defined.rs +++ b/src/expr/core/src/table_function/user_defined.rs @@ -16,7 +16,7 @@ use std::sync::Arc; use anyhow::Context; use risingwave_common::array::arrow::arrow_schema_udf::{Fields, Schema, SchemaRef}; -use risingwave_common::array::arrow::{FromArrow, ToArrow, UdfArrowConvert}; +use risingwave_common::array::arrow::{UdfArrowConvert, UdfFromArrow, UdfToArrow}; use risingwave_common::array::I32Array; use risingwave_common::bail; diff --git a/src/expr/impl/src/udf/quickjs.rs b/src/expr/impl/src/udf/quickjs.rs index eb0dee6db9bc4..fb13f3f374b84 100644 --- a/src/expr/impl/src/udf/quickjs.rs +++ b/src/expr/impl/src/udf/quickjs.rs @@ -15,7 +15,7 @@ use arrow_udf_js::{CallMode, Runtime}; use futures_util::StreamExt; use risingwave_common::array::arrow::arrow_schema_udf::{DataType, Field}; -use risingwave_common::array::arrow::{ToArrow, UdfArrowConvert}; +use risingwave_common::array::arrow::{UdfArrowConvert, UdfToArrow}; use super::*; From a85f3e79a08acd6ea47f42faff42a7bf8508ddbd Mon Sep 17 00:00:00 2001 From: xxchan Date: Thu, 19 Sep 2024 21:06:57 +0800 Subject: [PATCH 3/3] fix --- src/expr/impl/src/udf/external.rs | 12 ++++++------ src/expr/impl/src/udf/python.rs | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/expr/impl/src/udf/external.rs b/src/expr/impl/src/udf/external.rs index 0d6ba0e409386..f3055954c3a7e 100644 --- a/src/expr/impl/src/udf/external.rs +++ b/src/expr/impl/src/udf/external.rs @@ -18,11 +18,11 @@ use std::sync::{Arc, LazyLock, Weak}; use std::time::Duration; use anyhow::bail; -use arrow_schema::Fields; use arrow_udf_flight::Client; use futures_util::{StreamExt, TryStreamExt}; use ginepro::{LoadBalancedChannel, ResolutionStrategy}; -use risingwave_common::array::arrow::{ToArrow, UdfArrowConvert}; +use risingwave_common::array::arrow::arrow_schema_udf::{self, Fields}; +use risingwave_common::array::arrow::{UdfArrowConvert, UdfToArrow}; use risingwave_common::util::addr::HostAddr; use thiserror_ext::AsReport; use tokio::runtime::Runtime; @@ -45,15 +45,15 @@ static EXTERNAL: UdfImplDescriptor = UdfImplDescriptor { }; // A helper function to create a unnamed field from data type. let to_field = |data_type| convert.to_arrow_field("", data_type); - let args = arrow_schema::Schema::new( + let args = arrow_schema_udf::Schema::new( opts.arg_types .iter() .map(to_field) .try_collect::()?, ); - let returns = arrow_schema::Schema::new(if opts.kind.is_table() { + let returns = arrow_schema_udf::Schema::new(if opts.kind.is_table() { vec![ - arrow_schema::Field::new("row", arrow_schema::DataType::Int32, true), + arrow_schema_udf::Field::new("row", arrow_schema_udf::DataType::Int32, true), to_field(opts.return_type)?, ] } else { @@ -285,7 +285,7 @@ fn is_tonic_error(err: &arrow_udf_flight::Error) -> bool { } /// Check if two list of data types match, ignoring field names. -fn data_types_match(a: &arrow_schema::Schema, b: &arrow_schema::Schema) -> bool { +fn data_types_match(a: &arrow_schema_udf::Schema, b: &arrow_schema_udf::Schema) -> bool { if a.fields().len() != b.fields().len() { return false; } diff --git a/src/expr/impl/src/udf/python.rs b/src/expr/impl/src/udf/python.rs index 9ca4b2a5b2a24..b60faced6f41c 100644 --- a/src/expr/impl/src/udf/python.rs +++ b/src/expr/impl/src/udf/python.rs @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use arrow_schema::{DataType, Field}; +use arrow_schema_udf::{DataType, Field}; use arrow_udf_python::{CallMode, Runtime}; use futures_util::StreamExt; -use risingwave_common::array::arrow::{ToArrow, UdfArrowConvert}; +use risingwave_common::array::arrow::{arrow_schema_udf, UdfArrowConvert, UdfToArrow}; use super::*;