diff --git a/.gitignore b/.gitignore index 72e3ed1487fe0..5f54a467b21b7 100644 --- a/.gitignore +++ b/.gitignore @@ -71,12 +71,12 @@ e2e_test/generated/* scale-test.tar.zst simulation-it-test.tar.zst - # hummock-trace .trace # spark binary e2e_test/iceberg/spark-*-bin* +e2e_test/iceberg/metastore_db **/poetry.lock diff --git a/Cargo.lock b/Cargo.lock index 32c5fe29fc5aa..fce73f9891743 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -991,7 +991,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "136d4d23bcc79e27423727b36823d86233aad06dfea531837b038394d11e9928" dependencies = [ "concurrent-queue", - "event-listener 5.2.0", + "event-listener 5.3.1", "event-listener-strategy", "futures-core", "pin-project-lite", @@ -2345,9 +2345,9 @@ dependencies = [ [[package]] name = "bytecount" -version = "0.6.3" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" +checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" [[package]] name = "bytemuck" @@ -2418,9 +2418,9 @@ checksum = "981520c98f422fcc584dc1a95c334e6953900b9106bc47a9839b81790009eb21" [[package]] name = "camino" -version = "1.1.6" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" +checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3" dependencies = [ "serde", ] @@ -2498,9 +2498,9 @@ checksum = "1582e1c9e755dd6ad6b224dcffb135d199399a4568d454bd89fe515ca8425695" [[package]] name = "cargo-platform" -version = "0.1.3" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cfa25e60aea747ec7e1124f238816749faa93759c6ff5b31f1ccdda137f4479" +checksum = "24b1f0365a6c6bb4020cd05806fd0d33c44d38046b8bd7f0e40814b9763cabfc" dependencies = [ "serde", ] @@ -2852,9 +2852,9 @@ dependencies = [ [[package]] name = "concurrent-queue" -version = "2.2.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62ec6771ecfa0762d24683ee5a32ad78487a3d3afdc0fb8cae19d2c5deb50b7c" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" dependencies = [ "crossbeam-utils", ] @@ -4568,9 +4568,9 @@ checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" [[package]] name = "event-listener" -version = "5.2.0" +version = "5.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b5fb89194fa3cad959b833185b3063ba881dbfc7030680b314250779fb4cc91" +checksum = "6032be9bd27023a771701cc49f9f053c751055f71efb2e0ae5c15809093675ba" dependencies = [ "concurrent-queue", "parking", @@ -4583,7 +4583,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "332f51cb23d20b0de8458b86580878211da09bcd4503cb579c225b3d124cabb3" dependencies = [ - "event-listener 5.2.0", + "event-listener 5.3.1", "pin-project-lite", ] @@ -6004,8 +6004,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "651dfca7c429918e164607a549287cfdd1e7814d2e4cb577d0d6dc57fe19b785" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=84bf51c9d0d5886e4ee306ca4f383f029e1767a4#84bf51c9d0d5886e4ee306ca4f383f029e1767a4" dependencies = [ "anyhow", "apache-avro 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -6025,11 +6024,13 @@ 
dependencies = [ "fnv", "futures", "itertools 0.13.0", + "moka", "murmur3", "once_cell", "opendal 0.49.0", "ordered-float 4.1.1", "parquet 52.0.0", + "paste", "reqwest 0.12.4", "rust_decimal", "serde", @@ -6039,7 +6040,7 @@ dependencies = [ "serde_repr", "serde_with 3.8.0", "tokio", - "typed-builder 0.19.1", + "typed-builder 0.20.0", "url", "uuid", ] @@ -6047,8 +6048,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-glue" version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ef7c992442a80c46975e08f3862140ca3e1c1c772aa68baaf65bb08f97ff07" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=84bf51c9d0d5886e4ee306ca4f383f029e1767a4#84bf51c9d0d5886e4ee306ca4f383f029e1767a4" dependencies = [ "anyhow", "async-trait", @@ -6058,15 +6058,14 @@ dependencies = [ "log", "serde_json", "tokio", - "typed-builder 0.19.1", + "typed-builder 0.20.0", "uuid", ] [[package]] name = "iceberg-catalog-rest" version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f351c7b964fa6f3b4f976f8de3f16f1bf84eea8478606aaebdfd6a871d6b082c" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=84bf51c9d0d5886e4ee306ca4f383f029e1767a4#84bf51c9d0d5886e4ee306ca4f383f029e1767a4" dependencies = [ "async-trait", "chrono", @@ -6079,14 +6078,14 @@ dependencies = [ "serde_derive", "serde_json", "tokio", - "typed-builder 0.19.1", + "typed-builder 0.20.0", "uuid", ] [[package]] name = "icelake" version = "0.3.141592654" -source = "git+https://github.com/risingwavelabs/icelake.git?rev=1860eb315183a5f3f72b4097c1e40d49407f8373#1860eb315183a5f3f72b4097c1e40d49407f8373" +source = "git+https://github.com/risingwavelabs/icelake.git?rev=3f4724158acee37a4785f56670a1427993a58739#3f4724158acee37a4785f56670a1427993a58739" dependencies = [ "anyhow", "apache-avro 0.17.0 (git+https://github.com/apache/avro.git)", @@ -9127,7 +9126,7 @@ checksum = "8bdf592881d821b83d471f8af290226c8d51402259e9bb5be7f9f8bdebbb11ac" dependencies = [ "bytes", "heck 0.4.1", - "itertools 0.10.5", + "itertools 0.11.0", "log", "multimap 0.8.3", "once_cell", @@ -9182,7 +9181,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "265baba7fabd416cf5078179f7d2cbeca4ce7a9041111900675ea7c4cb8a4c32" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools 0.11.0", "proc-macro2", "quote", "syn 2.0.66", @@ -9311,11 +9310,11 @@ dependencies = [ [[package]] name = "pulldown-cmark" -version = "0.9.3" +version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a1a2f1f0a7ecff9c31abbe177637be0e97a0aef46cf8738ece09327985d998" +checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", "memchr", "unicase", ] @@ -9368,7 +9367,7 @@ dependencies = [ "indoc", "libc", "memoffset", - "parking_lot 0.11.2", + "parking_lot 0.12.1", "portable-atomic", "pyo3-build-config", "pyo3-ffi", @@ -10614,6 +10613,7 @@ dependencies = [ "easy-ext", "enum-as-inner 0.6.0", "expect-test", + "fs-err", "futures", "futures-async-stream", "gcp-bigquery-client", @@ -10652,11 +10652,8 @@ dependencies = [ "pretty_assertions", "prometheus", "prost 0.13.1", - "prost-build 0.12.1", "prost-reflect", "prost-types 0.13.1", - "protobuf-native", - "protobuf-src", "pulsar", "quote", "rand", @@ -10718,11 +10715,19 @@ dependencies = [ "chrono", "easy-ext", "expect-test", + "fs-err", "hex", "itertools 0.12.1", "jsonbb", "jsonschema-transpiler", + "madsim-tokio", 
"num-bigint", + "prost 0.13.1", + "prost-build 0.12.1", + "prost-reflect", + "prost-types 0.13.1", + "protobuf-native", + "protobuf-src", "risingwave_common", "risingwave_pb", "rust_decimal", @@ -14833,6 +14838,15 @@ dependencies = [ "typed-builder-macro 0.19.1", ] +[[package]] +name = "typed-builder" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e14ed59dc8b7b26cacb2a92bad2e8b1f098806063898ab42a3bd121d7d45e75" +dependencies = [ + "typed-builder-macro 0.20.0", +] + [[package]] name = "typed-builder-macro" version = "0.16.2" @@ -14866,6 +14880,17 @@ dependencies = [ "syn 2.0.66", ] +[[package]] +name = "typed-builder-macro" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "560b82d656506509d43abe30e0ba64c56b1953ab3d4fe7ba5902747a7a3cedd5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "typenum" version = "1.16.0" diff --git a/Cargo.toml b/Cargo.toml index a5da9b82b658c..63feb981d9a1b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -135,16 +135,18 @@ tonic-build = { package = "madsim-tonic-build", version = "0.5" } otlp-embedded = { git = "https://github.com/risingwavelabs/otlp-embedded", rev = "e6cd165b9bc85783b42c106e99186b86b73e3507" } prost = { version = "0.13" } prost-build = { version = "0.13" } -icelake = { git = "https://github.com/risingwavelabs/icelake.git", rev = "1860eb315183a5f3f72b4097c1e40d49407f8373", features = [ +# branch dylan/fix_parquet_nested_type_field_id +icelake = { git = "https://github.com/risingwavelabs/icelake.git", rev = "3f4724158acee37a4785f56670a1427993a58739", features = [ "prometheus", ] } arrow-array-iceberg = { package = "arrow-array", version = "52" } arrow-schema-iceberg = { package = "arrow-schema", version = "52" } arrow-buffer-iceberg = { package = "arrow-buffer", version = "52" } arrow-cast-iceberg = { package = "arrow-cast", version = "52" } -iceberg = "0.3.0" -iceberg-catalog-rest = "0.3.0" -iceberg-catalog-glue = "0.3.0" +# branch dev +iceberg = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "84bf51c9d0d5886e4ee306ca4f383f029e1767a4" } +iceberg-catalog-rest = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "84bf51c9d0d5886e4ee306ca4f383f029e1767a4" } +iceberg-catalog-glue = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "84bf51c9d0d5886e4ee306ca4f383f029e1767a4" } opendal = "0.47" arrow-array = "50" arrow-arith = "50" diff --git a/ci/build-ci-image.sh b/ci/build-ci-image.sh index 88542b4aa5f12..9d00b47bcd3aa 100755 --- a/ci/build-ci-image.sh +++ b/ci/build-ci-image.sh @@ -10,7 +10,7 @@ cat ../rust-toolchain # shellcheck disable=SC2155 # REMEMBER TO ALSO UPDATE ci/docker-compose.yml -export BUILD_ENV_VERSION=v20240812 +export BUILD_ENV_VERSION=v20240911 export BUILD_TAG="public.ecr.aws/w1p7b4n3/rw-build-env:${BUILD_ENV_VERSION}" diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index 4b1954ff5ae2c..11d29d7236367 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -71,7 +71,7 @@ services: retries: 5 source-test-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240812 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240911 depends_on: - mysql - sqlserver-server @@ -85,7 +85,7 @@ services: - ..:/risingwave sink-test-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240812 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240911 depends_on: - mysql - db @@ -108,12 +108,12 @@ services: rw-build-env: - image: 
public.ecr.aws/w1p7b4n3/rw-build-env:v20240812 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240911 volumes: - ..:/risingwave ci-flamegraph-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240812 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240911 # NOTE(kwannoel): This is used in order to permit # syscalls for `nperf` (perf_event_open), # so it can do CPU profiling. @@ -124,7 +124,7 @@ services: - ..:/risingwave regress-test-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240812 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240911 depends_on: db: condition: service_healthy diff --git a/ci/rust-toolchain b/ci/rust-toolchain index 6bc57a2a65d8f..158ecbbdb0dfd 100644 --- a/ci/rust-toolchain +++ b/ci/rust-toolchain @@ -4,4 +4,4 @@ # 3. (optional) **follow the instructions in lints/README.md** to update the toolchain and dependencies for lints [toolchain] -channel = "nightly-2024-06-06" +channel = "nightly-2024-07-19" diff --git a/ci/scripts/e2e-cassandra-sink-test.sh b/ci/scripts/e2e-cassandra-sink-test.sh index 0e1c9a98d49e8..678b97aac4b54 100755 --- a/ci/scripts/e2e-cassandra-sink-test.sh +++ b/ci/scripts/e2e-cassandra-sink-test.sh @@ -41,17 +41,24 @@ wget $(get_latest_cassandra_download_url) -O cassandra_latest.tar.gz tar xfvz cassandra_latest.tar.gz export LATEST_CASSANDRA_VERSION=$(get_latest_cassandra_version) export CASSANDRA_DIR="./apache-cassandra-${LATEST_CASSANDRA_VERSION}" -# remove bundled packages, and use installed packages, because Python 3.12 has removed asyncore, but I failed to install libev support for bundled Python driver. -rm ${CASSANDRA_DIR}/lib/six-1.12.0-py2.py3-none-any.zip -rm ${CASSANDRA_DIR}/lib/cassandra-driver-internal-only-3.25.0.zip -apt-get install -y libev4 libev-dev -pip3 install --break-system-packages cassandra-driver + +# Cassandra only support python 3.11 +apt-get install -y software-properties-common +add-apt-repository ppa:deadsnakes/ppa +apt-get update +apt-get install -y python3.11 +apt-get install -y python3.11-venv +python3.11 -m venv cqlsh_env +source cqlsh_env/bin/activate + export CQLSH_HOST=cassandra-server export CQLSH_PORT=9042 echo "--- testing sinks" sqllogictest -p 4566 -d dev './e2e_test/sink/cassandra_sink.slt' +deactivate + echo "--- Kill cluster" cd ../../ risedev ci-kill \ No newline at end of file diff --git a/ci/scripts/e2e-iceberg-sink-v2-test.sh b/ci/scripts/e2e-iceberg-sink-v2-test.sh index 1a46f30682bdd..c039c625aa213 100755 --- a/ci/scripts/e2e-iceberg-sink-v2-test.sh +++ b/ci/scripts/e2e-iceberg-sink-v2-test.sh @@ -46,6 +46,7 @@ poetry run python main.py -t ./test_case/range_partition_append_only.toml poetry run python main.py -t ./test_case/range_partition_upsert.toml poetry run python main.py -t ./test_case/append_only_with_checkpoint_interval.toml poetry run python main.py -t ./test_case/iceberg_select_empty_table.toml +poetry run python main.py -t ./test_case/iceberg_source_eq_delete.toml echo "--- Kill cluster" diff --git a/ci/scripts/e2e-source-test.sh b/ci/scripts/e2e-source-test.sh index 29f2a0ac7b5ce..6bf2f8a491576 100755 --- a/ci/scripts/e2e-source-test.sh +++ b/ci/scripts/e2e-source-test.sh @@ -45,7 +45,6 @@ risedev ci-kill echo "--- Prepare data" cp src/connector/src/test_data/simple-schema.avsc ./avro-simple-schema.avsc cp src/connector/src/test_data/complex-schema.avsc ./avro-complex-schema.avsc -cp src/connector/src/test_data/complex-schema ./proto-complex-schema cp src/connector/src/test_data/complex-schema.json ./json-complex-schema diff --git a/dashboard/package-lock.json 
b/dashboard/package-lock.json index c06e209600477..496093d8c2fe4 100644 --- a/dashboard/package-lock.json +++ b/dashboard/package-lock.json @@ -54,7 +54,7 @@ "eslint-plugin-n": "^15.2.5", "eslint-plugin-promise": "^6.0.1", "eslint-plugin-react": "^7.31.6", - "express": "^4.19.2", + "express": "^4.20.0", "prettier": "^2.7.1", "prettier-plugin-organize-imports": "^3.1.1", "typescript": "5.4.2" @@ -3792,9 +3792,9 @@ } }, "node_modules/body-parser": { - "version": "1.20.2", - "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.2.tgz", - "integrity": "sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA==", + "version": "1.20.3", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.3.tgz", + "integrity": "sha512-7rAxByjUMqQ3/bHJy7D6OGXvx/MMc4IqBn/X0fcM1QUcAItpZrBEYhWGem+tzXH90c+G01ypMcYJBO9Y30203g==", "dev": true, "dependencies": { "bytes": "3.1.2", @@ -3805,7 +3805,7 @@ "http-errors": "2.0.0", "iconv-lite": "0.4.24", "on-finished": "2.4.1", - "qs": "6.11.0", + "qs": "6.13.0", "raw-body": "2.5.2", "type-is": "~1.6.18", "unpipe": "1.0.0" @@ -3842,6 +3842,21 @@ "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", "dev": true }, + "node_modules/body-parser/node_modules/qs": { + "version": "6.13.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz", + "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==", + "dev": true, + "dependencies": { + "side-channel": "^1.0.6" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/bootstrap-icons": { "version": "1.9.1", "resolved": "https://registry.npmjs.org/bootstrap-icons/-/bootstrap-icons-1.9.1.tgz", @@ -3975,14 +3990,19 @@ } }, "node_modules/call-bind": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.5.tgz", - "integrity": "sha512-C3nQxfFZxFRVoJoGKKI8y3MOEo129NQ+FgQ08iye+Mk4zNZZGdjfs06bVTr+DBSlA66Q2VEcMki/cUCP4SercQ==", + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz", + "integrity": "sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==", "dev": true, "dependencies": { + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", "function-bind": "^1.1.2", - "get-intrinsic": "^1.2.1", - "set-function-length": "^1.1.1" + "get-intrinsic": "^1.2.4", + "set-function-length": "^1.2.1" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -4874,17 +4894,20 @@ } }, "node_modules/define-data-property": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.1.tgz", - "integrity": "sha512-E7uGkTzkk1d0ByLeSc6ZsFS79Axg+m1P/VsgYsxHgiuc3tFSj+MjMIwe90FC4lOAZzNBdY7kkO2P2wKdsQ1vgQ==", + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", + "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", "dev": true, "dependencies": { - "get-intrinsic": "^1.2.1", - "gopd": "^1.0.1", - "has-property-descriptors": "^1.0.0" + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", + "gopd": "^1.0.1" }, "engines": { "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" } }, "node_modules/define-lazy-prop": { @@ -5166,6 
+5189,27 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/es-define-property": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz", + "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==", + "dev": true, + "dependencies": { + "get-intrinsic": "^1.2.4" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "dev": true, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/es-iterator-helpers": { "version": "1.0.15", "resolved": "https://registry.npmjs.org/es-iterator-helpers/-/es-iterator-helpers-1.0.15.tgz", @@ -6120,37 +6164,37 @@ } }, "node_modules/express": { - "version": "4.19.2", - "resolved": "https://registry.npmjs.org/express/-/express-4.19.2.tgz", - "integrity": "sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/express/-/express-4.20.0.tgz", + "integrity": "sha512-pLdae7I6QqShF5PnNTCVn4hI91Dx0Grkn2+IAsMTgMIKuQVte2dN9PeGSSAME2FR8anOhVA62QDIUaWVfEXVLw==", "dev": true, "dependencies": { "accepts": "~1.3.8", "array-flatten": "1.1.1", - "body-parser": "1.20.2", + "body-parser": "1.20.3", "content-disposition": "0.5.4", "content-type": "~1.0.4", "cookie": "0.6.0", "cookie-signature": "1.0.6", "debug": "2.6.9", "depd": "2.0.0", - "encodeurl": "~1.0.2", + "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "etag": "~1.8.1", "finalhandler": "1.2.0", "fresh": "0.5.2", "http-errors": "2.0.0", - "merge-descriptors": "1.0.1", + "merge-descriptors": "1.0.3", "methods": "~1.1.2", "on-finished": "2.4.1", "parseurl": "~1.3.3", - "path-to-regexp": "0.1.7", + "path-to-regexp": "0.1.10", "proxy-addr": "~2.0.7", "qs": "6.11.0", "range-parser": "~1.2.1", "safe-buffer": "5.2.1", - "send": "0.18.0", - "serve-static": "1.15.0", + "send": "0.19.0", + "serve-static": "1.16.0", "setprototypeof": "1.2.0", "statuses": "2.0.1", "type-is": "~1.6.18", @@ -6170,6 +6214,15 @@ "ms": "2.0.0" } }, + "node_modules/express/node_modules/encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "dev": true, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/express/node_modules/ms": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", @@ -6643,16 +6696,20 @@ } }, "node_modules/get-intrinsic": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.2.tgz", - "integrity": "sha512-0gSo4ml/0j98Y3lngkFEot/zhiCeWsbYIlZ+uZOVgzLyLaUw7wxUL+nCTP0XJvJg1AXulJRI3UJi8GsbDuxdGA==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz", + "integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==", "dev": true, "dependencies": { + "es-errors": "^1.3.0", "function-bind": "^1.1.2", "has-proto": "^1.0.1", "has-symbols": "^1.0.3", "hasown": "^2.0.0" }, + "engines": { + "node": ">= 0.4" + }, "funding": { "url": "https://github.com/sponsors/ljharb" } @@ -6833,12 +6890,12 @@ } }, 
"node_modules/has-property-descriptors": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.1.tgz", - "integrity": "sha512-VsX8eaIewvas0xnvinAe9bw4WfIeODpGYikiWYLH+dma0Jw6KHYqWiWfhQlgOVK8D6PvjubK5Uc4P0iIhIcNVg==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", + "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", "dev": true, "dependencies": { - "get-intrinsic": "^1.2.2" + "es-define-property": "^1.0.0" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -8320,10 +8377,13 @@ } }, "node_modules/merge-descriptors": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", - "integrity": "sha512-cCi6g3/Zr1iqQi6ySbseM1Xvooa98N0w31jzUYrXPX2xqObmFGHJ0tQ5u74H3mVh7wLouTseZyYIq39g8cNp1w==", - "dev": true + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.3.tgz", + "integrity": "sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } }, "node_modules/merge-stream": { "version": "2.0.0", @@ -8720,10 +8780,13 @@ } }, "node_modules/object-inspect": { - "version": "1.12.3", - "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.12.3.tgz", - "integrity": "sha512-geUvdk7c+eizMNUDkRpW1wJwgfOiOeHbxBR/hLXK1aT6zmVSO0jsQcs7fj6MGw89jC/cjGfLcNOrtMYtGqm81g==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.2.tgz", + "integrity": "sha512-IRZSRuzJiynemAXPYtPe5BoI/RESNYR7TYm50MC5Mqbd3Jmw5y790sErYw3V6SryFJD64b74qQQs9wn5Bg/k3g==", "dev": true, + "engines": { + "node": ">= 0.4" + }, "funding": { "url": "https://github.com/sponsors/ljharb" } @@ -9056,9 +9119,9 @@ } }, "node_modules/path-to-regexp": { - "version": "0.1.7", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", - "integrity": "sha512-5DFkuoqlv1uYQKxy8omFBeJPQcdoE07Kv2sferDCrAq1ohOU+MSDswDIbnx3YAM60qIOnYa53wBhXW0EbMonrQ==", + "version": "0.1.10", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.10.tgz", + "integrity": "sha512-7lf7qcQidTku0Gu3YDPc8DJ1q7OOucfa/BSsIwjuh56VU7katFvuM8hULfkwB3Fns/rsVF7PwPKVw1sl5KQS9w==", "dev": true }, "node_modules/path-type": { @@ -10076,9 +10139,9 @@ } }, "node_modules/send": { - "version": "0.18.0", - "resolved": "https://registry.npmjs.org/send/-/send-0.18.0.tgz", - "integrity": "sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==", + "version": "0.19.0", + "resolved": "https://registry.npmjs.org/send/-/send-0.19.0.tgz", + "integrity": "sha512-dW41u5VfLXu8SJh5bwRmyYUbAoSB3c9uQh6L8h/KtsFREPWpbX1lrljJo186Jc4nmci/sGUZ9a0a0J2zgfq2hw==", "dev": true, "dependencies": { "debug": "2.6.9", @@ -10121,9 +10184,9 @@ "dev": true }, "node_modules/serve-static": { - "version": "1.15.0", - "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.15.0.tgz", - "integrity": "sha512-XGuRDNjXUijsUL0vl6nSD7cwURuzEgglbOaFuZM9g3kwDXOWVTck0jLzjPzGD+TazWbboZYu52/9/XPdUgne9g==", + "version": "1.16.0", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.0.tgz", + "integrity": "sha512-pDLK8zwl2eKaYrs8mrPZBJua4hMplRWJ1tIFksVC3FtBEBnl8dxgeHtsaMS8DhS9i4fLObaon6ABoc4/hQGdPA==", "dev": 
true, "dependencies": { "encodeurl": "~1.0.2", @@ -10135,6 +10198,51 @@ "node": ">= 0.8.0" } }, + "node_modules/serve-static/node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "dev": true, + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/serve-static/node_modules/debug/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "dev": true + }, + "node_modules/serve-static/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true + }, + "node_modules/serve-static/node_modules/send": { + "version": "0.18.0", + "resolved": "https://registry.npmjs.org/send/-/send-0.18.0.tgz", + "integrity": "sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==", + "dev": true, + "dependencies": { + "debug": "2.6.9", + "depd": "2.0.0", + "destroy": "1.2.0", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "fresh": "0.5.2", + "http-errors": "2.0.0", + "mime": "1.6.0", + "ms": "2.1.3", + "on-finished": "2.4.1", + "range-parser": "~1.2.1", + "statuses": "2.0.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, "node_modules/set-blocking": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", @@ -10142,16 +10250,17 @@ "optional": true }, "node_modules/set-function-length": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.0.tgz", - "integrity": "sha512-4DBHDoyHlM1IRPGYcoxexgh67y4ueR53FKV1yyxwFMY7aCqcN/38M1+SwZ/qJQ8iLv7+ck385ot4CcisOAPT9w==", + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", + "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==", "dev": true, "dependencies": { - "define-data-property": "^1.1.1", + "define-data-property": "^1.1.4", + "es-errors": "^1.3.0", "function-bind": "^1.1.2", - "get-intrinsic": "^1.2.2", + "get-intrinsic": "^1.2.4", "gopd": "^1.0.1", - "has-property-descriptors": "^1.0.1" + "has-property-descriptors": "^1.0.2" }, "engines": { "node": ">= 0.4" @@ -10209,14 +10318,18 @@ } }, "node_modules/side-channel": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.4.tgz", - "integrity": "sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==", + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.6.tgz", + "integrity": "sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA==", "dev": true, "dependencies": { - "call-bind": "^1.0.0", - "get-intrinsic": "^1.0.2", - "object-inspect": "^1.9.0" + "call-bind": "^1.0.7", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.4", + "object-inspect": "^1.13.1" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -14446,9 +14559,9 @@ "dev": true }, "body-parser": { - "version": "1.20.2", - "resolved": 
"https://registry.npmjs.org/body-parser/-/body-parser-1.20.2.tgz", - "integrity": "sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA==", + "version": "1.20.3", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.3.tgz", + "integrity": "sha512-7rAxByjUMqQ3/bHJy7D6OGXvx/MMc4IqBn/X0fcM1QUcAItpZrBEYhWGem+tzXH90c+G01ypMcYJBO9Y30203g==", "dev": true, "requires": { "bytes": "3.1.2", @@ -14459,7 +14572,7 @@ "http-errors": "2.0.0", "iconv-lite": "0.4.24", "on-finished": "2.4.1", - "qs": "6.11.0", + "qs": "6.13.0", "raw-body": "2.5.2", "type-is": "~1.6.18", "unpipe": "1.0.0" @@ -14488,6 +14601,15 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", "dev": true + }, + "qs": { + "version": "6.13.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz", + "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==", + "dev": true, + "requires": { + "side-channel": "^1.0.6" + } } } }, @@ -14586,14 +14708,16 @@ "dev": true }, "call-bind": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.5.tgz", - "integrity": "sha512-C3nQxfFZxFRVoJoGKKI8y3MOEo129NQ+FgQ08iye+Mk4zNZZGdjfs06bVTr+DBSlA66Q2VEcMki/cUCP4SercQ==", + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz", + "integrity": "sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==", "dev": true, "requires": { + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", "function-bind": "^1.1.2", - "get-intrinsic": "^1.2.1", - "set-function-length": "^1.1.1" + "get-intrinsic": "^1.2.4", + "set-function-length": "^1.2.1" } }, "callsites": { @@ -15255,14 +15379,14 @@ } }, "define-data-property": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.1.tgz", - "integrity": "sha512-E7uGkTzkk1d0ByLeSc6ZsFS79Axg+m1P/VsgYsxHgiuc3tFSj+MjMIwe90FC4lOAZzNBdY7kkO2P2wKdsQ1vgQ==", + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", + "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", "dev": true, "requires": { - "get-intrinsic": "^1.2.1", - "gopd": "^1.0.1", - "has-property-descriptors": "^1.0.0" + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", + "gopd": "^1.0.1" } }, "define-lazy-prop": { @@ -15488,6 +15612,21 @@ "which-typed-array": "^1.1.10" } }, + "es-define-property": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz", + "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==", + "dev": true, + "requires": { + "get-intrinsic": "^1.2.4" + } + }, + "es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "dev": true + }, "es-iterator-helpers": { "version": "1.0.15", "resolved": "https://registry.npmjs.org/es-iterator-helpers/-/es-iterator-helpers-1.0.15.tgz", @@ -16174,37 +16313,37 @@ } }, "express": { - "version": "4.19.2", - "resolved": "https://registry.npmjs.org/express/-/express-4.19.2.tgz", - "integrity": 
"sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==", + "version": "4.20.0", + "resolved": "https://registry.npmjs.org/express/-/express-4.20.0.tgz", + "integrity": "sha512-pLdae7I6QqShF5PnNTCVn4hI91Dx0Grkn2+IAsMTgMIKuQVte2dN9PeGSSAME2FR8anOhVA62QDIUaWVfEXVLw==", "dev": true, "requires": { "accepts": "~1.3.8", "array-flatten": "1.1.1", - "body-parser": "1.20.2", + "body-parser": "1.20.3", "content-disposition": "0.5.4", "content-type": "~1.0.4", "cookie": "0.6.0", "cookie-signature": "1.0.6", "debug": "2.6.9", "depd": "2.0.0", - "encodeurl": "~1.0.2", + "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "etag": "~1.8.1", "finalhandler": "1.2.0", "fresh": "0.5.2", "http-errors": "2.0.0", - "merge-descriptors": "1.0.1", + "merge-descriptors": "1.0.3", "methods": "~1.1.2", "on-finished": "2.4.1", "parseurl": "~1.3.3", - "path-to-regexp": "0.1.7", + "path-to-regexp": "0.1.10", "proxy-addr": "~2.0.7", "qs": "6.11.0", "range-parser": "~1.2.1", "safe-buffer": "5.2.1", - "send": "0.18.0", - "serve-static": "1.15.0", + "send": "0.19.0", + "serve-static": "1.16.0", "setprototypeof": "1.2.0", "statuses": "2.0.1", "type-is": "~1.6.18", @@ -16221,6 +16360,12 @@ "ms": "2.0.0" } }, + "encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "dev": true + }, "ms": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", @@ -16602,11 +16747,12 @@ "peer": true }, "get-intrinsic": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.2.tgz", - "integrity": "sha512-0gSo4ml/0j98Y3lngkFEot/zhiCeWsbYIlZ+uZOVgzLyLaUw7wxUL+nCTP0XJvJg1AXulJRI3UJi8GsbDuxdGA==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz", + "integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==", "dev": true, "requires": { + "es-errors": "^1.3.0", "function-bind": "^1.1.2", "has-proto": "^1.0.1", "has-symbols": "^1.0.3", @@ -16735,12 +16881,12 @@ "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==" }, "has-property-descriptors": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.1.tgz", - "integrity": "sha512-VsX8eaIewvas0xnvinAe9bw4WfIeODpGYikiWYLH+dma0Jw6KHYqWiWfhQlgOVK8D6PvjubK5Uc4P0iIhIcNVg==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", + "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", "dev": true, "requires": { - "get-intrinsic": "^1.2.2" + "es-define-property": "^1.0.0" } }, "has-proto": { @@ -17803,9 +17949,9 @@ "dev": true }, "merge-descriptors": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", - "integrity": "sha512-cCi6g3/Zr1iqQi6ySbseM1Xvooa98N0w31jzUYrXPX2xqObmFGHJ0tQ5u74H3mVh7wLouTseZyYIq39g8cNp1w==", + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.3.tgz", + "integrity": "sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==", "dev": true }, "merge-stream": { @@ -18081,9 +18227,9 @@ "integrity": 
"sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==" }, "object-inspect": { - "version": "1.12.3", - "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.12.3.tgz", - "integrity": "sha512-geUvdk7c+eizMNUDkRpW1wJwgfOiOeHbxBR/hLXK1aT6zmVSO0jsQcs7fj6MGw89jC/cjGfLcNOrtMYtGqm81g==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.2.tgz", + "integrity": "sha512-IRZSRuzJiynemAXPYtPe5BoI/RESNYR7TYm50MC5Mqbd3Jmw5y790sErYw3V6SryFJD64b74qQQs9wn5Bg/k3g==", "dev": true }, "object-keys": { @@ -18322,9 +18468,9 @@ } }, "path-to-regexp": { - "version": "0.1.7", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", - "integrity": "sha512-5DFkuoqlv1uYQKxy8omFBeJPQcdoE07Kv2sferDCrAq1ohOU+MSDswDIbnx3YAM60qIOnYa53wBhXW0EbMonrQ==", + "version": "0.1.10", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.10.tgz", + "integrity": "sha512-7lf7qcQidTku0Gu3YDPc8DJ1q7OOucfa/BSsIwjuh56VU7katFvuM8hULfkwB3Fns/rsVF7PwPKVw1sl5KQS9w==", "dev": true }, "path-type": { @@ -19040,9 +19186,9 @@ "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==" }, "send": { - "version": "0.18.0", - "resolved": "https://registry.npmjs.org/send/-/send-0.18.0.tgz", - "integrity": "sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==", + "version": "0.19.0", + "resolved": "https://registry.npmjs.org/send/-/send-0.19.0.tgz", + "integrity": "sha512-dW41u5VfLXu8SJh5bwRmyYUbAoSB3c9uQh6L8h/KtsFREPWpbX1lrljJo186Jc4nmci/sGUZ9a0a0J2zgfq2hw==", "dev": true, "requires": { "debug": "2.6.9", @@ -19086,15 +19232,61 @@ } }, "serve-static": { - "version": "1.15.0", - "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.15.0.tgz", - "integrity": "sha512-XGuRDNjXUijsUL0vl6nSD7cwURuzEgglbOaFuZM9g3kwDXOWVTck0jLzjPzGD+TazWbboZYu52/9/XPdUgne9g==", + "version": "1.16.0", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.0.tgz", + "integrity": "sha512-pDLK8zwl2eKaYrs8mrPZBJua4hMplRWJ1tIFksVC3FtBEBnl8dxgeHtsaMS8DhS9i4fLObaon6ABoc4/hQGdPA==", "dev": true, "requires": { "encodeurl": "~1.0.2", "escape-html": "~1.0.3", "parseurl": "~1.3.3", "send": "0.18.0" + }, + "dependencies": { + "debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "dev": true, + "requires": { + "ms": "2.0.0" + }, + "dependencies": { + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "dev": true + } + } + }, + "ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true + }, + "send": { + "version": "0.18.0", + "resolved": "https://registry.npmjs.org/send/-/send-0.18.0.tgz", + "integrity": "sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==", + "dev": true, + "requires": { + "debug": "2.6.9", + "depd": "2.0.0", + "destroy": "1.2.0", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "fresh": "0.5.2", + "http-errors": "2.0.0", + "mime": "1.6.0", + 
"ms": "2.1.3", + "on-finished": "2.4.1", + "range-parser": "~1.2.1", + "statuses": "2.0.1" + } + } } }, "set-blocking": { @@ -19104,16 +19296,17 @@ "optional": true }, "set-function-length": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.0.tgz", - "integrity": "sha512-4DBHDoyHlM1IRPGYcoxexgh67y4ueR53FKV1yyxwFMY7aCqcN/38M1+SwZ/qJQ8iLv7+ck385ot4CcisOAPT9w==", + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", + "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==", "dev": true, "requires": { - "define-data-property": "^1.1.1", + "define-data-property": "^1.1.4", + "es-errors": "^1.3.0", "function-bind": "^1.1.2", - "get-intrinsic": "^1.2.2", + "get-intrinsic": "^1.2.4", "gopd": "^1.0.1", - "has-property-descriptors": "^1.0.1" + "has-property-descriptors": "^1.0.2" } }, "set-function-name": { @@ -19159,14 +19352,15 @@ "dev": true }, "side-channel": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.4.tgz", - "integrity": "sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==", + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.6.tgz", + "integrity": "sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA==", "dev": true, "requires": { - "call-bind": "^1.0.0", - "get-intrinsic": "^1.0.2", - "object-inspect": "^1.9.0" + "call-bind": "^1.0.7", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.4", + "object-inspect": "^1.13.1" } }, "signal-exit": { diff --git a/dashboard/package.json b/dashboard/package.json index a3716f7802ccf..71621e4159f5d 100644 --- a/dashboard/package.json +++ b/dashboard/package.json @@ -61,7 +61,7 @@ "eslint-plugin-n": "^15.2.5", "eslint-plugin-promise": "^6.0.1", "eslint-plugin-react": "^7.31.6", - "express": "^4.19.2", + "express": "^4.20.0", "prettier": "^2.7.1", "prettier-plugin-organize-imports": "^3.1.1", "typescript": "5.4.2" diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 95a78cd83f4a8..781e3e9a476f0 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -60,7 +60,7 @@ services: ENABLE_TELEMETRY: ${ENABLE_TELEMETRY:-true} RW_TELEMETRY_TYPE: ${RW_TELEMETRY_TYPE:-"docker-compose"} RW_SECRET_STORE_PRIVATE_KEY_HEX: ${RW_SECRET_STORE_PRIVATE_KEY_HEX:-0123456789abcdef} - RW_LICENSE_KEY: ${RW_LICENSE_KEY:-""} + RW_LICENSE_KEY: ${RW_LICENSE_KEY:-} container_name: risingwave-standalone healthcheck: test: @@ -113,7 +113,7 @@ services: - "./grafana-risedev-datasource.yml:/etc/grafana/provisioning/datasources/grafana-risedev-datasource.yml" - "./grafana-risedev-dashboard.yml:/etc/grafana/provisioning/dashboards/grafana-risedev-dashboard.yml" - "./dashboards:/dashboards" - environment: { } + environment: {} container_name: grafana-0 healthcheck: test: @@ -187,7 +187,7 @@ services: volumes: - "prometheus-0:/prometheus" - "./prometheus.yaml:/etc/prometheus/prometheus.yml" - environment: { } + environment: {} container_name: prometheus-0 healthcheck: test: @@ -229,7 +229,7 @@ services: depends_on: [ ] volumes: - "message_queue:/var/lib/redpanda/data" - environment: { } + environment: {} container_name: message_queue healthcheck: test: curl -f localhost:9644/v1/status/ready diff --git a/e2e_test/iceberg/main.py b/e2e_test/iceberg/main.py index 01017f3db783d..4279b899c5c1d 100644 --- 
a/e2e_test/iceberg/main.py +++ b/e2e_test/iceberg/main.py @@ -55,16 +55,23 @@ def execute_slt(args, slt): def verify_result(args, verify_sql, verify_schema, verify_data): tc = unittest.TestCase() - print(f"Executing sql: {verify_sql}") + + time.sleep(3) + print(f"verify_result:\nExecuting sql: {verify_sql}") spark = get_spark(args) df = spark.sql(verify_sql).collect() + print(f"Result:") + print(f"================") for row in df: print(row) + print(f"================") rows = verify_data.splitlines() - tc.assertEqual(len(df), len(rows)) + tc.assertEqual(len(df), len(rows), "row length mismatch") + tc.assertEqual(len(verify_schema), len(df[0]), "column length mismatch") for row1, row2 in zip(df, rows): print(f"Row1: {row1}, Row 2: {row2}") - row2 = row2.split(",") + # New parsing logic for row2 + row2 = parse_row(row2) for idx, ty in enumerate(verify_schema): if ty == "int" or ty == "long": tc.assertEqual(row1[idx], int(row2[idx])) @@ -89,7 +96,7 @@ def verify_result(args, verify_sql, verify_schema, verify_data): else: tc.assertEqual(row1[idx], decimal.Decimal(row2[idx])) else: - tc.fail(f"Unsupported type {ty}") + tc.assertEqual(str(row1[idx]), str(row2[idx])) def compare_sql(args, cmp_sqls): assert len(cmp_sqls) == 2 @@ -113,6 +120,32 @@ def drop_table(args, drop_sqls): spark.sql(sql) +def parse_row(row): + result = [] + current = "" + parenthesis_count = {"{": 0, "[": 0, "(": 0} + for char in row: + if char in parenthesis_count: + parenthesis_count[char] += 1 + elif char == "}": + parenthesis_count["{"] -= 1 + elif char == "]": + parenthesis_count["["] -= 1 + elif char == ")": + parenthesis_count["("] -= 1 + + if char == "," and all(value == 0 for value in parenthesis_count.values()): + result.append(current.strip()) + current = "" + else: + current += char + + if current: + result.append(current.strip()) + + return result + + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Test script for iceberg") parser.add_argument("-t", dest="test_case", type=str, help="Test case file") @@ -151,4 +184,3 @@ def drop_table(args, drop_sqls): execute_slt(config, verify_slt) if drop_sqls is not None and drop_sqls != "": drop_table(config, drop_sqls) - diff --git a/e2e_test/iceberg/start_spark_connect_server.sh b/e2e_test/iceberg/start_spark_connect_server.sh index 345653778b14c..f0f3f19a1fab7 100755 --- a/e2e_test/iceberg/start_spark_connect_server.sh +++ b/e2e_test/iceberg/start_spark_connect_server.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + set -ex ICEBERG_VERSION=1.4.3 diff --git a/e2e_test/iceberg/test_case/append_only_with_checkpoint_interval.slt b/e2e_test/iceberg/test_case/append_only_with_checkpoint_interval.slt index 0dc937303a852..b0e433c819f83 100644 --- a/e2e_test/iceberg/test_case/append_only_with_checkpoint_interval.slt +++ b/e2e_test/iceberg/test_case/append_only_with_checkpoint_interval.slt @@ -1,6 +1,3 @@ -statement ok -set sink_decouple = false; - statement ok set streaming_parallelism=4; @@ -37,7 +34,6 @@ CREATE SINK sink1 AS select * from mv1 WITH ( s3.region = 'us-east-1', s3.access.key = 'hummockadmin', s3.secret.key = 'hummockadmin', - commit_checkpoint_interval = 1 ); statement ok @@ -54,7 +50,6 @@ CREATE SINK sink2 AS select * from mv1 WITH ( s3.region = 'us-east-1', s3.access.key = 'hummockadmin', s3.secret.key = 'hummockadmin', - commit_checkpoint_interval = 1 ); sleep 20s diff --git a/e2e_test/iceberg/test_case/cdc/load.slt b/e2e_test/iceberg/test_case/cdc/load.slt index df0c319990374..6e6850725f98a 100644 --- a/e2e_test/iceberg/test_case/cdc/load.slt 
+++ b/e2e_test/iceberg/test_case/cdc/load.slt @@ -1,4 +1,6 @@ # CDC source basic test +statement ok +set sink_decouple = false; statement ok create source mysql_mydb with ( diff --git a/e2e_test/iceberg/test_case/iceberg_sink_no_partition_append_only_table.slt b/e2e_test/iceberg/test_case/iceberg_sink_no_partition_append_only_table.slt index a83173fc48ab6..49c4cf3fb1145 100644 --- a/e2e_test/iceberg/test_case/iceberg_sink_no_partition_append_only_table.slt +++ b/e2e_test/iceberg/test_case/iceberg_sink_no_partition_append_only_table.slt @@ -16,7 +16,10 @@ v_bool boolean, v_date date, v_timestamp timestamptz, v_ts_ntz timestamp, -v_decimal decimal +v_decimal decimal, +v_map map(int, int), +v_array int[], +v_struct struct ); statement ok @@ -36,15 +39,15 @@ CREATE SINK s6 AS select * from mv6 WITH ( s3.region = 'us-east-1', s3.access.key = 'hummockadmin', s3.secret.key = 'hummockadmin', - commit_checkpoint_interval = 1 + create_table_if_not_exists = 'true' ); statement ok INSERT INTO t6 VALUES -(1, 1, 1000, 1.1, 1.11, '1-1', true, '2022-03-11', '2022-03-11 01:00:00Z'::timestamptz, '2022-03-11 01:00:00',1.11), -(2, 2, 2000, 2.2, 2.22, '2-2', false, '2022-03-12', '2022-03-12 02:00:00Z'::timestamptz, '2022-03-12 02:00:00',2.22), -(3, 3, 3000, 3.3, 3.33, '3-3', true, '2022-03-13', '2022-03-13 03:00:00Z'::timestamptz, '2022-03-13 03:00:00','inf'), -(4, 4, 4000, 4.4, 4.44, '4-4', false, '2022-03-14', '2022-03-14 04:00:00Z'::timestamptz, '2022-03-14 04:00:00','-inf'); +(1, 1, 1000, 1.1, 1.11, '1-1', true, '2022-03-11', '2022-03-11 01:00:00Z'::timestamptz, '2022-03-11 01:00:00',1.11, map {1:100,2:200}, array[1,2,3], row(1,2)), +(2, 2, 2000, 2.2, 2.22, '2-2', false, '2022-03-12', '2022-03-12 02:00:00Z'::timestamptz, '2022-03-12 02:00:00',2.22, map {3:300}, array[1,null,3], row(3,null)), +(3, 3, 3000, 3.3, 3.33, '3-3', true, '2022-03-13', '2022-03-13 03:00:00Z'::timestamptz, '2022-03-13 03:00:00','inf', null, null, null), +(4, 4, 4000, 4.4, 4.44, '4-4', false, '2022-03-14', '2022-03-14 04:00:00Z'::timestamptz, '2022-03-14 04:00:00','-inf', null, null, null); statement ok FLUSH; @@ -53,13 +56,37 @@ sleep 5s statement ok INSERT INTO t6 VALUES -(5, 5, 5000, 5.5, 5.55, '5-5', true, '2022-03-15', '2022-03-15 05:00:00Z'::timestamptz, '2022-03-15 05:00:00','nan'); +(5, 5, 5000, 5.5, 5.55, '5-5', true, '2022-03-15', '2022-03-15 05:00:00Z'::timestamptz, '2022-03-15 05:00:00','nan', null, null, null); statement ok FLUSH; sleep 5s +statement ok +CREATE Source iceberg_s WITH ( + connector = 'iceberg', + database.name = 'demo_db', + table.name = 'no_partition_append_only_table', + catalog.name = 'demo', + catalog.type = 'storage', + warehouse.path = 's3a://icebergdata/demo', + s3.endpoint = 'http://127.0.0.1:9301', + s3.region = 'us-east-1', + s3.access.key = 'hummockadmin', + s3.secret.key = 'hummockadmin' +); + +query ?????????????? 
rowsort +select * from iceberg_s +---- +1 1 1000 1.1 1.11 1-1 t 2022-03-11 2022-03-11 01:00:00+00:00 2022-03-11 01:00:00 1.11000 {1:100,2:200} {1,2,3} (1,2) +2 2 2000 2.2 2.22 2-2 f 2022-03-12 2022-03-12 02:00:00+00:00 2022-03-12 02:00:00 2.22000 {3:300} {1,NULL,3} (3,) +3 3 3000 3.3 3.33 3-3 t 2022-03-13 2022-03-13 03:00:00+00:00 2022-03-13 03:00:00 99999.99999 NULL NULL NULL +4 4 4000 4.4 4.44 4-4 f 2022-03-14 2022-03-14 04:00:00+00:00 2022-03-14 04:00:00 -99999.99999 NULL NULL NULL +5 5 5000 5.5 5.55 5-5 t 2022-03-15 2022-03-15 05:00:00+00:00 2022-03-15 05:00:00 NULL NULL NULL NULL + + statement ok DROP SINK s6; @@ -68,3 +95,6 @@ DROP MATERIALIZED VIEW mv6; statement ok DROP TABLE t6; + +statement ok +DROP SOURCE iceberg_s; diff --git a/e2e_test/iceberg/test_case/iceberg_sink_no_partition_upsert_table.slt b/e2e_test/iceberg/test_case/iceberg_sink_no_partition_upsert_table.slt index de96205a2debf..73d953bc2937a 100644 --- a/e2e_test/iceberg/test_case/iceberg_sink_no_partition_upsert_table.slt +++ b/e2e_test/iceberg/test_case/iceberg_sink_no_partition_upsert_table.slt @@ -25,7 +25,6 @@ CREATE SINK s6 AS select mv6.id as id, mv6.v1 as v1, mv6.v2 as v2, mv6.v3 as v3, s3.access.key = 'hummockadmin', s3.secret.key = 'hummockadmin', primary_key = 'v1', - commit_checkpoint_interval = 1 ); statement ok diff --git a/e2e_test/iceberg/test_case/iceberg_sink_partition_append_only_table.slt b/e2e_test/iceberg/test_case/iceberg_sink_partition_append_only_table.slt index 72f0bce46d183..3a27df42903ee 100644 --- a/e2e_test/iceberg/test_case/iceberg_sink_partition_append_only_table.slt +++ b/e2e_test/iceberg/test_case/iceberg_sink_partition_append_only_table.slt @@ -36,7 +36,6 @@ CREATE SINK s6 AS select * from mv6 WITH ( s3.region = 'us-east-1', s3.access.key = 'hummockadmin', s3.secret.key = 'hummockadmin', - commit_checkpoint_interval = 1 ); statement ok diff --git a/e2e_test/iceberg/test_case/iceberg_sink_partition_upsert_table.slt b/e2e_test/iceberg/test_case/iceberg_sink_partition_upsert_table.slt index 2b213a77175bd..39f170a834382 100644 --- a/e2e_test/iceberg/test_case/iceberg_sink_partition_upsert_table.slt +++ b/e2e_test/iceberg/test_case/iceberg_sink_partition_upsert_table.slt @@ -25,7 +25,6 @@ CREATE SINK s6 AS select mv6.id as id, mv6.v1 as v1, mv6.v2 as v2, mv6.v3 as v3, s3.access.key = 'hummockadmin', s3.secret.key = 'hummockadmin', primary_key = 'v1', - commit_checkpoint_interval = 1 ); statement ok diff --git a/e2e_test/iceberg/test_case/iceberg_sink_range_partition_append_only_table.slt b/e2e_test/iceberg/test_case/iceberg_sink_range_partition_append_only_table.slt index 46670ac362599..f0cf9f5fa3133 100644 --- a/e2e_test/iceberg/test_case/iceberg_sink_range_partition_append_only_table.slt +++ b/e2e_test/iceberg/test_case/iceberg_sink_range_partition_append_only_table.slt @@ -36,7 +36,6 @@ CREATE SINK s6 AS select * from mv6 WITH ( s3.region = 'us-east-1', s3.access.key = 'hummockadmin', s3.secret.key = 'hummockadmin', - commit_checkpoint_interval = 1 ); statement ok diff --git a/e2e_test/iceberg/test_case/iceberg_sink_range_partition_upsert_table.slt b/e2e_test/iceberg/test_case/iceberg_sink_range_partition_upsert_table.slt index 5637ce34c940f..f43e2788a020a 100644 --- a/e2e_test/iceberg/test_case/iceberg_sink_range_partition_upsert_table.slt +++ b/e2e_test/iceberg/test_case/iceberg_sink_range_partition_upsert_table.slt @@ -25,7 +25,6 @@ CREATE SINK s6 AS select mv6.id as id, mv6.v1 as v1, mv6.v2 as v2, mv6.v3 as v3, s3.access.key = 'hummockadmin', s3.secret.key = 'hummockadmin', 
primary_key = 'v1', - commit_checkpoint_interval = 1 ); statement ok diff --git a/e2e_test/iceberg/test_case/iceberg_source_eq_delete.slt b/e2e_test/iceberg/test_case/iceberg_source_eq_delete.slt new file mode 100644 index 0000000000000..820776fb7e773 --- /dev/null +++ b/e2e_test/iceberg/test_case/iceberg_source_eq_delete.slt @@ -0,0 +1,113 @@ +statement ok +set sink_decouple = false; + +statement ok +set streaming_parallelism=4; + +statement ok +CREATE TABLE s1 (i1 int, i2 varchar, i3 varchar); + +statement ok +CREATE MATERIALIZED VIEW mv1 AS SELECT * FROM s1; + +statement ok +CREATE SINK sink1 AS select * from mv1 WITH ( + connector = 'iceberg', + type = 'upsert', + database.name = 'demo_db', + table.name = 't1', + catalog.name = 'demo', + catalog.type = 'storage', + warehouse.path = 's3a://icebergdata/demo', + s3.endpoint = 'http://127.0.0.1:9301', + s3.region = 'us-east-1', + s3.access.key = 'hummockadmin', + s3.secret.key = 'hummockadmin', + create_table_if_not_exists = 'true', + primary_key = 'i1,i2', +); + +statement ok +insert into s1 values(1,'2','3'); + +statement ok +insert into s1 values(7,'8','9'); + +statement ok +insert into s1 values(4,'5','6'); + +statement ok +flush; + +statement ok +delete from s1 where i1 = 7; + +statement ok +flush; + +sleep 5s + +statement ok +CREATE SOURCE iceberg_t1_source +WITH ( + connector = 'iceberg', + s3.endpoint = 'http://127.0.0.1:9301', + s3.region = 'us-east-1', + s3.access.key = 'hummockadmin', + s3.secret.key = 'hummockadmin', + catalog.type = 'storage', + warehouse.path = 's3a://icebergdata/demo', + database.name = 'demo_db', + table.name = 't1', +); + +query I +select * from iceberg_t1_source order by i1; +---- +1 2 3 +4 5 6 + +query I +select i1,i2,i3 from iceberg_t1_source order by i1; +---- +1 2 3 +4 5 6 + +query I +select i3,i2 from iceberg_t1_source order by i2; +---- +3 2 +6 5 + +query I +select i2,i1 from iceberg_t1_source order by i1; +---- +2 1 +5 4 + +query I +select i1 from iceberg_t1_source order by i1; +---- +1 +4 + +query I +select i2 from iceberg_t1_source order by i2; +---- +2 +5 + +query I +select i3 from iceberg_t1_source order by i3; +---- +3 +6 + +statement ok +DROP SINK sink1; + +statement ok +DROP SOURCE iceberg_t1_source; + +statement ok +DROP TABLE s1 cascade; diff --git a/e2e_test/iceberg/test_case/iceberg_source_eq_delete.toml b/e2e_test/iceberg/test_case/iceberg_source_eq_delete.toml new file mode 100644 index 0000000000000..6e49ca949f501 --- /dev/null +++ b/e2e_test/iceberg/test_case/iceberg_source_eq_delete.toml @@ -0,0 +1,11 @@ +init_sqls = [ + 'CREATE SCHEMA IF NOT EXISTS demo_db', + 'DROP TABLE IF EXISTS demo_db.t1', +] + +slt = 'test_case/iceberg_source_eq_delete.slt' + +drop_sqls = [ + 'DROP TABLE IF EXISTS demo_db.t1', + 'DROP SCHEMA IF EXISTS demo_db', +] \ No newline at end of file diff --git a/e2e_test/iceberg/test_case/no_partition_append_only.toml b/e2e_test/iceberg/test_case/no_partition_append_only.toml index 7d2952c508756..9d49b7a29d17f 100644 --- a/e2e_test/iceberg/test_case/no_partition_append_only.toml +++ b/e2e_test/iceberg/test_case/no_partition_append_only.toml @@ -13,24 +13,27 @@ init_sqls = [ v_date date, v_timestamp timestamp, v_ts_ntz timestamp_ntz, - v_decimal decimal(10,5) + v_decimal decimal(10,5), + v_map map, + v_array array, + v_struct struct ) USING iceberg TBLPROPERTIES ('format-version'='2'); ''' ] slt = 'test_case/iceberg_sink_no_partition_append_only_table.slt' -verify_schema = ['long', 'int', 'long', 'float', 'double', 'string', 'boolean', 'date', 'timestamp', 
'timestamp_ntz','decimal'] +verify_schema = ['long', 'int', 'long', 'float', 'double', 'string', 'boolean', 'date', 'timestamp', 'timestamp_ntz','decimal', 'map', 'array', 'struct'] verify_sql = 'SELECT * FROM demo_db.no_partition_append_only_table ORDER BY id ASC' verify_data = """ -1,1,1000,1.1,1.11,1-1,true,2022-03-11,2022-03-11 01:00:00+00:00,2022-03-11 01:00:00,1.11 -2,2,2000,2.2,2.22,2-2,false,2022-03-12,2022-03-12 02:00:00+00:00,2022-03-12 02:00:00,2.22 -3,3,3000,3.3,3.33,3-3,true,2022-03-13,2022-03-13 03:00:00+00:00,2022-03-13 03:00:00,99999.99999 -4,4,4000,4.4,4.44,4-4,false,2022-03-14,2022-03-14 04:00:00+00:00,2022-03-14 04:00:00,-99999.99999 -5,5,5000,5.5,5.55,5-5,true,2022-03-15,2022-03-15 05:00:00+00:00,2022-03-15 05:00:00,none +1,1,1000,1.1,1.11,1-1,true,2022-03-11,2022-03-11 01:00:00+00:00,2022-03-11 01:00:00,1.11,{1: 100, 2: 200},[1, 2, 3],Row(a=1, b=2) +2,2,2000,2.2,2.22,2-2,false,2022-03-12,2022-03-12 02:00:00+00:00,2022-03-12 02:00:00,2.22,{3: 300},[1, None, 3],Row(a=3, b=None) +3,3,3000,3.3,3.33,3-3,true,2022-03-13,2022-03-13 03:00:00+00:00,2022-03-13 03:00:00,99999.99999,None,None,None +4,4,4000,4.4,4.44,4-4,false,2022-03-14,2022-03-14 04:00:00+00:00,2022-03-14 04:00:00,-99999.99999,None,None,None +5,5,5000,5.5,5.55,5-5,true,2022-03-15,2022-03-15 05:00:00+00:00,2022-03-15 05:00:00,none,None,None,None """ verify_slt = 'test_case/iceberg_sink_no_partition_append_only_table_verify.slt' diff --git a/e2e_test/iceberg/test_case/no_partition_upsert.toml b/e2e_test/iceberg/test_case/no_partition_upsert.toml index 24444e025f6fe..0c5d63e88216e 100644 --- a/e2e_test/iceberg/test_case/no_partition_upsert.toml +++ b/e2e_test/iceberg/test_case/no_partition_upsert.toml @@ -15,7 +15,7 @@ init_sqls = [ slt = 'test_case/iceberg_sink_no_partition_upsert_table.slt' -verify_schema = ['int','int','long','string'] +verify_schema = ['int','int','long','string','date'] verify_sql = 'SELECT * FROM demo_db.no_partition_upsert_table ORDER BY id, v1 ASC' diff --git a/e2e_test/iceberg/test_case/partition_upsert.toml b/e2e_test/iceberg/test_case/partition_upsert.toml index 38e6455fa9b0a..52cb1c40ea344 100644 --- a/e2e_test/iceberg/test_case/partition_upsert.toml +++ b/e2e_test/iceberg/test_case/partition_upsert.toml @@ -16,7 +16,7 @@ init_sqls = [ slt = 'test_case/iceberg_sink_partition_upsert_table.slt' -verify_schema = ['int','int','long','string'] +verify_schema = ['int','int','long','string', 'date'] verify_sql = 'SELECT * FROM demo_db.partition_upsert_table ORDER BY id, v1 ASC' diff --git a/e2e_test/iceberg/test_case/range_partition_upsert.toml b/e2e_test/iceberg/test_case/range_partition_upsert.toml index 0e63c4218eadc..ceea071d9c8a2 100644 --- a/e2e_test/iceberg/test_case/range_partition_upsert.toml +++ b/e2e_test/iceberg/test_case/range_partition_upsert.toml @@ -16,7 +16,7 @@ init_sqls = [ slt = 'test_case/iceberg_sink_range_partition_upsert_table.slt' -verify_schema = ['int','int','long','string'] +verify_schema = ['int','int','long','string','date'] verify_sql = 'SELECT * FROM demo_db.range_partition_upsert_table ORDER BY id, v1 ASC' diff --git a/e2e_test/s3/fs_parquet_source_and_sink.py b/e2e_test/s3/fs_parquet_source_and_sink.py index 033cb73ffbe70..6425ef1d3a9d6 100644 --- a/e2e_test/s3/fs_parquet_source_and_sink.py +++ b/e2e_test/s3/fs_parquet_source_and_sink.py @@ -116,6 +116,7 @@ def _table(): return 's3_test_parquet' # Execute a SELECT statement + cur.execute(f'''set sink_decouple = false;''') cur.execute(f'''CREATE sink test_file_sink as select id, name, diff --git 
a/e2e_test/sink/clickhouse_sink.slt b/e2e_test/sink/clickhouse_sink.slt index e037618bb460e..e5bac0d8d521d 100644 --- a/e2e_test/sink/clickhouse_sink.slt +++ b/e2e_test/sink/clickhouse_sink.slt @@ -17,7 +17,6 @@ CREATE SINK s6 AS select mv6.v1 as v1, mv6.v2 as v2, mv6.v3 as v3, mv6.v4 as v4, clickhouse.password = '', clickhouse.database = 'default', clickhouse.table='demo_test', - commit_checkpoint_interval = 1, ); statement ok diff --git a/e2e_test/sink/create_sink_as.slt b/e2e_test/sink/create_sink_as.slt index 5c66c5623553e..dc6d0f61419c6 100644 --- a/e2e_test/sink/create_sink_as.slt +++ b/e2e_test/sink/create_sink_as.slt @@ -1,3 +1,6 @@ +statement ok +set sink_decouple = false; + statement ok CREATE TABLE t4 (v1 int primary key, v2 int); diff --git a/e2e_test/sink/deltalake_rust_sink.slt b/e2e_test/sink/deltalake_rust_sink.slt index 74dca623a9d0a..cb9f9e7817212 100644 --- a/e2e_test/sink/deltalake_rust_sink.slt +++ b/e2e_test/sink/deltalake_rust_sink.slt @@ -1,3 +1,6 @@ +statement ok +set sink_decouple = false; + statement ok CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamptz, v9 boolean, v10 decimal, v11 decimal[]); diff --git a/e2e_test/sink/doris_sink.slt b/e2e_test/sink/doris_sink.slt index 3242206badaea..3e6a4aca9d9f6 100644 --- a/e2e_test/sink/doris_sink.slt +++ b/e2e_test/sink/doris_sink.slt @@ -1,3 +1,6 @@ +statement ok +set sink_decouple = false; + statement ok CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamp, v9 boolean, v10 jsonb); diff --git a/e2e_test/sink/iceberg_sink.slt b/e2e_test/sink/iceberg_sink.slt index e3917908f651b..b08abd8a4918c 100644 --- a/e2e_test/sink/iceberg_sink.slt +++ b/e2e_test/sink/iceberg_sink.slt @@ -31,7 +31,6 @@ CREATE SINK s6 AS select mv6.v1 as v1, mv6.v2 as v2, mv6.v3 as v3 from mv6 WITH catalog.type = 'storage', database.name='demo_db', table.name='e2e_demo_table', - commit_checkpoint_interval = 1 ); statement ok diff --git a/e2e_test/sink/kafka/protobuf.slt b/e2e_test/sink/kafka/protobuf.slt index 70de91e25c8d7..25b95a49cf1f3 100644 --- a/e2e_test/sink/kafka/protobuf.slt +++ b/e2e_test/sink/kafka/protobuf.slt @@ -4,17 +4,14 @@ set sink_decouple = false; system ok rpk topic create test-rw-sink-append-only-protobuf -system ok -cp src/connector/src/test_data/proto_recursive/recursive.pb ./proto-recursive - statement ok create table from_kafka with ( connector = 'kafka', topic = 'test-rw-sink-append-only-protobuf', properties.bootstrap.server = 'message_queue:29092') format plain encode protobuf ( - schema.location = 'file:///risingwave/proto-recursive', - message = 'recursive.AllTypes'); + schema.location = 'file:///risingwave/src/connector/codec/tests/test_data/all-types.pb', + message = 'all_types.AllTypes'); system ok rpk topic create test-rw-sink-append-only-protobuf-csr-a @@ -91,8 +88,8 @@ create sink sink0 from into_kafka with ( properties.bootstrap.server = 'message_queue:29092') format plain encode protobuf ( force_append_only = true, - schema.location = 'file:///risingwave/proto-recursive', - message = 'recursive.AllTypes'); + schema.location = 'file:///risingwave/src/connector/codec/tests/test_data/all-types.pb', + message = 'all_types.AllTypes'); statement ok create sink sink_csr_trivial as select string_field as field_a from into_kafka with ( @@ -121,8 +118,8 @@ create sink sink_upsert from into_kafka with ( properties.bootstrap.server = 'message_queue:29092', primary_key = 'string_field') format upsert 
encode protobuf ( - schema.location = 'file:///risingwave/proto-recursive', - message = 'recursive.AllTypes'); + schema.location = 'file:///risingwave/src/connector/codec/tests/test_data/all-types.pb', + message = 'all_types.AllTypes'); ---- db error: ERROR: Failed to run the query @@ -140,8 +137,8 @@ create sink sink_upsert from into_kafka with ( properties.bootstrap.server = 'message_queue:29092', primary_key = 'string_field') format upsert encode protobuf ( - schema.location = 'file:///risingwave/proto-recursive', - message = 'recursive.AllTypes') + schema.location = 'file:///risingwave/src/connector/codec/tests/test_data/all-types.pb', + message = 'all_types.AllTypes') key encode text; # Shall be ignored by force_append_only sinks but processed by upsert sinks. @@ -196,7 +193,7 @@ create sink sink_err from into_kafka with ( format plain encode protobuf ( force_append_only = true, schema.location = 'file:///risingwave/proto-recursiv', - message = 'recursive.AllTypes'); + message = 'all_types.AllTypes'); statement error field not in proto create sink sink_err as select 1 as extra_column with ( @@ -205,8 +202,8 @@ create sink sink_err as select 1 as extra_column with ( properties.bootstrap.server = 'message_queue:29092') format plain encode protobuf ( force_append_only = true, - schema.location = 'file:///risingwave/proto-recursive', - message = 'recursive.AllTypes'); + schema.location = 'file:///risingwave/src/connector/codec/tests/test_data/all-types.pb', + message = 'all_types.AllTypes'); statement error s3 URL not supported yet create sink sink_err from into_kafka with ( @@ -215,8 +212,8 @@ create sink sink_err from into_kafka with ( properties.bootstrap.server = 'message_queue:29092') format plain encode protobuf ( force_append_only = true, - schema.location = 's3:///risingwave/proto-recursive', - message = 'recursive.AllTypes'); + schema.location = 's3:///risingwave/src/connector/codec/tests/test_data/all-types.pb', + message = 'all_types.AllTypes'); statement ok drop table from_kafka cascade; diff --git a/e2e_test/sink/license.slt b/e2e_test/sink/license.slt index e38470d1c70d7..6e65b3653a536 100644 --- a/e2e_test/sink/license.slt +++ b/e2e_test/sink/license.slt @@ -7,32 +7,6 @@ ALTER SYSTEM SET license_key TO ''; statement ok CREATE TABLE t (k INT); -statement error -CREATE SINK file_sink -FROM - t -WITH -( - connector = 's3', - s3.region_name = 'us-east-1', - s3.bucket_name = 'test', - s3.path = '', - s3.file_type = 'parquet', - type = 'append-only', - force_append_only='true' -) FORMAT PLAIN ENCODE PARQUET(force_append_only='true'); ----- -db error: ERROR: Failed to run the query - -Caused by these errors (recent errors listed first): - 1: gRPC request to meta service failed: Internal error - 2: failed to validate sink - 3: Internal error - 4: feature FileSink is only available for tier Paid and above, while the current tier is Free - -Hint: You may want to set a license key with `ALTER SYSTEM SET license_key = '...';` command. 
- - statement error CREATE SINK dynamodb_sink FROM diff --git a/e2e_test/sink/mongodb_sink.slt b/e2e_test/sink/mongodb_sink.slt index 2122993e3003a..ddc5a91a20c3f 100644 --- a/e2e_test/sink/mongodb_sink.slt +++ b/e2e_test/sink/mongodb_sink.slt @@ -1,3 +1,6 @@ +statement ok +set sink_decouple = false; + statement ok create table t1( a smallint, diff --git a/e2e_test/sink/redis_cluster_sink.slt b/e2e_test/sink/redis_cluster_sink.slt index 03d197485777a..3effd7795d039 100644 --- a/e2e_test/sink/redis_cluster_sink.slt +++ b/e2e_test/sink/redis_cluster_sink.slt @@ -1,3 +1,6 @@ +statement ok +set sink_decouple = false; + statement ok CREATE TABLE t6 (v1 int primary key, v2 int); diff --git a/e2e_test/sink/redis_sink.slt b/e2e_test/sink/redis_sink.slt index 7475a80ae696e..8828c22b80d27 100644 --- a/e2e_test/sink/redis_sink.slt +++ b/e2e_test/sink/redis_sink.slt @@ -1,3 +1,6 @@ +statement ok +set sink_decouple = false; + statement ok CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamptz, v9 boolean); diff --git a/e2e_test/sink/remote/types.slt b/e2e_test/sink/remote/types.slt index f2421eabec906..e511d5e6a6ee7 100644 --- a/e2e_test/sink/remote/types.slt +++ b/e2e_test/sink/remote/types.slt @@ -1,3 +1,6 @@ +statement ok +set sink_decouple = false; + statement ok create table t5 (v1 smallint primary key, v2 int, v3 bigint, v4 float, v5 double, v6 decimal, v7 varchar, v8 timestamp, v9 boolean); diff --git a/e2e_test/sink/sqlserver_sink.slt b/e2e_test/sink/sqlserver_sink.slt index 156b8b865ffc8..08bbd3364ed9a 100644 --- a/e2e_test/sink/sqlserver_sink.slt +++ b/e2e_test/sink/sqlserver_sink.slt @@ -1,3 +1,6 @@ +statement ok +set sink_decouple = false; + statement ok create table t_many_data_type_rw ( k1 int, k2 int, diff --git a/e2e_test/sink/starrocks_sink.slt b/e2e_test/sink/starrocks_sink.slt index dedb01755cbbe..0aceac592618a 100644 --- a/e2e_test/sink/starrocks_sink.slt +++ b/e2e_test/sink/starrocks_sink.slt @@ -1,3 +1,6 @@ +statement ok +set sink_decouple = false; + statement ok CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamp, v9 boolean, v10 jsonb, v11 decimal); diff --git a/e2e_test/source/basic/kafka.slt b/e2e_test/source/basic/kafka.slt index 0e413c3389d58..227c0aa46bac1 100644 --- a/e2e_test/source/basic/kafka.slt +++ b/e2e_test/source/basic/kafka.slt @@ -187,17 +187,6 @@ create table s10 with ( scan.startup.mode = 'earliest' ) FORMAT PLAIN ENCODE AVRO (schema.location = 'file:///risingwave/avro-complex-schema.avsc', with_deprecated_file_header = true); -statement ok -create table s11 with ( - connector = 'kafka', - topic = 'proto_c_bin', - properties.bootstrap.server = 'message_queue:29092', - scan.startup.mode = 'earliest') -FORMAT PLAIN ENCODE PROTOBUF ( - message = 'test.User', - schema.location = 'file:///risingwave/proto-complex-schema' -); - statement ok CREATE TABLE s12( id int, @@ -273,17 +262,6 @@ create table s16 (v1 int, v2 varchar) with ( scan.startup.mode = 'latest' ) FORMAT PLAIN ENCODE JSON -statement ok -create source s17 with ( - connector = 'kafka', - topic = 'proto_c_bin', - properties.bootstrap.server = 'message_queue:29092', - scan.startup.mode = 'earliest') -FORMAT PLAIN ENCODE PROTOBUF ( - message = 'test.User', - schema.location = 'file:///risingwave/proto-complex-schema' -); - statement ok create source s18 with ( connector = 'kafka', @@ -696,11 +674,6 @@ select id, code, timestamp, xfas, contacts, sex from s10; ---- 100 abc 1473305798 
{"(0,200,10.0.0.1)","(1,400,10.0.0.2)"} ("{1xxx,2xxx}","{1xxx,2xxx}") MALE -query ITITT -select id, code, timestamp, xfas, contacts, sex from s11; ----- -0 abc 1473305798 {"(0,200,127.0.0.1)","(1,400,127.0.0.2)"} ("{1xxx,2xxx}","{1xxx,2xxx}") MALE - query ITITT select id, code, timestamp, xfas, contacts, jsonb from s12; ---- @@ -730,9 +703,6 @@ select count(*) from s16 statement error Not supported: alter source with schema registry alter source s18 add column v10 int; -statement error Not supported: alter source with schema registry -alter source s17 add column v10 int; - query III rowsort select * from s21; ---- @@ -875,9 +845,6 @@ drop table s9 statement ok drop table s10 -statement ok -drop table s11 - statement ok drop table s12 @@ -893,9 +860,6 @@ drop table s15 statement ok drop table s16 -statement ok -drop source s17 - statement ok drop source s18 diff --git a/e2e_test/source/basic/old_row_format_syntax/kafka.slt b/e2e_test/source/basic/old_row_format_syntax/kafka.slt index 1f4c118f30dc5..d67665a049daa 100644 --- a/e2e_test/source/basic/old_row_format_syntax/kafka.slt +++ b/e2e_test/source/basic/old_row_format_syntax/kafka.slt @@ -171,14 +171,6 @@ create table s10 with ( scan.startup.mode = 'earliest' ) row format avro row schema location 'file:///risingwave/avro-complex-schema.avsc' -statement ok -create table s11 with ( - connector = 'kafka', - topic = 'proto_c_bin', - properties.bootstrap.server = 'message_queue:29092', - scan.startup.mode = 'earliest' -) row format protobuf message 'test.User' row schema location 'file:///risingwave/proto-complex-schema' - statement ok CREATE TABLE s12( id int, @@ -254,14 +246,6 @@ create table s16 (v1 int, v2 varchar) with ( scan.startup.mode = 'latest' ) ROW FORMAT JSON -statement ok -create source s17 with ( - connector = 'kafka', - topic = 'proto_c_bin', - properties.bootstrap.server = 'message_queue:29092', - scan.startup.mode = 'earliest' -) row format protobuf message 'test.User' row schema location 'file:///risingwave/proto-complex-schema' - statement error without schema registry create source s18 with ( connector = 'kafka', @@ -570,11 +554,6 @@ select id, first_name, last_name, email from s8_no_schema_field; # ---- # 100 abc 1473305798 {"(0,200,10.0.0.1)","(1,400,10.0.0.2)"} ("{1xxx,2xxx}","{1xxx,2xxx}") MALE -query ITITT -select id, code, timestamp, xfas, contacts, sex from s11; ----- -0 abc 1473305798 {"(0,200,127.0.0.1)","(1,400,127.0.0.2)"} ("{1xxx,2xxx}","{1xxx,2xxx}") MALE - query ITITT select id, code, timestamp, xfas, contacts, jsonb from s12; ---- @@ -712,9 +691,6 @@ drop table s8_no_schema_field # statement ok # drop table s10 -statement ok -drop table s11 - statement ok drop table s12 @@ -730,9 +706,6 @@ drop table s15 statement ok drop table s16 -statement ok -drop source s17 - # statement ok # drop source s18 diff --git a/e2e_test/source/opendal/posix_fs.slt b/e2e_test/source/opendal/posix_fs.slt index 3fc572a1a1cc8..8eb1ce665590d 100644 --- a/e2e_test/source/opendal/posix_fs.slt +++ b/e2e_test/source/opendal/posix_fs.slt @@ -8,9 +8,9 @@ CREATE TABLE diamonds ( color TEXT, depth FLOAT, ) WITH ( - connector = 'posix_fs', - match_pattern = 'data*.csv', - posix_fs.root = 'e2e_test/source/opendal/data', + connector = 'posix_fs', + match_pattern = 'data*.csv', + posix_fs.root = 'e2e_test/source/opendal/data', ) FORMAT PLAIN ENCODE CSV ( without_header = 'false', delimiter = ','); sleep 10s diff --git a/e2e_test/source_inline/kafka/protobuf/recover.slt b/e2e_test/source_inline/kafka/protobuf/recover.slt new file mode 100644 
index 0000000000000..3babf26793f2a --- /dev/null +++ b/e2e_test/source_inline/kafka/protobuf/recover.slt @@ -0,0 +1,97 @@ +control substitution on + +system ok +rpk topic create 'test-pb-struct' + + +system ok +jq -sR '{"schema":.,"schemaType":"PROTOBUF"}' << EOF | curl -X POST -H 'content-type: application/json' -d @- "${RISEDEV_SCHEMA_REGISTRY_URL}/subjects/test-pb-struct-value/versions" +syntax = "proto3"; +package test; +message User { + int32 id = 1; + Name name = 2; +} +message Name { + string first_name = 1; + string last_name = 2; +} +EOF + + +# create a source with v1 schema +statement ok +create source s with ( + ${RISEDEV_KAFKA_WITH_OPTIONS_COMMON}, + topic = 'test-pb-struct') +format plain encode protobuf ( + schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}', + message = 'test.User'); + + +# register a v2 schema +system ok +jq -sR '{"schema":.,"schemaType":"PROTOBUF"}' << EOF | curl -X POST -H 'content-type: application/json' -d @- "${RISEDEV_SCHEMA_REGISTRY_URL}/subjects/test-pb-struct-value/versions" +syntax = "proto3"; +package test; +message User { + int32 id = 1; + Name name = 2; +} +message Name { + string first_name = 1; + string last_name = 2; + string middle_name = 3; +} +EOF + + +# trigger recovery +statement ok +recover; + + +sleep 2s + + +# produce a v2 message +statement ok +create sink sk as select + 1 as id, + row('Alan', 'Turing', 'Mathison')::struct as name +with ( + ${RISEDEV_KAFKA_WITH_OPTIONS_COMMON}, + topic = 'test-pb-struct') +format plain encode protobuf ( + schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}', + message = 'test.User'); + + +sleep 1s + + +# reading as v1 shall not panic +query IT +select * from s; +---- +1 (Alan,Turing) + + +statement ok +drop sink sk; + + +statement ok +drop source s; + + +system ok +curl -X DELETE "${RISEDEV_SCHEMA_REGISTRY_URL}/subjects/test-pb-struct-value" + + +system ok +curl -X DELETE "${RISEDEV_SCHEMA_REGISTRY_URL}/subjects/test-pb-struct-value?permanent=true" + + +system ok +rpk topic delete 'test-pb-struct' diff --git a/e2e_test/time_travel/syntax.slt b/e2e_test/time_travel/syntax.slt index 6c3408a276763..5895f6d9b9e8b 100644 --- a/e2e_test/time_travel/syntax.slt +++ b/e2e_test/time_travel/syntax.slt @@ -7,6 +7,10 @@ SET QUERY_MODE TO local; statement ok CREATE TABLE t (k INT); +query I +SELECT * FROM t; +---- + query error SELECT * FROM t FOR SYSTEM_TIME AS OF 963716300; ---- diff --git a/integration_tests/big-query-sink/create_sink.sql b/integration_tests/big-query-sink/create_sink.sql index a41fe0243120d..01fb5e340d545 100644 --- a/integration_tests/big-query-sink/create_sink.sql +++ b/integration_tests/big-query-sink/create_sink.sql @@ -1,3 +1,5 @@ +set sink_decouple = false; + -- create sink with local file CREATE SINK bhv_big_query_sink FROM diff --git a/integration_tests/cassandra-and-scylladb-sink/create_sink.sql b/integration_tests/cassandra-and-scylladb-sink/create_sink.sql index a0a305aebd0e0..fdda994d01427 100644 --- a/integration_tests/cassandra-and-scylladb-sink/create_sink.sql +++ b/integration_tests/cassandra-and-scylladb-sink/create_sink.sql @@ -1,3 +1,5 @@ +set sink_decouple = false; + CREATE SINK bhv_cassandra_sink FROM bhv_mv WITH ( diff --git a/integration_tests/clickhouse-sink/create_sink.sql b/integration_tests/clickhouse-sink/create_sink.sql index 5f730ed6ff910..b913a246b286e 100644 --- a/integration_tests/clickhouse-sink/create_sink.sql +++ b/integration_tests/clickhouse-sink/create_sink.sql @@ -1,3 +1,5 @@ +set sink_decouple = false; + CREATE SINK bhv_clickhouse_sink FROM bhv_mv WITH ( 
diff --git a/integration_tests/deltalake-sink/create_sink.sql b/integration_tests/deltalake-sink/create_sink.sql index f42b09d726e56..17c1c44aea255 100644 --- a/integration_tests/deltalake-sink/create_sink.sql +++ b/integration_tests/deltalake-sink/create_sink.sql @@ -1,3 +1,5 @@ +set sink_decouple = false; + create sink delta_lake_sink from source with ( connector = 'deltalake', diff --git a/integration_tests/doris-sink/create_sink.sql b/integration_tests/doris-sink/create_sink.sql index d4702219fed09..d6b28148c083d 100644 --- a/integration_tests/doris-sink/create_sink.sql +++ b/integration_tests/doris-sink/create_sink.sql @@ -1,3 +1,5 @@ +set sink_decouple = false; + create secret doris_secret with (backend = 'meta') as '123456'; CREATE SINK bhv_doris_sink diff --git a/integration_tests/dynamodb/create_sink.sql b/integration_tests/dynamodb/create_sink.sql index 6de71404a9da1..43cb2be6d1447 100644 --- a/integration_tests/dynamodb/create_sink.sql +++ b/integration_tests/dynamodb/create_sink.sql @@ -1,3 +1,5 @@ +set sink_decouple = false; + CREATE SINK dyn_sink FROM movies diff --git a/integration_tests/elasticsearch-sink/create_sink.sql b/integration_tests/elasticsearch-sink/create_sink.sql index 07046507d117d..f72f8f0e6ec3b 100644 --- a/integration_tests/elasticsearch-sink/create_sink.sql +++ b/integration_tests/elasticsearch-sink/create_sink.sql @@ -1,3 +1,5 @@ +set sink_decouple = false; + CREATE SINK bhv_es7_sink FROM bhv_mv WITH ( diff --git a/integration_tests/kafka-cdc-sink/create_sink.sql b/integration_tests/kafka-cdc-sink/create_sink.sql index 349aac0ca9b0a..0c25553adebba 100644 --- a/integration_tests/kafka-cdc-sink/create_sink.sql +++ b/integration_tests/kafka-cdc-sink/create_sink.sql @@ -1,3 +1,5 @@ +set sink_decouple = false; + CREATE SINK IF NOT EXISTS counts_sink FROM counts WITH ( diff --git a/integration_tests/mqtt/create_sink.sql b/integration_tests/mqtt/create_sink.sql index 69b6886943944..27b84aa354250 100644 --- a/integration_tests/mqtt/create_sink.sql +++ b/integration_tests/mqtt/create_sink.sql @@ -1,3 +1,5 @@ +set sink_decouple = false; + CREATE SINK mqtt_sink FROM personnel diff --git a/integration_tests/mysql-sink/create_sink.sql b/integration_tests/mysql-sink/create_sink.sql index 9776360df2914..f73b92e8ce259 100644 --- a/integration_tests/mysql-sink/create_sink.sql +++ b/integration_tests/mysql-sink/create_sink.sql @@ -1,3 +1,5 @@ +set sink_decouple = false; + CREATE SINK target_count_mysql_sink FROM target_count WITH ( diff --git a/integration_tests/nats/create_sink.sql b/integration_tests/nats/create_sink.sql index beee01afcecfb..fda1ab1c77621 100644 --- a/integration_tests/nats/create_sink.sql +++ b/integration_tests/nats/create_sink.sql @@ -1,3 +1,5 @@ +set sink_decouple = false; + CREATE TABLE personnel (id integer, name varchar); diff --git a/integration_tests/postgres-sink/create_sink.sql b/integration_tests/postgres-sink/create_sink.sql index 5041f1a36b741..ec76f16ac3037 100644 --- a/integration_tests/postgres-sink/create_sink.sql +++ b/integration_tests/postgres-sink/create_sink.sql @@ -1,3 +1,5 @@ +set sink_decouple = false; + CREATE SINK target_count_postgres_sink FROM target_count WITH ( diff --git a/integration_tests/redis-sink/create_sink.sql b/integration_tests/redis-sink/create_sink.sql index 61ffb67326227..f88a68aca2110 100644 --- a/integration_tests/redis-sink/create_sink.sql +++ b/integration_tests/redis-sink/create_sink.sql @@ -1,3 +1,5 @@ +set sink_decouple = false; + CREATE SINK bhv_redis_sink_1 FROM bhv_mv WITH ( diff --git 
a/integration_tests/starrocks-sink/create_sink.sql b/integration_tests/starrocks-sink/create_sink.sql index 8d7ebf98dfb20..7cfe69ef21973 100644 --- a/integration_tests/starrocks-sink/create_sink.sql +++ b/integration_tests/starrocks-sink/create_sink.sql @@ -1,3 +1,5 @@ +set sink_decouple = false; + create secret starrocks_secret with (backend = 'meta') as '123456'; CREATE SINK bhv_starrocks_sink_primary diff --git a/integration_tests/twitter-pulsar/pb/create_source.sql b/integration_tests/twitter-pulsar/pb/create_source.sql index bf41939b40d91..22c4927ab3bb9 100644 --- a/integration_tests/twitter-pulsar/pb/create_source.sql +++ b/integration_tests/twitter-pulsar/pb/create_source.sql @@ -1,5 +1,6 @@ CREATE SOURCE twitter WITH ( connector = 'pulsar', pulsar.topic = 'twitter', - pulsar.service.url = 'pulsar://message_queue:6650' + pulsar.service.url = 'pulsar://message_queue:6650', + subscription.name.prefix = 'custom_prefix' ) ROW FORMAT PROTOBUF MESSAGE 'twitter.schema.Event' ROW SCHEMA LOCATION 'http://file_server:8080/schema'; \ No newline at end of file diff --git a/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/DbzConnectorConfig.java b/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/DbzConnectorConfig.java index fb8aa62916f60..8ba569c7aea72 100644 --- a/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/DbzConnectorConfig.java +++ b/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/DbzConnectorConfig.java @@ -54,6 +54,7 @@ public class DbzConnectorConfig { public static final String PG_PUB_NAME = "publication.name"; public static final String PG_PUB_CREATE = "publication.create.enable"; public static final String PG_SCHEMA_NAME = "schema.name"; + public static final String PG_SSL_ROOT_CERT = "ssl.root.cert"; /* Sql Server configs */ public static final String SQL_SERVER_SCHEMA_NAME = "schema.name"; @@ -211,6 +212,10 @@ public DbzConnectorConfig( LOG.info("Disable table filtering for the shared Postgres source"); dbzProps.remove("table.include.list"); } + + if (userProps.containsKey(PG_SSL_ROOT_CERT)) { + dbzProps.setProperty("database.sslrootcert", userProps.get(PG_SSL_ROOT_CERT)); + } } else if (source == SourceTypeE.CITUS) { var postgresProps = initiateDbConfig(POSTGRES_CONFIG_FILE, substitutor); diff --git a/java/connector-node/risingwave-connector-service/src/main/resources/postgres.properties b/java/connector-node/risingwave-connector-service/src/main/resources/postgres.properties index 06c4210fcf468..c36b62a7aa531 100644 --- a/java/connector-node/risingwave-connector-service/src/main/resources/postgres.properties +++ b/java/connector-node/risingwave-connector-service/src/main/resources/postgres.properties @@ -7,6 +7,7 @@ database.port=${port} database.user=${username} database.password=${password} database.dbname=${database.name} +database.sslmode=${ssl.mode:-prefer} table.include.list=${schema.name}.${table.name} # The name of the PostgreSQL replication slot slot.name=${slot.name} diff --git a/lints/Cargo.lock b/lints/Cargo.lock index e3b748e6da670..aa1e1e4ef9b32 100644 --- a/lints/Cargo.lock +++ b/lints/Cargo.lock @@ -162,7 +162,8 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clippy_config" -version = "0.1.80" +version = "0.1.81" +source = 
"git+https://github.com/risingwavelabs/clippy?rev=5135d0218365e85f3371405b604a7fb1459eb256#5135d0218365e85f3371405b604a7fb1459eb256" dependencies = [ "rustc-semver", "serde", @@ -171,12 +172,14 @@ dependencies = [ [[package]] name = "clippy_utils" -version = "0.1.80" +version = "0.1.81" +source = "git+https://github.com/risingwavelabs/clippy?rev=5135d0218365e85f3371405b604a7fb1459eb256#5135d0218365e85f3371405b604a7fb1459eb256" dependencies = [ "arrayvec", "clippy_config", "itertools", "rustc-semver", + "rustc_apfloat", ] [[package]] @@ -869,6 +872,16 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5be1bdc7edf596692617627bbfeaba522131b18e06ca4df2b6b689e3c5d5ce84" +[[package]] +name = "rustc_apfloat" +version = "0.2.1+llvm-462a31f5a5ab" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "886d94c63c812a8037c4faca2607453a0fa4cf82f734665266876b022244543f" +dependencies = [ + "bitflags 1.3.2", + "smallvec", +] + [[package]] name = "rustfix" version = "0.6.1" @@ -975,6 +988,12 @@ dependencies = [ "digest", ] +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + [[package]] name = "syn" version = "2.0.39" diff --git a/lints/Cargo.toml b/lints/Cargo.toml index 43ece1f6fc5b7..e0b8fe5d96664 100644 --- a/lints/Cargo.toml +++ b/lints/Cargo.toml @@ -14,7 +14,7 @@ path = "ui/format_error.rs" # See `README.md` before bumping the version. # Remember to update the version in `ci/Dockerfile` as well. [dependencies] -clippy_utils = { git = "https://github.com/risingwavelabs/clippy", rev = "5e2a7c6adebdb0478ee6d5b67ab4ee94153b2997" } +clippy_utils = { git = "https://github.com/risingwavelabs/clippy", rev = "61e1d2fd7062e46ccf1237707ee6da5aac018f70" } dylint_linting = "3.1.0" itertools = "0.12" diff --git a/lints/rust-toolchain b/lints/rust-toolchain index a146af66cd637..31dbc57d04b2b 100644 --- a/lints/rust-toolchain +++ b/lints/rust-toolchain @@ -1,5 +1,5 @@ # See `README.md` before bumping the version. 
[toolchain] -channel = "nightly-2024-06-06" +channel = "nightly-2024-07-19" components = ["llvm-tools-preview", "rustc-dev"] diff --git a/proto/connector_service.proto b/proto/connector_service.proto index 964d227452548..99d9c58d4f1ed 100644 --- a/proto/connector_service.proto +++ b/proto/connector_service.proto @@ -229,9 +229,15 @@ message CoordinateRequest { SinkMetadata metadata = 2; } + message UpdateVnodeBitmapRequest { + common.Buffer vnode_bitmap = 1; + } + oneof msg { StartCoordinationRequest start_request = 1; CommitRequest commit_request = 2; + UpdateVnodeBitmapRequest update_vnode_request = 3; + bool stop = 4; } } diff --git a/proto/hummock.proto b/proto/hummock.proto index 19b7e036c9686..7956b4515dce8 100644 --- a/proto/hummock.proto +++ b/proto/hummock.proto @@ -104,6 +104,11 @@ message GroupTableChange { message GroupDestroy {} +message GroupMerge { + uint64 left_group_id = 1; + uint64 right_group_id = 2; +} + message GroupDelta { oneof delta_type { IntraLevelDelta intra_level = 1; @@ -111,6 +116,7 @@ message GroupDelta { GroupDestroy group_destroy = 3; GroupMetaChange group_meta_change = 4 [deprecated = true]; GroupTableChange group_table_change = 5 [deprecated = true]; + GroupMerge group_merge = 6; } } @@ -744,6 +750,7 @@ message PinVersionResponse { message SplitCompactionGroupRequest { uint64 group_id = 1; repeated uint32 table_ids = 2; + uint32 partition_vnode_count = 3; } message SplitCompactionGroupResponse { @@ -833,12 +840,20 @@ message CancelCompactTaskResponse { message GetVersionByEpochRequest { uint64 epoch = 1; + uint32 table_id = 2; } message GetVersionByEpochResponse { HummockVersion version = 1; } +message MergeCompactionGroupRequest { + uint64 left_group_id = 1; + uint64 right_group_id = 2; +} + +message MergeCompactionGroupResponse {} + service HummockManagerService { rpc UnpinVersionBefore(UnpinVersionBeforeRequest) returns (UnpinVersionBeforeResponse); rpc GetCurrentVersion(GetCurrentVersionRequest) returns (GetCurrentVersionResponse); @@ -880,6 +895,7 @@ service HummockManagerService { rpc CancelCompactTask(CancelCompactTaskRequest) returns (CancelCompactTaskResponse); rpc ListChangeLogEpochs(ListChangeLogEpochsRequest) returns (ListChangeLogEpochsResponse); rpc GetVersionByEpoch(GetVersionByEpochRequest) returns (GetVersionByEpochResponse); + rpc MergeCompactionGroup(MergeCompactionGroupRequest) returns (MergeCompactionGroupResponse); } message CompactionConfig { diff --git a/proto/plan_common.proto b/proto/plan_common.proto index 610f40968755c..0f4e988e6c035 100644 --- a/proto/plan_common.proto +++ b/proto/plan_common.proto @@ -141,6 +141,29 @@ enum JoinType { JOIN_TYPE_RIGHT_ANTI = 8; } +enum AsOfJoinType { + AS_OF_JOIN_TYPE_UNSPECIFIED = 0; + AS_OF_JOIN_TYPE_INNER = 1; + AS_OF_JOIN_TYPE_LEFT_OUTER = 2; +} + +enum AsOfJoinInequalityType { + AS_OF_INEQUALITY_TYPE_UNSPECIFIED = 0; + AS_OF_INEQUALITY_TYPE_GT = 1; + AS_OF_INEQUALITY_TYPE_GE = 2; + AS_OF_INEQUALITY_TYPE_LT = 3; + AS_OF_INEQUALITY_TYPE_LE = 4; +} + +message AsOfJoinDesc { + // The index of the right side's as of column. + uint32 right_idx = 1; + // The index of the left side's as of column. + uint32 left_idx = 2; + // The type of the inequality. 
+ AsOfJoinInequalityType inequality_type = 3; +} + // https://github.com/tokio-rs/prost/issues/80 enum FormatType { FORMAT_TYPE_UNSPECIFIED = 0; diff --git a/proto/stream_plan.proto b/proto/stream_plan.proto index a96f54818146e..ca67737aeafe0 100644 --- a/proto/stream_plan.proto +++ b/proto/stream_plan.proto @@ -455,6 +455,32 @@ message HashJoinNode { bool is_append_only = 14; } +message AsOfJoinNode { + plan_common.AsOfJoinType join_type = 1; + repeated int32 left_key = 2; + repeated int32 right_key = 3; + // Used for internal table states. + catalog.Table left_table = 4; + // Used for internal table states. + catalog.Table right_table = 5; + // Used for internal table states. + catalog.Table left_degree_table = 6; + // Used for internal table states. + catalog.Table right_degree_table = 7; + // The output indices of current node + repeated uint32 output_indices = 8; + // Left deduped input pk indices. The pk of the left_table and + // The pk of the left_table is [left_join_key | left_inequality_key | left_deduped_input_pk_indices] + // left_inequality_key is not used but for forward compatibility. + repeated uint32 left_deduped_input_pk_indices = 9; + // Right deduped input pk indices. + // The pk of the right_table is [right_join_key | right_inequality_key | right_deduped_input_pk_indices] + // right_inequality_key is not used but for forward compatibility. + repeated uint32 right_deduped_input_pk_indices = 10; + repeated bool null_safe = 11; + optional plan_common.AsOfJoinDesc asof_desc = 12; +} + message TemporalJoinNode { plan_common.JoinType join_type = 1; repeated int32 left_key = 2; diff --git a/src/batch/src/executor/iceberg_scan.rs b/src/batch/src/executor/iceberg_scan.rs index fca7745284fe3..2f67d8ce005aa 100644 --- a/src/batch/src/executor/iceberg_scan.rs +++ b/src/batch/src/executor/iceberg_scan.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
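// --- Hedged sketch (added for readability; not part of the change itself) -------------------
// The `IcebergScanExecutor` changes that follow index every equality-delete row by its key
// columns together with the highest sequence number that deleted it, and a data row survives
// only if no matching delete is newer than its data file. The helper below restates that
// retention rule; the names and the `i64` sequence-number type are assumptions mirroring the
// surrounding code.
fn keep_data_row_sketch(
    eq_delete_index: &std::collections::HashMap<risingwave_common::row::OwnedRow, i64>,
    key_row: &risingwave_common::row::OwnedRow,
    data_sequence_number: i64,
) -> bool {
    match eq_delete_index.get(key_row) {
        // The equality delete was written after the data file, so the row is deleted.
        Some(delete_sequence_number) if *delete_sequence_number > data_sequence_number => false,
        // Either no matching delete, or the delete predates the data file: keep the row.
        _ => true,
    }
}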
+use std::collections::HashMap; use std::mem; use futures_async_stream::try_stream; @@ -20,8 +21,11 @@ use iceberg::scan::FileScanTask; use iceberg::spec::TableMetadata; use itertools::Itertools; use risingwave_common::array::arrow::IcebergArrowConvert; +use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::{Field, Schema}; +use risingwave_common::row::{OwnedRow, Row}; use risingwave_common::types::DataType; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_connector::sink::iceberg::IcebergConfig; use risingwave_connector::source::iceberg::{IcebergProperties, IcebergSplit}; use risingwave_connector::source::{ConnectorProperties, SplitImpl, SplitMetaData}; @@ -38,7 +42,8 @@ pub struct IcebergScanExecutor { #[allow(dead_code)] snapshot_id: Option, table_meta: TableMetadata, - file_scan_tasks: Vec, + data_file_scan_tasks: Vec, + eq_delete_file_scan_tasks: Vec, batch_size: usize, schema: Schema, identity: String, @@ -63,7 +68,8 @@ impl IcebergScanExecutor { iceberg_config: IcebergConfig, snapshot_id: Option, table_meta: TableMetadata, - file_scan_tasks: Vec, + data_file_scan_tasks: Vec, + eq_delete_file_scan_tasks: Vec, batch_size: usize, schema: Schema, identity: String, @@ -72,7 +78,8 @@ impl IcebergScanExecutor { iceberg_config, snapshot_id, table_meta, - file_scan_tasks, + data_file_scan_tasks, + eq_delete_file_scan_tasks, batch_size, schema, identity, @@ -86,33 +93,136 @@ impl IcebergScanExecutor { .load_table_v2_with_metadata(self.table_meta) .await?; let data_types = self.schema.data_types(); + let executor_schema_names = self.schema.names(); - let file_scan_tasks = mem::take(&mut self.file_scan_tasks); + let mut eq_delete_file_scan_tasks_map: HashMap = HashMap::default(); + let eq_delete_file_scan_tasks = mem::take(&mut self.eq_delete_file_scan_tasks); - let file_scan_stream = { - #[try_stream] - async move { - for file_scan_task in file_scan_tasks { - yield file_scan_task; + // Build hash map for equality delete files + // Currently, all equality delete files have the same schema which is guaranteed by `IcebergSplitEnumerator`. 
+ let mut eq_delete_ids: Option> = None; + for eq_delete_file_scan_task in eq_delete_file_scan_tasks { + let mut sequence_number = eq_delete_file_scan_task.sequence_number; + + if eq_delete_ids.is_none() { + eq_delete_ids = Some(eq_delete_file_scan_task.project_field_ids.clone()); + } else { + debug_assert_eq!( + eq_delete_ids.as_ref().unwrap(), + &eq_delete_file_scan_task.project_field_ids + ); + } + + let reader = table + .reader_builder() + .with_batch_size(self.batch_size) + .build(); + let delete_file_scan_stream = tokio_stream::once(Ok(eq_delete_file_scan_task)); + + let mut delete_record_batch_stream = reader + .read(Box::pin(delete_file_scan_stream)) + .map_err(BatchError::Iceberg)?; + + while let Some(record_batch) = delete_record_batch_stream.next().await { + let record_batch = record_batch.map_err(BatchError::Iceberg)?; + + let chunk = IcebergArrowConvert.chunk_from_record_batch(&record_batch)?; + for row in chunk.rows() { + let entry = eq_delete_file_scan_tasks_map + .entry(row.to_owned_row()) + .or_default(); + *entry = *entry.max(&mut sequence_number); } } - }; - - let reader = table - .reader_builder() - .with_batch_size(self.batch_size) - .build(); - - let record_batch_stream = reader - .read(Box::pin(file_scan_stream)) - .map_err(BatchError::Iceberg)?; - - #[for_await] - for record_batch in record_batch_stream { - let record_batch = record_batch.map_err(BatchError::Iceberg)?; - let chunk = IcebergArrowConvert.chunk_from_record_batch(&record_batch)?; - debug_assert_eq!(chunk.data_types(), data_types); - yield chunk; + } + + let data_file_scan_tasks = mem::take(&mut self.data_file_scan_tasks); + + // Delete rows in the data file that need to be deleted by map + for data_file_scan_task in data_file_scan_tasks { + let data_sequence_number = data_file_scan_task.sequence_number; + + let data_chunk_column_names: Vec<_> = data_file_scan_task + .project_field_ids + .iter() + .filter_map(|id| { + data_file_scan_task + .schema + .name_by_field_id(*id) + .map(|name| name.to_string()) + }) + .collect(); + + // eq_delete_column_idxes are used to fetch equality delete columns from data files. 
+ let eq_delete_column_idxes = eq_delete_ids.as_ref().map(|eq_delete_ids| { + eq_delete_ids + .iter() + .map(|eq_delete_id| { + data_file_scan_task + .project_field_ids + .iter() + .position(|project_field_id| eq_delete_id == project_field_id) + .expect("eq_delete_id not found in delete_equality_ids") + }) + .collect_vec() + }); + + let reader = table + .reader_builder() + .with_batch_size(self.batch_size) + .build(); + let file_scan_stream = tokio_stream::once(Ok(data_file_scan_task)); + + let mut record_batch_stream = reader + .read(Box::pin(file_scan_stream)) + .map_err(BatchError::Iceberg)?; + + while let Some(record_batch) = record_batch_stream.next().await { + let record_batch = record_batch.map_err(BatchError::Iceberg)?; + + let chunk = IcebergArrowConvert.chunk_from_record_batch(&record_batch)?; + let chunk = match eq_delete_column_idxes.as_ref() { + Some(delete_column_ids) => { + let visibility = Bitmap::from_iter( + // Project with the schema of the delete file + chunk.project(delete_column_ids).rows().map(|row_ref| { + let row = row_ref.to_owned_row(); + if let Some(delete_sequence_number) = + eq_delete_file_scan_tasks_map.get(&row) + && delete_sequence_number > &data_sequence_number + { + // delete_sequence_number > data_sequence_number means the delete file is written later than data file, + // so it needs to be deleted + false + } else { + true + } + }), + ) + .clone(); + // Keep the schema consistent(chunk and executor) + // Filter out (equality delete) columns that are not in the executor schema + let data = chunk + .columns() + .iter() + .zip_eq_fast(&data_chunk_column_names) + .filter_map(|(array, columns)| { + if executor_schema_names.contains(columns) { + Some(array.clone()) + } else { + None + } + }) + .collect_vec(); + let chunk = DataChunk::new(data, visibility); + debug_assert_eq!(chunk.data_types(), data_types); + chunk + } + // If there is no delete file, the data file is directly output + None => chunk, + }; + yield chunk; + } } } } @@ -171,6 +281,11 @@ impl BoxedExecutorBuilder for IcebergScanExecutorBuilder { Some(split.snapshot_id), split.table_meta.deserialize(), split.files.into_iter().map(|x| x.deserialize()).collect(), + split + .eq_delete_files + .into_iter() + .map(|x| x.deserialize()) + .collect(), source.context.get_config().developer.chunk_size, schema, source.plan_node().get_identity().clone(), diff --git a/src/batch/src/executor/join/hash_join.rs b/src/batch/src/executor/join/hash_join.rs index 3bfb583d6459d..863e53035626a 100644 --- a/src/batch/src/executor/join/hash_join.rs +++ b/src/batch/src/executor/join/hash_join.rs @@ -162,9 +162,8 @@ impl<'a> Iterator for RowIdIter<'a> { type Item = RowId; fn next(&mut self) -> Option { - self.current_row_id.map(|row_id| { - self.current_row_id = self.next_row_id[row_id]; - row_id + self.current_row_id.inspect(|row_id| { + self.current_row_id = self.next_row_id[*row_id]; }) } } diff --git a/src/batch/src/lib.rs b/src/batch/src/lib.rs index 414f27b33b4a7..9b88c3be9cd68 100644 --- a/src/batch/src/lib.rs +++ b/src/batch/src/lib.rs @@ -20,7 +20,6 @@ #![feature(coroutines)] #![feature(proc_macro_hygiene, stmt_expr_attributes)] #![feature(iterator_try_collect)] -#![feature(lint_reasons)] #![feature(is_sorted)] #![recursion_limit = "256"] #![feature(let_chains)] diff --git a/src/batch/src/worker_manager/worker_node_manager.rs b/src/batch/src/worker_manager/worker_node_manager.rs index fd4d0e37bbbc4..772bc8a4b6da7 100644 --- a/src/batch/src/worker_manager/worker_node_manager.rs +++ 
b/src/batch/src/worker_manager/worker_node_manager.rs @@ -19,7 +19,7 @@ use std::time::Duration; use rand::seq::SliceRandom; use risingwave_common::bail; use risingwave_common::catalog::OBJECT_ID_PLACEHOLDER; -use risingwave_common::hash::{VirtualNode, WorkerSlotId, WorkerSlotMapping}; +use risingwave_common::hash::{WorkerSlotId, WorkerSlotMapping}; use risingwave_common::vnode_mapping::vnode_placement::place_vnode; use risingwave_pb::common::{WorkerNode, WorkerType}; @@ -346,38 +346,26 @@ impl WorkerNodeSelector { if self.enable_barrier_read { self.manager.get_streaming_fragment_mapping(&fragment_id) } else { - let (hint, parallelism) = match self.manager.serving_fragment_mapping(fragment_id) { - Ok(o) => { - if self.manager.worker_node_mask().is_empty() { - // 1. Stable mapping for most cases. - return Ok(o); - } - // If it's a singleton, set max_parallelism=1 for place_vnode. - let max_parallelism = o.to_single().map(|_| 1); - (Some(o), max_parallelism) - } - Err(e) => { - if !matches!(e, BatchError::ServingVnodeMappingNotFound(_)) { - return Err(e); - } - // We cannot tell whether it's a singleton, set max_parallelism=1 for place_vnode as if it's a singleton. - let max_parallelism = 1; - tracing::warn!( - fragment_id, - max_parallelism, - "Serving fragment mapping not found, fall back to temporary one." - ); - // Workaround the case that new mapping is not available yet due to asynchronous - // notification. - (None, Some(max_parallelism)) - } - }; - // 2. Temporary mapping that filters out unavailable workers. - let new_workers = self.apply_worker_node_mask(self.manager.list_serving_worker_nodes()); - // TODO(var-vnode): use vnode count from config - let masked_mapping = - place_vnode(hint.as_ref(), &new_workers, parallelism, VirtualNode::COUNT); - masked_mapping.ok_or_else(|| BatchError::EmptyWorkerNodes) + let mapping = (self.manager.serving_fragment_mapping(fragment_id)).or_else(|_| { + tracing::warn!( + fragment_id, + "Serving fragment mapping not found, fall back to streaming one." + ); + self.manager.get_streaming_fragment_mapping(&fragment_id) + })?; + + // Filter out unavailable workers. + if self.manager.worker_node_mask().is_empty() { + Ok(mapping) + } else { + let workers = self.apply_worker_node_mask(self.manager.list_serving_worker_nodes()); + // If it's a singleton, set max_parallelism=1 for place_vnode. 
+ let max_parallelism = mapping.to_single().map(|_| 1); + let masked_mapping = + place_vnode(Some(&mapping), &workers, max_parallelism, mapping.len()) + .ok_or_else(|| BatchError::EmptyWorkerNodes)?; + Ok(masked_mapping) + } } } diff --git a/src/common/benches/bench_data_chunk_encoding.rs b/src/common/benches/bench_data_chunk_encoding.rs index 96413a4305205..4b09aeaeed5c2 100644 --- a/src/common/benches/bench_data_chunk_encoding.rs +++ b/src/common/benches/bench_data_chunk_encoding.rs @@ -55,7 +55,7 @@ fn bench_data_chunk_encoding(c: &mut Criterion) { for null_ratio in NULL_RATIOS { for chunk_size in CHUNK_SIZES { let chunk = rand_chunk::gen_chunk(&case.data_types, *chunk_size, SEED, *null_ratio); - let mut group = c.benchmark_group(&format!( + let mut group = c.benchmark_group(format!( "data chunk encoding: {}, {} rows, Pr[null]={}", case.name, chunk_size, null_ratio )); diff --git a/src/common/benches/bench_sequencer.rs b/src/common/benches/bench_sequencer.rs index 12e92f1f3332d..591b5fd64ee3a 100644 --- a/src/common/benches/bench_sequencer.rs +++ b/src/common/benches/bench_sequencer.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![feature(lint_reasons)] - use std::cell::RefCell; use std::hint::black_box; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/src/common/common_service/src/lib.rs b/src/common/common_service/src/lib.rs index 2cf9a56e076f3..ecf89a84fce88 100644 --- a/src/common/common_service/src/lib.rs +++ b/src/common/common_service/src/lib.rs @@ -14,7 +14,6 @@ // This is a stub lib.rs. -#![feature(lint_reasons)] #![feature(impl_trait_in_assoc_type)] #![feature(error_generic_member_access)] diff --git a/src/common/metrics/src/guarded_metrics.rs b/src/common/metrics/src/guarded_metrics.rs index 27710748ae359..9b16cc778938c 100644 --- a/src/common/metrics/src/guarded_metrics.rs +++ b/src/common/metrics/src/guarded_metrics.rs @@ -83,6 +83,22 @@ macro_rules! register_guarded_int_gauge_vec_with_registry { }}; } +#[macro_export] +macro_rules! register_guarded_uint_gauge_vec_with_registry { + ($NAME:expr, $HELP:expr, $LABELS_NAMES:expr, $REGISTRY:expr $(,)?) => {{ + let inner = prometheus::core::GenericGaugeVec::::new( + prometheus::opts!($NAME, $HELP), + $LABELS_NAMES, + ); + inner.and_then(|inner| { + let inner = $crate::__extract_gauge_builder(inner); + let label_guarded = $crate::LabelGuardedUintGaugeVec::new(inner, { $LABELS_NAMES }); + let result = ($REGISTRY).register(Box::new(label_guarded.clone())); + result.map(move |()| label_guarded) + }) + }}; +} + #[macro_export] macro_rules! register_guarded_int_counter_vec_with_registry { ($NAME:expr, $HELP:expr, $LABELS_NAMES:expr, $REGISTRY:expr $(,)?) => {{ @@ -131,6 +147,8 @@ pub type LabelGuardedIntCounterVec = LabelGuardedMetricVec, N>; pub type LabelGuardedIntGaugeVec = LabelGuardedMetricVec, N>; +pub type LabelGuardedUintGaugeVec = + LabelGuardedMetricVec, N>; pub type LabelGuardedGaugeVec = LabelGuardedMetricVec, N>; diff --git a/src/common/src/array/arrow/arrow_iceberg.rs b/src/common/src/array/arrow/arrow_iceberg.rs index ff23bc102ee6b..80c0a3dab1667 100644 --- a/src/common/src/array/arrow/arrow_iceberg.rs +++ b/src/common/src/array/arrow/arrow_iceberg.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
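// --- Hedged aside on the decimal rescaling change just below (illustration only) ------------
// The old code computed `10_i32.pow(diff_scale) as i128`, which overflows as soon as the scale
// difference reaches 10 (10^10 > i32::MAX); doing the exponentiation in `i128`, as the new code
// does, stays exact for the whole precision range Iceberg permits (<= 38 digits). The helper
// below restates the rescaling rule with hypothetical names.
fn rescale_decimal_mantissa_sketch(mantissa: i128, scale: i8, max_scale: i8) -> i128 {
    let diff_scale = (max_scale as i32 - scale as i32).unsigned_abs();
    if scale < max_scale {
        // Target scale is larger: shift the mantissa left by the missing decimal places.
        mantissa * 10_i128.pow(diff_scale)
    } else if scale > max_scale {
        // Target scale is smaller: drop the extra decimal places.
        mantissa / 10_i128.pow(diff_scale)
    } else {
        mantissa
    }
}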
+use std::cell::RefCell; +use std::collections::HashMap; use std::ops::{Div, Mul}; use std::sync::Arc; @@ -138,12 +140,8 @@ impl ToArrow for IcebergArrowConvert { let scale = e.scale() as i8; let diff_scale = abs(max_scale - scale); let value = match scale { - _ if scale < max_scale => { - value.mul(10_i32.pow(diff_scale as u32) as i128) - } - _ if scale > max_scale => { - value.div(10_i32.pow(diff_scale as u32) as i128) - } + _ if scale < max_scale => value.mul(10_i128.pow(diff_scale as u32)), + _ if scale > max_scale => value.div(10_i128.pow(diff_scale as u32)), _ => value, }; Some(value) @@ -171,6 +169,94 @@ impl ToArrow for IcebergArrowConvert { impl FromArrow for IcebergArrowConvert {} +/// Iceberg sink with `create_table_if_not_exists` option will use this struct to convert the +/// iceberg data type to arrow data type. Specifically, it will add the field id to the +/// arrow field metadata, because iceberg-rust and icelake need the field id to be set. +/// +/// Note: this is different from [`IcebergArrowConvert`], which is used to read from/write to +/// an _existing_ iceberg table. In that case, we just need to make sure the data is compatible to the existing schema. +/// But to _create a new table_, we need to meet more requirements of iceberg. +#[derive(Default)] +pub struct IcebergCreateTableArrowConvert { + next_field_id: RefCell, +} + +impl IcebergCreateTableArrowConvert { + pub fn to_arrow_field( + &self, + name: &str, + data_type: &DataType, + ) -> Result { + ToArrow::to_arrow_field(self, name, data_type) + } + + fn add_field_id(&self, arrow_field: &mut arrow_schema::Field) { + *self.next_field_id.borrow_mut() += 1; + let field_id = *self.next_field_id.borrow(); + + let mut metadata = HashMap::new(); + // for iceberg-rust + metadata.insert("PARQUET:field_id".to_string(), field_id.to_string()); + // for icelake + metadata.insert("column_id".to_string(), field_id.to_string()); + arrow_field.set_metadata(metadata); + } +} + +impl ToArrow for IcebergCreateTableArrowConvert { + #[inline] + fn decimal_type_to_arrow(&self, name: &str) -> arrow_schema::Field { + // To create a iceberg table, we need a decimal type with precision and scale to be set + // We choose 28 here + // The decimal type finally will be converted to an iceberg decimal type. + // Iceberg decimal(P,S) + // Fixed-point decimal; precision P, scale S Scale is fixed, precision must be 38 or less. + let data_type = arrow_schema::DataType::Decimal128(28, 10); + + let mut arrow_field = arrow_schema::Field::new(name, data_type, true); + self.add_field_id(&mut arrow_field); + arrow_field + } + + /// Convert RisingWave data type to Arrow data type. + /// + /// This function returns a `Field` instead of `DataType` because some may be converted to + /// extension types which require additional metadata in the field. 
+ fn to_arrow_field( + &self, + name: &str, + value: &DataType, + ) -> Result { + let data_type = match value { + // using the inline function + DataType::Boolean => self.bool_type_to_arrow(), + DataType::Int16 => self.int16_type_to_arrow(), + DataType::Int32 => self.int32_type_to_arrow(), + DataType::Int64 => self.int64_type_to_arrow(), + DataType::Int256 => self.int256_type_to_arrow(), + DataType::Float32 => self.float32_type_to_arrow(), + DataType::Float64 => self.float64_type_to_arrow(), + DataType::Date => self.date_type_to_arrow(), + DataType::Time => self.time_type_to_arrow(), + DataType::Timestamp => self.timestamp_type_to_arrow(), + DataType::Timestamptz => self.timestamptz_type_to_arrow(), + DataType::Interval => self.interval_type_to_arrow(), + DataType::Varchar => self.varchar_type_to_arrow(), + DataType::Bytea => self.bytea_type_to_arrow(), + DataType::Serial => self.serial_type_to_arrow(), + DataType::Decimal => return Ok(self.decimal_type_to_arrow(name)), + DataType::Jsonb => return Ok(self.jsonb_type_to_arrow(name)), + DataType::Struct(fields) => self.struct_type_to_arrow(fields)?, + DataType::List(datatype) => self.list_type_to_arrow(datatype)?, + DataType::Map(datatype) => self.map_type_to_arrow(datatype)?, + }; + + let mut arrow_field = arrow_schema::Field::new(name, data_type, true); + self.add_field_id(&mut arrow_field); + Ok(arrow_field) + } +} + #[cfg(test)] mod test { use std::sync::Arc; @@ -207,4 +293,30 @@ mod test { ) as ArrayRef; assert_eq!(&arrow_array, &expect_array); } + + #[test] + fn decimal_with_large_scale() { + let array = DecimalArray::from_iter([ + None, + Some(Decimal::NaN), + Some(Decimal::PositiveInf), + Some(Decimal::NegativeInf), + Some(Decimal::Normalized("123.4".parse().unwrap())), + Some(Decimal::Normalized("123.456".parse().unwrap())), + ]); + let ty = DataType::Decimal128(28, 10); + let arrow_array = IcebergArrowConvert.decimal_to_arrow(&ty, &array).unwrap(); + let expect_array = Arc::new( + Decimal128Array::from(vec![ + None, + None, + Some(9999999999999999999999999999), + Some(-9999999999999999999999999999), + Some(1234000000000), + Some(1234560000000), + ]) + .with_data_type(ty), + ) as ArrayRef; + assert_eq!(&arrow_array, &expect_array); + } } diff --git a/src/common/src/array/arrow/arrow_impl.rs b/src/common/src/array/arrow/arrow_impl.rs index acc39bc951975..8fa3e2abb6b5f 100644 --- a/src/common/src/array/arrow/arrow_impl.rs +++ b/src/common/src/array/arrow/arrow_impl.rs @@ -448,12 +448,17 @@ pub trait ToArrow { #[inline] fn map_type_to_arrow(&self, map_type: &MapType) -> Result { let sorted = false; - let list_type = map_type.clone().into_list(); + // "key" is always non-null + let key = self + .to_arrow_field("key", map_type.key())? 
+ .with_nullable(false); + let value = self.to_arrow_field("value", map_type.value())?; Ok(arrow_schema::DataType::Map( Arc::new(arrow_schema::Field::new( "entries", - self.list_type_to_arrow(&list_type)?, - true, + arrow_schema::DataType::Struct([Arc::new(key), Arc::new(value)].into()), + // "entries" is always non-null + false, )), sorted, )) diff --git a/src/common/src/array/arrow/arrow_udf.rs b/src/common/src/array/arrow/arrow_udf.rs index e461f49e576a6..a5296ca21cab8 100644 --- a/src/common/src/array/arrow/arrow_udf.rs +++ b/src/common/src/array/arrow/arrow_udf.rs @@ -125,6 +125,7 @@ impl FromArrow for UdfArrowConvert { #[cfg(test)] mod tests { + use super::*; use crate::array::*; @@ -205,4 +206,120 @@ mod tests { .unwrap(); assert_eq!(rw_array.as_list(), &array); } + + #[test] + fn map() { + let map_type = MapType::from_kv(DataType::Varchar, DataType::Int32); + let rw_map_type = DataType::Map(map_type.clone()); + let mut builder = MapArrayBuilder::with_type(3, rw_map_type.clone()); + builder.append_owned(Some( + MapValue::try_from_kv( + ListValue::from_str("{a,b,c}", &DataType::List(Box::new(DataType::Varchar))) + .unwrap(), + ListValue::from_str("{1,2,3}", &DataType::List(Box::new(DataType::Int32))).unwrap(), + ) + .unwrap(), + )); + builder.append_owned(None); + builder.append_owned(Some( + MapValue::try_from_kv( + ListValue::from_str("{a,c}", &DataType::List(Box::new(DataType::Varchar))).unwrap(), + ListValue::from_str("{1,3}", &DataType::List(Box::new(DataType::Int32))).unwrap(), + ) + .unwrap(), + )); + let rw_array = builder.finish(); + + let arrow_map_type = UdfArrowConvert::default() + .map_type_to_arrow(&map_type) + .unwrap(); + expect_test::expect![[r#" + Map( + Field { + name: "entries", + data_type: Struct( + [ + Field { + name: "key", + data_type: Utf8, + nullable: false, + dict_id: 0, + dict_is_ordered: false, + metadata: {}, + }, + Field { + name: "value", + data_type: Int32, + nullable: true, + dict_id: 0, + dict_is_ordered: false, + metadata: {}, + }, + ], + ), + nullable: false, + dict_id: 0, + dict_is_ordered: false, + metadata: {}, + }, + false, + ) + "#]] + .assert_debug_eq(&arrow_map_type); + let rw_map_type_new = UdfArrowConvert::default() + .from_field(&arrow_schema::Field::new( + "map", + arrow_map_type.clone(), + true, + )) + .unwrap(); + assert_eq!(rw_map_type, rw_map_type_new); + let arrow = UdfArrowConvert::default() + .map_to_arrow(&arrow_map_type, &rw_array) + .unwrap(); + expect_test::expect![[r#" + MapArray + [ + StructArray + [ + -- child 0: "key" (Utf8) + StringArray + [ + "a", + "b", + "c", + ] + -- child 1: "value" (Int32) + PrimitiveArray + [ + 1, + 2, + 3, + ] + ], + null, + StructArray + [ + -- child 0: "key" (Utf8) + StringArray + [ + "a", + "c", + ] + -- child 1: "value" (Int32) + PrimitiveArray + [ + 1, + 3, + ] + ], + ] + "#]] + .assert_debug_eq(&arrow); + + let rw_array_new = UdfArrowConvert::default() + .from_map_array(arrow.as_any().downcast_ref().unwrap()) + .unwrap(); + assert_eq!(&rw_array, rw_array_new.as_map()); + } } diff --git a/src/common/src/array/arrow/mod.rs b/src/common/src/array/arrow/mod.rs index fd9f55ee09f7e..d519d62f9935a 100644 --- a/src/common/src/array/arrow/mod.rs +++ b/src/common/src/array/arrow/mod.rs @@ -17,7 +17,7 @@ mod arrow_iceberg; mod arrow_udf; pub use arrow_deltalake::DeltaLakeConvert; -pub use arrow_iceberg::IcebergArrowConvert; +pub use arrow_iceberg::{IcebergArrowConvert, IcebergCreateTableArrowConvert}; pub use arrow_udf::{FromArrow, ToArrow, UdfArrowConvert}; use crate::types::Interval; diff --git 
a/src/common/src/config.rs b/src/common/src/config.rs index d78fdbe51fa9b..e2b4dd7b0f97c 100644 --- a/src/common/src/config.rs +++ b/src/common/src/config.rs @@ -699,6 +699,9 @@ pub struct StorageConfig { #[serde(default)] pub prefetch_buffer_capacity_mb: Option, + #[serde(default)] + pub max_cached_recent_versions_number: Option, + /// max prefetch block number #[serde(default = "default::storage::max_prefetch_block_number")] pub max_prefetch_block_number: usize, diff --git a/src/common/src/lib.rs b/src/common/src/lib.rs index 8d47d0c621646..e3417853b0201 100644 --- a/src/common/src/lib.rs +++ b/src/common/src/lib.rs @@ -23,7 +23,6 @@ #![feature(test)] #![feature(trusted_len)] #![feature(allocator_api)] -#![feature(lint_reasons)] #![feature(coroutines)] #![feature(map_try_insert)] #![feature(error_generic_member_access)] @@ -76,7 +75,7 @@ pub mod memory; pub use risingwave_common_metrics::{ monitor, register_guarded_gauge_vec_with_registry, register_guarded_histogram_vec_with_registry, register_guarded_int_counter_vec_with_registry, - register_guarded_int_gauge_vec_with_registry, + register_guarded_int_gauge_vec_with_registry, register_guarded_uint_gauge_vec_with_registry, }; pub use { risingwave_common_metrics as metrics, risingwave_common_secret as secret, diff --git a/src/compute/src/lib.rs b/src/compute/src/lib.rs index d91fb56b1cb88..1336a84980cea 100644 --- a/src/compute/src/lib.rs +++ b/src/compute/src/lib.rs @@ -16,7 +16,6 @@ #![feature(coroutines)] #![feature(type_alias_impl_trait)] #![feature(let_chains)] -#![feature(lint_reasons)] #![feature(impl_trait_in_assoc_type)] #![cfg_attr(coverage, feature(coverage_attribute))] @@ -103,8 +102,9 @@ pub struct ComputeNodeOpts { pub role: Role, /// Used for control the metrics level, similar to log level. - /// 0 = disable metrics - /// >0 = enable metrics + /// + /// level = 0: disable metrics + /// level > 0: enable metrics #[clap(long, hide = true, env = "RW_METRICS_LEVEL")] #[override_opts(path = server.metrics_level)] pub metrics_level: Option, diff --git a/src/config/docs.md b/src/config/docs.md index 47905d71e5e0c..bcce61d8bb456 100644 --- a/src/config/docs.md +++ b/src/config/docs.md @@ -119,6 +119,7 @@ This page is automatically generated by `./risedev generate-example-config` | enable_fast_compaction | | true | | high_priority_ratio_in_percent | DEPRECATED: This config will be deprecated in the future version, use `storage.cache.block_cache_eviction.high_priority_ratio_in_percent` with `storage.cache.block_cache_eviction.algorithm = "Lru"` instead. | | | imm_merge_threshold | The threshold for the number of immutable memtables to merge to a new imm. 
| 0 | +| max_cached_recent_versions_number | | | | max_concurrent_compaction_task_number | | 16 | | max_prefetch_block_number | max prefetch block number | 16 | | max_preload_io_retry_times | | 3 | diff --git a/src/connector/Cargo.toml b/src/connector/Cargo.toml index a77e9cb929d17..2535847c98fe4 100644 --- a/src/connector/Cargo.toml +++ b/src/connector/Cargo.toml @@ -76,7 +76,7 @@ jni = { version = "0.21.1", features = ["invocation"] } jsonbb = { workspace = true } jsonwebtoken = "9.2.0" maplit = "1.0.2" -moka = { version = "0.12", features = ["future"] } +moka = { version = "0.12.0", features = ["future"] } mongodb = { version = "2.8.2", features = ["tokio-runtime"] } mysql_async = { version = "0.34", default-features = false, features = [ "default", @@ -105,7 +105,6 @@ prometheus = { version = "0.13", features = ["process"] } prost = { workspace = true, features = ["no-recursion-limit"] } prost-reflect = { version = "0.14", features = ["serde"] } prost-types = "0.13" -protobuf-native = "0.2.2" pulsar = { version = "6.3", default-features = false, features = [ "tokio-runtime", "telemetry", @@ -194,6 +193,7 @@ assert_matches = "1" criterion = { workspace = true, features = ["async_tokio", "async"] } deltalake = { workspace = true, features = ["datafusion"] } expect-test = "1" +fs-err = "2" paste = "1" pretty_assertions = "1" quote = "1" @@ -206,10 +206,6 @@ tracing-subscriber = "0.3" tracing-test = "0.2" walkdir = "2" -[build-dependencies] -prost-build = "0.12" -protobuf-src = "1" - [[bench]] name = "debezium_json_parser" harness = false diff --git a/src/connector/codec/Cargo.toml b/src/connector/codec/Cargo.toml index 5086549f4bf4c..5848c236dbd4d 100644 --- a/src/connector/codec/Cargo.toml +++ b/src/connector/codec/Cargo.toml @@ -26,6 +26,10 @@ itertools = { workspace = true } jsonbb = { workspace = true } jst = { package = 'jsonschema-transpiler', git = "https://github.com/mozilla/jsonschema-transpiler", rev = "c1a89d720d118843d8bcca51084deb0ed223e4b4" } num-bigint = "0.4" +prost = { workspace = true, features = ["no-recursion-limit"] } +prost-reflect = { version = "0.14", features = ["serde"] } +prost-types = "0.13" +protobuf-native = "0.2.2" risingwave_common = { workspace = true } risingwave_pb = { workspace = true } rust_decimal = "1" @@ -37,7 +41,13 @@ tracing = "0.1" [dev-dependencies] expect-test = "1" +fs-err = "2" hex = "0.4" +tokio = { version = "0.2", package = "madsim-tokio" } + +[build-dependencies] +prost-build = "0.12" +protobuf-src = "1" [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../../workspace-hack" } diff --git a/src/connector/build.rs b/src/connector/codec/build.rs similarity index 87% rename from src/connector/build.rs rename to src/connector/codec/build.rs index 6ef6e1629438c..8a9438d59b9e8 100644 --- a/src/connector/build.rs +++ b/src/connector/codec/build.rs @@ -13,17 +13,17 @@ // limitations under the License. 
fn main() { - let proto_dir = "./src/test_data/proto_recursive"; + let proto_dir = "./tests/test_data/"; println!("cargo:rerun-if-changed={}", proto_dir); - let proto_files = ["recursive"]; + let proto_files = ["recursive", "all-types"]; let protos: Vec = proto_files .iter() .map(|f| format!("{}/{}.proto", proto_dir, f)) .collect(); prost_build::Config::new() - .out_dir("./src/parser/protobuf") + .out_dir("./tests/integration_tests/protobuf") .compile_protos(&protos, &Vec::::new()) .unwrap(); diff --git a/src/connector/codec/src/common/mod.rs b/src/connector/codec/src/common/mod.rs new file mode 100644 index 0000000000000..c8a7ca35c4209 --- /dev/null +++ b/src/connector/codec/src/common/mod.rs @@ -0,0 +1,15 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub mod protobuf; diff --git a/src/connector/codec/src/common/protobuf/compiler.rs b/src/connector/codec/src/common/protobuf/compiler.rs new file mode 100644 index 0000000000000..80e86d002d4aa --- /dev/null +++ b/src/connector/codec/src/common/protobuf/compiler.rs @@ -0,0 +1,86 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::path::{Path, PathBuf}; + +use itertools::Itertools; + +macro_rules! embed_wkts { + [$( $path:literal ),+ $(,)?] 
=> { + &[$( + ( + concat!("google/protobuf/", $path), + include_bytes!(concat!(env!("PROTO_INCLUDE"), "/google/protobuf/", $path)).as_slice(), + ) + ),+] + }; +} +const WELL_KNOWN_TYPES: &[(&str, &[u8])] = embed_wkts![ + "any.proto", + "api.proto", + "compiler/plugin.proto", + "descriptor.proto", + "duration.proto", + "empty.proto", + "field_mask.proto", + "source_context.proto", + "struct.proto", + "timestamp.proto", + "type.proto", + "wrappers.proto", +]; + +#[derive(Debug, thiserror::Error)] +pub enum PbCompileError { + #[error("build_file_descriptor_set failed\n{}", errs.iter().map(|e| format!("\t{e}")).join("\n"))] + Build { + errs: Vec, + }, + #[error("serialize descriptor set failed")] + Serialize, +} + +pub fn compile_pb( + main_file: (PathBuf, Vec), + dependencies: impl IntoIterator)>, +) -> Result, PbCompileError> { + use protobuf_native::compiler::{ + SimpleErrorCollector, SourceTreeDescriptorDatabase, VirtualSourceTree, + }; + use protobuf_native::MessageLite; + + let root = main_file.0.clone(); + + let mut source_tree = VirtualSourceTree::new(); + for (path, bytes) in std::iter::once(main_file).chain(dependencies.into_iter()) { + source_tree.as_mut().add_file(&path, bytes); + } + for (path, bytes) in WELL_KNOWN_TYPES { + source_tree + .as_mut() + .add_file(Path::new(path), bytes.to_vec()); + } + + let mut error_collector = SimpleErrorCollector::new(); + // `db` needs to be dropped before we can iterate on `error_collector`. + let fds = { + let mut db = SourceTreeDescriptorDatabase::new(source_tree.as_mut()); + db.as_mut().record_errors_to(error_collector.as_mut()); + db.as_mut().build_file_descriptor_set(&[root]) + } + .map_err(|_| PbCompileError::Build { + errs: error_collector.as_mut().collect(), + })?; + fds.serialize().map_err(|_| PbCompileError::Serialize) +} diff --git a/src/connector/codec/src/common/protobuf/mod.rs b/src/connector/codec/src/common/protobuf/mod.rs new file mode 100644 index 0000000000000..f630dedf0d240 --- /dev/null +++ b/src/connector/codec/src/common/protobuf/mod.rs @@ -0,0 +1,16 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
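For reviewers, a minimal sketch of how the new `compile_pb` helper is meant to be consumed, mirroring the `load_message_descriptor` test helper added later in this diff. The schema path and message name below come from the new test data and are otherwise illustrative; the serialized `FileDescriptorSet` returned by `compile_pb` is decoded into a `prost_reflect::DescriptorPool` to look up message descriptors.

```rust
use std::path::PathBuf;

use prost_reflect::DescriptorPool;
use risingwave_connector_codec::common::protobuf::compile_pb;

fn main() -> anyhow::Result<()> {
    // Illustrative schema path; any readable .proto works. Imports are resolved
    // against the passed dependencies plus the embedded well-known types.
    let path = PathBuf::from("tests/test_data/simple-schema.proto");
    let content = std::fs::read(&path)?;

    // `compile_pb` returns a serialized FileDescriptorSet.
    let fds_bytes = compile_pb((path, content), [])?;

    // Load the bytes into a reflection pool and look up a message descriptor,
    // as the integration tests added later in this diff do.
    let pool = DescriptorPool::decode(fds_bytes.as_slice())?;
    let message = pool
        .get_message_by_name("test.TestRecord")
        .expect("message not found in compiled schema");
    println!("compiled message: {}", message.full_name());
    Ok(())
}
```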
+ +mod compiler; +pub use compiler::compile_pb; diff --git a/src/connector/codec/src/decoder/mod.rs b/src/connector/codec/src/decoder/mod.rs index bbfdbf0a90d79..e3e579ed36ec1 100644 --- a/src/connector/codec/src/decoder/mod.rs +++ b/src/connector/codec/src/decoder/mod.rs @@ -14,6 +14,7 @@ pub mod avro; pub mod json; +pub mod protobuf; pub mod utils; use risingwave_common::error::NotImplemented; diff --git a/src/connector/src/parser/unified/protobuf.rs b/src/connector/codec/src/decoder/protobuf/mod.rs similarity index 84% rename from src/connector/src/parser/unified/protobuf.rs rename to src/connector/codec/src/decoder/protobuf/mod.rs index b1d34746b5029..7ad357fef50fb 100644 --- a/src/connector/src/parser/unified/protobuf.rs +++ b/src/connector/codec/src/decoder/protobuf/mod.rs @@ -12,17 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub mod parser; use std::borrow::Cow; use std::sync::LazyLock; +use parser::from_protobuf_value; use prost_reflect::{DynamicMessage, ReflectMessage}; use risingwave_common::log::LogSuppresser; use risingwave_common::types::{DataType, DatumCow, ToOwnedDatum}; use thiserror_ext::AsReport; -use super::{Access, AccessResult}; -use crate::parser::from_protobuf_value; -use crate::parser::unified::uncategorized; +use super::{uncategorized, Access, AccessResult}; pub struct ProtobufAccess { message: DynamicMessage, @@ -32,14 +32,15 @@ impl ProtobufAccess { pub fn new(message: DynamicMessage) -> Self { Self { message } } + + #[cfg(test)] + pub fn descriptor(&self) -> prost_reflect::MessageDescriptor { + self.message.descriptor() + } } impl Access for ProtobufAccess { - fn access<'a>( - &'a self, - path: &[&str], - _type_expected: &DataType, - ) -> AccessResult> { + fn access<'a>(&'a self, path: &[&str], type_expected: &DataType) -> AccessResult> { debug_assert_eq!(1, path.len()); let field_desc = self .message @@ -55,10 +56,10 @@ impl Access for ProtobufAccess { })?; match self.message.get_field(&field_desc) { - Cow::Borrowed(value) => from_protobuf_value(&field_desc, value), + Cow::Borrowed(value) => from_protobuf_value(&field_desc, value, type_expected), // `Owned` variant occurs only if there's no such field and the default value is returned. - Cow::Owned(value) => from_protobuf_value(&field_desc, &value) + Cow::Owned(value) => from_protobuf_value(&field_desc, &value, type_expected) // enforce `Owned` variant to avoid returning a reference to a temporary value .map(|d| d.to_owned_datum().into()), } diff --git a/src/connector/codec/src/decoder/protobuf/parser.rs b/src/connector/codec/src/decoder/protobuf/parser.rs new file mode 100644 index 0000000000000..852fa9cca48d6 --- /dev/null +++ b/src/connector/codec/src/decoder/protobuf/parser.rs @@ -0,0 +1,275 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
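As a rough sketch of what the new `type_expected` parameter means for callers (the `read_field` helper below is hypothetical and not part of this PR): the accessor is now driven by the declared RisingWave column type, so structs, lists, and maps are resolved against it rather than re-derived from the protobuf descriptor.

```rust
use prost_reflect::{DynamicMessage, MessageDescriptor};
use risingwave_common::types::{DataType, Datum, ToOwnedDatum};
use risingwave_connector_codec::decoder::protobuf::ProtobufAccess;
use risingwave_connector_codec::decoder::Access;

/// Decodes one protobuf message and extracts a single top-level field as a
/// RisingWave datum of the expected column type.
fn read_field(
    descriptor: &MessageDescriptor,
    payload: &[u8],
    field: &str,
    expected: &DataType,
) -> anyhow::Result<Datum> {
    let message = DynamicMessage::decode(descriptor.clone(), payload)?;
    let access = ProtobufAccess::new(message);
    // The expected type is threaded down into `from_protobuf_value`, which is
    // what lets fields deleted from the protobuf schema fall back to NULL and
    // lets list/map values be built with the declared element types.
    let datum = access.access(&[field], expected)?;
    Ok(datum.to_owned_datum())
}
```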
+ +use anyhow::Context; +use itertools::Itertools; +use prost_reflect::{Cardinality, FieldDescriptor, Kind, MessageDescriptor, ReflectMessage, Value}; +use risingwave_common::array::{ListValue, StructValue}; +use risingwave_common::types::{ + DataType, DatumCow, Decimal, JsonbVal, MapType, MapValue, ScalarImpl, ToOwnedDatum, F32, F64, +}; +use risingwave_pb::plan_common::{AdditionalColumn, ColumnDesc, ColumnDescVersion}; +use thiserror::Error; +use thiserror_ext::Macro; + +use crate::decoder::{uncategorized, AccessError, AccessResult}; + +pub fn pb_schema_to_column_descs( + message_descriptor: &MessageDescriptor, +) -> anyhow::Result> { + let mut columns = Vec::with_capacity(message_descriptor.fields().len()); + let mut index = 0; + let mut parse_trace: Vec = vec![]; + for field in message_descriptor.fields() { + columns.push(pb_field_to_col_desc(&field, &mut index, &mut parse_trace)?); + } + + Ok(columns) +} + +/// Maps a protobuf field to a RW column. +fn pb_field_to_col_desc( + field_descriptor: &FieldDescriptor, + index: &mut i32, + parse_trace: &mut Vec, +) -> anyhow::Result { + let field_type = protobuf_type_mapping(field_descriptor, parse_trace) + .context("failed to map protobuf type")?; + if let Kind::Message(m) = field_descriptor.kind() { + let field_descs = if let DataType::List { .. } = field_type { + vec![] + } else { + m.fields() + .map(|f| pb_field_to_col_desc(&f, index, parse_trace)) + .try_collect()? + }; + *index += 1; + Ok(ColumnDesc { + column_id: *index, + name: field_descriptor.name().to_string(), + column_type: Some(field_type.to_protobuf()), + field_descs, + type_name: m.full_name().to_string(), + generated_or_default_column: None, + description: None, + additional_column_type: 0, // deprecated + additional_column: Some(AdditionalColumn { column_type: None }), + version: ColumnDescVersion::Pr13707 as i32, + }) + } else { + *index += 1; + Ok(ColumnDesc { + column_id: *index, + name: field_descriptor.name().to_string(), + column_type: Some(field_type.to_protobuf()), + additional_column: Some(AdditionalColumn { column_type: None }), + version: ColumnDescVersion::Pr13707 as i32, + ..Default::default() + }) + } +} + +#[derive(Error, Debug, Macro)] +#[error("{0}")] +struct ProtobufTypeError(#[message] String); + +fn detect_loop_and_push( + trace: &mut Vec, + fd: &FieldDescriptor, +) -> std::result::Result<(), ProtobufTypeError> { + let identifier = format!("{}({})", fd.name(), fd.full_name()); + if trace.iter().any(|s| s == identifier.as_str()) { + bail_protobuf_type_error!( + "circular reference detected: {}, conflict with {}, kind {:?}", + trace.iter().format("->"), + identifier, + fd.kind(), + ); + } + trace.push(identifier); + Ok(()) +} + +pub fn from_protobuf_value<'a>( + field_desc: &FieldDescriptor, + value: &'a Value, + type_expected: &DataType, +) -> AccessResult> { + let kind = field_desc.kind(); + + macro_rules! 
borrowed { + ($v:expr) => { + return Ok(DatumCow::Borrowed(Some($v.into()))) + }; + } + + let v: ScalarImpl = match value { + Value::Bool(v) => ScalarImpl::Bool(*v), + Value::I32(i) => ScalarImpl::Int32(*i), + Value::U32(i) => ScalarImpl::Int64(*i as i64), + Value::I64(i) => ScalarImpl::Int64(*i), + Value::U64(i) => ScalarImpl::Decimal(Decimal::from(*i)), + Value::F32(f) => ScalarImpl::Float32(F32::from(*f)), + Value::F64(f) => ScalarImpl::Float64(F64::from(*f)), + Value::String(s) => borrowed!(s.as_str()), + Value::EnumNumber(idx) => { + let enum_desc = kind.as_enum().ok_or_else(|| AccessError::TypeError { + expected: "enum".to_owned(), + got: format!("{kind:?}"), + value: value.to_string(), + })?; + let enum_symbol = enum_desc.get_value(*idx).ok_or_else(|| { + uncategorized!("unknown enum index {} of enum {:?}", idx, enum_desc) + })?; + ScalarImpl::Utf8(enum_symbol.name().into()) + } + Value::Message(dyn_msg) => { + if dyn_msg.descriptor().full_name() == "google.protobuf.Any" { + ScalarImpl::Jsonb(JsonbVal::from( + serde_json::to_value(dyn_msg).map_err(AccessError::ProtobufAnyToJson)?, + )) + } else { + let desc = dyn_msg.descriptor(); + let DataType::Struct(st) = type_expected else { + return Err(AccessError::TypeError { + expected: type_expected.to_string(), + got: desc.full_name().to_string(), + value: value.to_string(), // Protobuf TEXT + }); + }; + + let mut rw_values = Vec::with_capacity(st.len()); + for (name, expected_field_type) in st.iter() { + let Some(field_desc) = desc.get_field_by_name(name) else { + // Field deleted in protobuf. Fallback to SQL NULL (of proper RW type). + rw_values.push(None); + continue; + }; + let value = dyn_msg.get_field(&field_desc); + rw_values.push( + from_protobuf_value(&field_desc, &value, expected_field_type)? + .to_owned_datum(), + ); + } + ScalarImpl::Struct(StructValue::new(rw_values)) + } + } + Value::List(values) => { + let DataType::List(element_type) = type_expected else { + return Err(AccessError::TypeError { + expected: type_expected.to_string(), + got: format!("repeated {:?}", kind), + value: value.to_string(), // Protobuf TEXT + }); + }; + let mut builder = element_type.create_array_builder(values.len()); + for value in values { + builder.append(from_protobuf_value(field_desc, value, element_type)?); + } + ScalarImpl::List(ListValue::new(builder.finish())) + } + Value::Bytes(value) => borrowed!(&**value), + Value::Map(map) => { + let err = || { + AccessError::TypeError { + expected: type_expected.to_string(), + got: format!("{:?}", kind), + value: value.to_string(), // Protobuf TEXT + } + }; + + let DataType::Map(map_type) = type_expected else { + return Err(err()); + }; + if !field_desc.is_map() { + return Err(err()); + } + let map_desc = kind.as_message().ok_or_else(err)?; + + let mut key_builder = map_type.key().create_array_builder(map.len()); + let mut value_builder = map_type.value().create_array_builder(map.len()); + // NOTE: HashMap's iter order is non-deterministic, but MapValue's + // order matters. We sort by key here to have deterministic order + // in tests. We might consider removing this, or make all MapValue sorted + // in the future. 
+ for (key, value) in map.iter().sorted_by_key(|(k, _v)| *k) { + key_builder.append(from_protobuf_value( + &map_desc.map_entry_key_field(), + &key.clone().into(), + map_type.key(), + )?); + value_builder.append(from_protobuf_value( + &map_desc.map_entry_value_field(), + value, + map_type.value(), + )?); + } + let keys = key_builder.finish(); + let values = value_builder.finish(); + ScalarImpl::Map( + MapValue::try_from_kv(ListValue::new(keys), ListValue::new(values)) + .map_err(|e| uncategorized!("failed to convert protobuf map: {e}"))?, + ) + } + }; + Ok(Some(v).into()) +} + +/// Maps protobuf type to RW type. +fn protobuf_type_mapping( + field_descriptor: &FieldDescriptor, + parse_trace: &mut Vec, +) -> std::result::Result { + detect_loop_and_push(parse_trace, field_descriptor)?; + let mut t = match field_descriptor.kind() { + Kind::Bool => DataType::Boolean, + Kind::Double => DataType::Float64, + Kind::Float => DataType::Float32, + Kind::Int32 | Kind::Sint32 | Kind::Sfixed32 => DataType::Int32, + // Fixed32 represents [0, 2^32 - 1]. It's equal to u32. + Kind::Int64 | Kind::Sint64 | Kind::Sfixed64 | Kind::Uint32 | Kind::Fixed32 => { + DataType::Int64 + } + Kind::Uint64 | Kind::Fixed64 => DataType::Decimal, + Kind::String => DataType::Varchar, + Kind::Message(m) => { + if m.full_name() == "google.protobuf.Any" { + // Well-Known Types are identified by their full name + DataType::Jsonb + } else if m.is_map_entry() { + // Map is equivalent to `repeated MapFieldEntry map_field = N;` + debug_assert!(field_descriptor.is_map()); + let key = protobuf_type_mapping(&m.map_entry_key_field(), parse_trace)?; + let value = protobuf_type_mapping(&m.map_entry_value_field(), parse_trace)?; + _ = parse_trace.pop(); + return Ok(DataType::Map(MapType::from_kv(key, value))); + } else { + let fields = m + .fields() + .map(|f| protobuf_type_mapping(&f, parse_trace)) + .try_collect()?; + let field_names = m.fields().map(|f| f.name().to_string()).collect_vec(); + DataType::new_struct(fields, field_names) + } + } + Kind::Enum(_) => DataType::Varchar, + Kind::Bytes => DataType::Bytea, + }; + if field_descriptor.cardinality() == Cardinality::Repeated { + debug_assert!(!field_descriptor.is_map()); + t = DataType::List(Box::new(t)) + } + _ = parse_trace.pop(); + Ok(t) +} diff --git a/src/connector/codec/src/lib.rs b/src/connector/codec/src/lib.rs index cbf0ad14046f7..d3f0a8c6ec2cf 100644 --- a/src/connector/codec/src/lib.rs +++ b/src/connector/codec/src/lib.rs @@ -21,7 +21,6 @@ #![feature(stmt_expr_attributes)] #![feature(box_patterns)] #![feature(trait_alias)] -#![feature(lint_reasons)] #![feature(let_chains)] #![feature(box_into_inner)] #![feature(type_alias_impl_trait)] @@ -38,6 +37,7 @@ #![register_tool(rw)] #![recursion_limit = "256"] +pub mod common; /// Converts JSON/AVRO/Protobuf data to RisingWave datum. /// The core API is [`decoder::Access`]. pub mod decoder; diff --git a/src/connector/codec/tests/integration_tests/avro.rs b/src/connector/codec/tests/integration_tests/avro.rs index 11275f45e9783..ab1df6e7e82b8 100644 --- a/src/connector/codec/tests/integration_tests/avro.rs +++ b/src/connector/codec/tests/integration_tests/avro.rs @@ -64,33 +64,11 @@ fn avro_schema_str_to_risingwave_schema( Ok((resolved_schema, rw_schema)) } -/// Data driven testing for converting Avro Schema to RisingWave Schema, and then converting Avro data into RisingWave data. -/// -/// The expected results can be automatically updated. 
To run and update the tests: -/// ```bash -/// UPDATE_EXPECT=1 cargo test -p risingwave_connector_codec -/// ``` -/// Or use Rust Analyzer. Refer to . +/// Refer to the [crate-level documentation](crate) for the ideas. /// /// ## Arguments /// - `avro_schema`: Avro schema in JSON format. /// - `avro_data`: list of Avro data. Refer to [`TestDataEncoding`] for the format. -/// -/// ## Why not directly test the uppermost layer `AvroParserConfig` and `AvroAccessBuilder`? -/// -/// Because their interface are not clean enough, and have complex logic like schema registry. -/// We might need to separate logic to make them clenaer and then we can use it directly for testing. -/// -/// ## If we reimplement a similar logic here, what are we testing? -/// -/// Basically unit tests of `avro_schema_to_column_descs`, `convert_to_datum`, i.e., the type mapping. -/// -/// It makes some sense, as the data parsing logic is generally quite simple (one-liner), and the most -/// complex and error-prone part is the type mapping. -/// -/// ## Why test schema mapping and data mapping together? -/// -/// Because the expected data type for data mapping comes from the schema mapping. #[track_caller] fn check( avro_schema: &str, @@ -992,10 +970,10 @@ fn test_map() { map_map_int(#2): Jsonb, ]"#]], expect![[r#" - Owned(Jsonb(JsonbRef({"a": "x", "b": "y"}))) - Owned(Jsonb(JsonbRef({"m1": {"a": Number(1), "b": Number(2)}, "m2": {"c": Number(3), "d": Number(4)}}))) + Owned(Jsonb({"a": "x", "b": "y"})) + Owned(Jsonb({"m1": {"a": 1, "b": 2}, "m2": {"c": 3, "d": 4}})) ---- - Owned(Jsonb(JsonbRef({}))) - Owned(Jsonb(JsonbRef({})))"#]], + Owned(Jsonb({})) + Owned(Jsonb({}))"#]], ); } diff --git a/src/connector/codec/tests/integration_tests/main.rs b/src/connector/codec/tests/integration_tests/main.rs index 8c718f918d0a6..010fe05936517 100644 --- a/src/connector/codec/tests/integration_tests/main.rs +++ b/src/connector/codec/tests/integration_tests/main.rs @@ -12,6 +12,31 @@ // See the License for the specific language governing permissions and // limitations under the License. +//! Data-driven testing for converting an Avro/Protobuf schema to a RisingWave schema, and then converting Avro/Protobuf data into RisingWave data. +//! +//! The expected results can be automatically updated. To run and update the tests: +//! ```bash +//! UPDATE_EXPECT=1 cargo test -p risingwave_connector_codec +//! ``` +//! Or use Rust Analyzer. Refer to . +//! +//! ## Why not directly test the uppermost layer `AvroParserConfig` and `AvroAccessBuilder`? +//! +//! Because their interfaces are not clean enough and contain complex logic like schema registry. +//! We might need to separate the logic to make them cleaner, and then we can use them directly for testing. +//! +//! ## If we reimplement similar logic here, what are we testing? +//! +//! Basically, unit tests of `avro_schema_to_column_descs` and `convert_to_datum`, i.e., the type mapping. +//! +//! It makes some sense, as the data parsing logic is generally quite simple (one-liner), and the most +//! complex and error-prone part is the type mapping. +//! +//! ## Why test schema mapping and data mapping together? +//! +//! Because the expected data type for data mapping comes from the schema mapping.
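To make the snapshot-updating workflow above concrete, here is a minimal, self-contained sketch of the `expect_test` pattern these integration tests build on; the `type_mapping_under_test` helper is invented purely for illustration and is not part of this PR.

```rust
#[cfg(test)]
mod expect_test_example {
    use expect_test::expect;

    // Stand-in for a real conversion such as `pb_schema_to_column_descs`;
    // it just renders a type name so the snapshot has something to hold.
    fn type_mapping_under_test(input: &str) -> String {
        format!("Varchar (from protobuf `{input}`)")
    }

    #[test]
    fn snapshot_example() {
        let actual = type_mapping_under_test("string");
        // On mismatch the test fails with a diff; running with
        // `UPDATE_EXPECT=1` rewrites the literal below in place.
        expect!["Varchar (from protobuf `string`)"].assert_eq(&actual);
    }
}
```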
+ mod avro; +mod protobuf; pub mod utils; diff --git a/src/connector/codec/tests/integration_tests/protobuf.rs b/src/connector/codec/tests/integration_tests/protobuf.rs new file mode 100644 index 0000000000000..9a70ef5e5c7a9 --- /dev/null +++ b/src/connector/codec/tests/integration_tests/protobuf.rs @@ -0,0 +1,719 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[rustfmt::skip] +#[allow(clippy::all)] +mod recursive; +#[rustfmt::skip] +#[allow(clippy::all)] +mod all_types; +use std::collections::HashMap; +use std::path::PathBuf; + +use anyhow::Context; +use prost::Message; +use prost_reflect::{DescriptorPool, DynamicMessage, MessageDescriptor}; +use risingwave_connector_codec::common::protobuf::compile_pb; +use risingwave_connector_codec::decoder::protobuf::parser::*; +use risingwave_connector_codec::decoder::protobuf::ProtobufAccess; +use risingwave_connector_codec::decoder::Access; +use thiserror_ext::AsReport; + +use crate::utils::*; + +/// Refer to [crate level documentation](crate) for the ideas. +#[track_caller] +fn check( + pb_schema: MessageDescriptor, + pb_data: &[&[u8]], + expected_risingwave_schema: expect_test::Expect, + expected_risingwave_data: expect_test::Expect, +) { + let rw_schema = pb_schema_to_column_descs(&pb_schema); + + if let Err(e) = rw_schema { + expected_risingwave_schema.assert_eq(&e.to_report_string_pretty()); + expected_risingwave_data.assert_eq(""); + return; + } + + let rw_schema = rw_schema + .unwrap() + .iter() + .map(ColumnDesc::from) + .collect_vec(); + expected_risingwave_schema.assert_eq(&format!( + "{:#?}", + rw_schema.iter().map(ColumnDescTestDisplay).collect_vec() + )); + + let mut data_str = vec![]; + for data in pb_data { + let access = ProtobufAccess::new(DynamicMessage::decode(pb_schema.clone(), *data).unwrap()); + let mut row = vec![]; + for col in &rw_schema { + let rw_data = access.access(&[&col.name], &col.data_type); + match rw_data { + Ok(data) => row.push(format!("{:#?}", DatumCowTestDisplay(&data))), + Err(e) => row.push(format!( + "~~~~\nError at column `{}`: {}\n~~~~", + col.name, + e.to_report_string() + )), + } + } + data_str.push(format!("{}", row.iter().format("\n"))); + } + + expected_risingwave_data.assert_eq(&format!( + "{}", + data_str + .iter() + .format("\n================================================================\n") + )); +} + +fn load_message_descriptor( + file_name: &str, + message_name: &str, +) -> anyhow::Result { + let location = "tests/test_data/".to_string() + file_name; + let file_content = fs_err::read(&location).unwrap(); + let schema_bytes = if file_name.ends_with(".proto") { + compile_pb((PathBuf::from(&location), file_content), [])? 
+ } else { + file_content + }; + let pool = DescriptorPool::decode(schema_bytes.as_slice()) + .with_context(|| format!("cannot build descriptor pool from schema `{location}`"))?; + + pool.get_message_by_name(message_name).with_context(|| { + format!( + "cannot find message `{}` in schema `{}`", + message_name, location, + ) + }) +} + +#[test] +fn test_simple_schema() -> anyhow::Result<()> { + // Id: 123, + // Address: "test address", + // City: "test city", + // Zipcode: 456, + // Rate: 1.2345, + // Date: "2021-01-01" + static PRE_GEN_PROTO_DATA: &[u8] = b"\x08\x7b\x12\x0c\x74\x65\x73\x74\x20\x61\x64\x64\x72\x65\x73\x73\x1a\x09\x74\x65\x73\x74\x20\x63\x69\x74\x79\x20\xc8\x03\x2d\x19\x04\x9e\x3f\x32\x0a\x32\x30\x32\x31\x2d\x30\x31\x2d\x30\x31"; + + let message_descriptor = + load_message_descriptor("simple-schema.proto", "test.TestRecord").unwrap(); + + // validate the binary data is correct + let value = DynamicMessage::decode(message_descriptor.clone(), PRE_GEN_PROTO_DATA).unwrap(); + expect![[r#" + [ + I32( + 123, + ), + String( + "test address", + ), + String( + "test city", + ), + I64( + 456, + ), + F32( + 1.2345, + ), + String( + "2021-01-01", + ), + ] + "#]] + .assert_debug_eq(&value.fields().map(|f| f.1).collect_vec()); + + check( + message_descriptor, + &[PRE_GEN_PROTO_DATA], + expect![[r#" + [ + id(#1): Int32, + address(#2): Varchar, + city(#3): Varchar, + zipcode(#4): Int64, + rate(#5): Float32, + date(#6): Varchar, + ]"#]], + expect![[r#" + Owned(Int32(123)) + Borrowed(Utf8("test address")) + Borrowed(Utf8("test city")) + Owned(Int64(456)) + Owned(Float32(OrderedFloat(1.2345))) + Borrowed(Utf8("2021-01-01"))"#]], + ); + + Ok(()) +} + +#[test] +fn test_complex_schema() -> anyhow::Result<()> { + let message_descriptor = load_message_descriptor("complex-schema.proto", "test.User").unwrap(); + + check( + message_descriptor, + &[], + expect![[r#" + [ + id(#1): Int32, + code(#2): Varchar, + timestamp(#3): Int64, + xfas(#4): List( + Struct { + device_model_id: Int32, + device_make_id: Int32, + ip: Varchar, + }, + ), type_name: test.Xfa, + contacts(#7): Struct { + emails: List(Varchar), + phones: List(Varchar), + }, type_name: test.Contacts, field_descs: [emails(#5): List(Varchar), phones(#6): List(Varchar)], + sex(#8): Varchar, + ]"#]], + expect![""], + ); + + Ok(()) +} + +#[test] +fn test_any_schema() -> anyhow::Result<()> { + let message_descriptor = load_message_descriptor("any-schema.proto", "test.TestAny").unwrap(); + + // id: 12345 + // name { + // type_url: "type.googleapis.com/test.Int32Value" + // value: "\010\322\376\006" + // } + // Unpacked Int32Value from Any: value: 114514 + static ANY_DATA_1: &[u8] = b"\x08\xb9\x60\x12\x2b\x0a\x23\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x49\x6e\x74\x33\x32\x56\x61\x6c\x75\x65\x12\x04\x08\xd2\xfe\x06"; + + // "id": 12345, + // "any_value": { + // "type_url": "type.googleapis.com/test.AnyValue", + // "value": { + // "any_value_1": { + // "type_url": "type.googleapis.com/test.StringValue", + // "value": "114514" + // }, + // "any_value_2": { + // "type_url": "type.googleapis.com/test.Int32Value", + // "value": 114514 + // } + // } + // } + static ANY_DATA_2: &[u8] = 
b"\x08\xb9\x60\x12\x84\x01\x0a\x21\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x41\x6e\x79\x56\x61\x6c\x75\x65\x12\x5f\x0a\x30\x0a\x24\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x53\x74\x72\x69\x6e\x67\x56\x61\x6c\x75\x65\x12\x08\x0a\x06\x31\x31\x34\x35\x31\x34\x12\x2b\x0a\x23\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x49\x6e\x74\x33\x32\x56\x61\x6c\x75\x65\x12\x04\x08\xd2\xfe\x06"; + + // id: 12345 + // name { + // type_url: "type.googleapis.com/test.StringValue" + // value: "\n\010John Doe" + // } + static ANY_DATA_3: &[u8] = b"\x08\xb9\x60\x12\x32\x0a\x24\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x53\x74\x72\x69\x6e\x67\x56\x61\x6c\x75\x65\x12\x0a\x0a\x08\x4a\x6f\x68\x6e\x20\x44\x6f\x65"; + + // // id: 12345 + // // any_value: { + // // type_url: "type.googleapis.com/test.StringXalue" + // // value: "\n\010John Doe" + // // } + static ANY_DATA_INVALID: &[u8] = b"\x08\xb9\x60\x12\x32\x0a\x24\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x53\x74\x72\x69\x6e\x67\x58\x61\x6c\x75\x65\x12\x0a\x0a\x08\x4a\x6f\x68\x6e\x20\x44\x6f\x65"; + + // validate the binary data is correct + { + let value1 = DynamicMessage::decode(message_descriptor.clone(), ANY_DATA_1).unwrap(); + expect![[r#" + [ + I32( + 12345, + ), + Message( + DynamicMessage { + desc: MessageDescriptor { + name: "Any", + full_name: "google.protobuf.Any", + is_map_entry: false, + fields: [ + FieldDescriptor { + name: "type_url", + full_name: "google.protobuf.Any.type_url", + json_name: "typeUrl", + number: 1, + kind: string, + cardinality: Optional, + containing_oneof: None, + default_value: None, + is_group: false, + is_list: false, + is_map: false, + is_packed: false, + supports_presence: false, + }, + FieldDescriptor { + name: "value", + full_name: "google.protobuf.Any.value", + json_name: "value", + number: 2, + kind: bytes, + cardinality: Optional, + containing_oneof: None, + default_value: None, + is_group: false, + is_list: false, + is_map: false, + is_packed: false, + supports_presence: false, + }, + ], + oneofs: [], + }, + fields: DynamicMessageFieldSet { + fields: { + 1: Value( + String( + "type.googleapis.com/test.Int32Value", + ), + ), + 2: Value( + Bytes( + b"\x08\xd2\xfe\x06", + ), + ), + }, + }, + }, + ), + ] + "#]] + .assert_debug_eq(&value1.fields().map(|f| f.1).collect_vec()); + + let value2 = DynamicMessage::decode(message_descriptor.clone(), ANY_DATA_2).unwrap(); + expect![[r#" + [ + I32( + 12345, + ), + Message( + DynamicMessage { + desc: MessageDescriptor { + name: "Any", + full_name: "google.protobuf.Any", + is_map_entry: false, + fields: [ + FieldDescriptor { + name: "type_url", + full_name: "google.protobuf.Any.type_url", + json_name: "typeUrl", + number: 1, + kind: string, + cardinality: Optional, + containing_oneof: None, + default_value: None, + is_group: false, + is_list: false, + is_map: false, + is_packed: false, + supports_presence: false, + }, + FieldDescriptor { + name: "value", + full_name: "google.protobuf.Any.value", + json_name: "value", + number: 2, + kind: bytes, + cardinality: Optional, + containing_oneof: None, + default_value: None, + is_group: false, + is_list: false, + is_map: false, + is_packed: false, + supports_presence: false, + }, + ], + oneofs: [], + }, + fields: DynamicMessageFieldSet { + fields: { + 1: 
Value( + String( + "type.googleapis.com/test.AnyValue", + ), + ), + 2: Value( + Bytes( + b"\n0\n$type.googleapis.com/test.StringValue\x12\x08\n\x06114514\x12+\n#type.googleapis.com/test.Int32Value\x12\x04\x08\xd2\xfe\x06", + ), + ), + }, + }, + }, + ), + ] + "#]] + .assert_debug_eq(&value2.fields().map(|f| f.1).collect_vec()); + + let value3 = DynamicMessage::decode(message_descriptor.clone(), ANY_DATA_INVALID).unwrap(); + expect![[r#" + [ + I32( + 12345, + ), + Message( + DynamicMessage { + desc: MessageDescriptor { + name: "Any", + full_name: "google.protobuf.Any", + is_map_entry: false, + fields: [ + FieldDescriptor { + name: "type_url", + full_name: "google.protobuf.Any.type_url", + json_name: "typeUrl", + number: 1, + kind: string, + cardinality: Optional, + containing_oneof: None, + default_value: None, + is_group: false, + is_list: false, + is_map: false, + is_packed: false, + supports_presence: false, + }, + FieldDescriptor { + name: "value", + full_name: "google.protobuf.Any.value", + json_name: "value", + number: 2, + kind: bytes, + cardinality: Optional, + containing_oneof: None, + default_value: None, + is_group: false, + is_list: false, + is_map: false, + is_packed: false, + supports_presence: false, + }, + ], + oneofs: [], + }, + fields: DynamicMessageFieldSet { + fields: { + 1: Value( + String( + "type.googleapis.com/test.StringXalue", + ), + ), + 2: Value( + Bytes( + b"\n\x08John Doe", + ), + ), + }, + }, + }, + ), + ] + "#]] + .assert_debug_eq(&value3.fields().map(|f| f.1).collect_vec()); + } + + check( + message_descriptor, + &[ANY_DATA_1, ANY_DATA_2, ANY_DATA_3, ANY_DATA_INVALID], + expect![[r#" + [ + id(#1): Int32, + any_value(#4): Jsonb, type_name: google.protobuf.Any, field_descs: [type_url(#2): Varchar, value(#3): Bytea], + ]"#]], + expect![[r#" + Owned(Int32(12345)) + Owned(Jsonb({ + "@type": "type.googleapis.com/test.Int32Value", + "value": Number(114514), + })) + ================================================================ + Owned(Int32(12345)) + Owned(Jsonb({ + "@type": "type.googleapis.com/test.AnyValue", + "anyValue1": { + "@type": "type.googleapis.com/test.StringValue", + "value": "114514", + }, + "anyValue2": { + "@type": "type.googleapis.com/test.Int32Value", + "value": Number(114514), + }, + })) + ================================================================ + Owned(Int32(12345)) + Owned(Jsonb({ + "@type": "type.googleapis.com/test.StringValue", + "value": "John Doe", + })) + ================================================================ + Owned(Int32(12345)) + ~~~~ + Error at column `any_value`: Fail to convert protobuf Any into jsonb: message 'test.StringXalue' not found + ~~~~"#]], + ); + + Ok(()) +} + +#[test] +fn test_all_types() -> anyhow::Result<()> { + use self::all_types::all_types::*; + use self::all_types::*; + + let message_descriptor = + load_message_descriptor("all-types.proto", "all_types.AllTypes").unwrap(); + + let data = { + AllTypes { + double_field: 1.2345, + float_field: 1.2345, + int32_field: 42, + int64_field: 1234567890, + uint32_field: 98765, + uint64_field: 9876543210, + sint32_field: -12345, + sint64_field: -987654321, + fixed32_field: 1234, + fixed64_field: 5678, + sfixed32_field: -56789, + sfixed64_field: -123456, + bool_field: true, + string_field: "Hello, Prost!".to_string(), + bytes_field: b"byte data".to_vec(), + enum_field: EnumType::Option1 as i32, + nested_message_field: Some(NestedMessage { + id: 100, + name: "Nested".to_string(), + }), + repeated_int_field: vec![1, 2, 3, 4, 5], + map_field: HashMap::from_iter([ 
+ ("key1".to_string(), 1), + ("key2".to_string(), 2), + ("key3".to_string(), 3), + ]), + timestamp_field: Some(::prost_types::Timestamp { + seconds: 1630927032, + nanos: 500000000, + }), + duration_field: Some(::prost_types::Duration { + seconds: 60, + nanos: 500000000, + }), + any_field: Some(::prost_types::Any { + type_url: "type.googleapis.com/my_custom_type".to_string(), + value: b"My custom data".to_vec(), + }), + int32_value_field: Some(42), + string_value_field: Some("Hello, Wrapper!".to_string()), + example_oneof: Some(ExampleOneof::OneofInt32(123)), + map_struct_field: HashMap::from_iter([ + ( + "key1".to_string(), + NestedMessage { + id: 1, + name: "A".to_string(), + }, + ), + ( + "key2".to_string(), + NestedMessage { + id: 2, + name: "B".to_string(), + }, + ), + ]), + map_enum_field: HashMap::from_iter([ + (1, EnumType::Option1 as i32), + (2, EnumType::Option2 as i32), + ]), + } + }; + let mut data_bytes = Vec::new(); + data.encode(&mut data_bytes).unwrap(); + + check( + message_descriptor, + &[&data_bytes], + expect![[r#" + [ + double_field(#1): Float64, + float_field(#2): Float32, + int32_field(#3): Int32, + int64_field(#4): Int64, + uint32_field(#5): Int64, + uint64_field(#6): Decimal, + sint32_field(#7): Int32, + sint64_field(#8): Int64, + fixed32_field(#9): Int64, + fixed64_field(#10): Decimal, + sfixed32_field(#11): Int32, + sfixed64_field(#12): Int64, + bool_field(#13): Boolean, + string_field(#14): Varchar, + bytes_field(#15): Bytea, + enum_field(#16): Varchar, + nested_message_field(#19): Struct { + id: Int32, + name: Varchar, + }, type_name: all_types.AllTypes.NestedMessage, field_descs: [id(#17): Int32, name(#18): Varchar], + repeated_int_field(#20): List(Int32), + oneof_string(#21): Varchar, + oneof_int32(#22): Int32, + oneof_enum(#23): Varchar, + map_field(#26): Map(Varchar,Int32), type_name: all_types.AllTypes.MapFieldEntry, field_descs: [key(#24): Varchar, value(#25): Int32], + timestamp_field(#29): Struct { + seconds: Int64, + nanos: Int32, + }, type_name: google.protobuf.Timestamp, field_descs: [seconds(#27): Int64, nanos(#28): Int32], + duration_field(#32): Struct { + seconds: Int64, + nanos: Int32, + }, type_name: google.protobuf.Duration, field_descs: [seconds(#30): Int64, nanos(#31): Int32], + any_field(#35): Jsonb, type_name: google.protobuf.Any, field_descs: [type_url(#33): Varchar, value(#34): Bytea], + int32_value_field(#37): Struct { value: Int32 }, type_name: google.protobuf.Int32Value, field_descs: [value(#36): Int32], + string_value_field(#39): Struct { value: Varchar }, type_name: google.protobuf.StringValue, field_descs: [value(#38): Varchar], + map_struct_field(#44): Map(Varchar,Struct { id: Int32, name: Varchar }), type_name: all_types.AllTypes.MapStructFieldEntry, field_descs: [key(#40): Varchar, value(#43): Struct { + id: Int32, + name: Varchar, + }, type_name: all_types.AllTypes.NestedMessage, field_descs: [id(#41): Int32, name(#42): Varchar]], + map_enum_field(#47): Map(Int32,Varchar), type_name: all_types.AllTypes.MapEnumFieldEntry, field_descs: [key(#45): Int32, value(#46): Varchar], + ]"#]], + expect![[r#" + Owned(Float64(OrderedFloat(1.2345))) + Owned(Float32(OrderedFloat(1.2345))) + Owned(Int32(42)) + Owned(Int64(1234567890)) + Owned(Int64(98765)) + Owned(Decimal(Normalized(9876543210))) + Owned(Int32(-12345)) + Owned(Int64(-987654321)) + Owned(Int64(1234)) + Owned(Decimal(Normalized(5678))) + Owned(Int32(-56789)) + Owned(Int64(-123456)) + Owned(Bool(true)) + Borrowed(Utf8("Hello, Prost!")) + Borrowed(Bytea([98, 121, 116, 101, 32, 100, 
97, 116, 97])) + Owned(Utf8("OPTION1")) + Owned(StructValue( + Int32(100), + Utf8("Nested"), + )) + Owned([ + Int32(1), + Int32(2), + Int32(3), + Int32(4), + Int32(5), + ]) + Owned(Utf8("")) + Owned(Int32(123)) + Owned(Utf8("DEFAULT")) + Owned([ + StructValue( + Utf8("key1"), + Int32(1), + ), + StructValue( + Utf8("key2"), + Int32(2), + ), + StructValue( + Utf8("key3"), + Int32(3), + ), + ]) + Owned(StructValue( + Int64(1630927032), + Int32(500000000), + )) + Owned(StructValue( + Int64(60), + Int32(500000000), + )) + ~~~~ + Error at column `any_field`: Fail to convert protobuf Any into jsonb: message 'my_custom_type' not found + ~~~~ + Owned(StructValue(Int32(42))) + Owned(StructValue(Utf8("Hello, Wrapper!"))) + Owned([ + StructValue( + Utf8("key1"), + StructValue( + Int32(1), + Utf8("A"), + ), + ), + StructValue( + Utf8("key2"), + StructValue( + Int32(2), + Utf8("B"), + ), + ), + ]) + Owned([ + StructValue( + Int32(1), + Utf8("OPTION1"), + ), + StructValue( + Int32(2), + Utf8("OPTION2"), + ), + ])"#]], + ); + + Ok(()) +} + +#[test] +fn test_recursive() -> anyhow::Result<()> { + let message_descriptor = + load_message_descriptor("recursive.proto", "recursive.ComplexRecursiveMessage").unwrap(); + + check( + message_descriptor, + &[], + expect![[r#" + failed to map protobuf type + + Caused by: + circular reference detected: parent(recursive.ComplexRecursiveMessage.parent)->siblings(recursive.ComplexRecursiveMessage.Parent.siblings), conflict with parent(recursive.ComplexRecursiveMessage.parent), kind recursive.ComplexRecursiveMessage.Parent + "#]], + expect![""], + ); + + Ok(()) +} diff --git a/src/connector/src/parser/protobuf/.gitignore b/src/connector/codec/tests/integration_tests/protobuf/.gitignore similarity index 50% rename from src/connector/src/parser/protobuf/.gitignore rename to src/connector/codec/tests/integration_tests/protobuf/.gitignore index 4109deeeb3337..6e5bea6ee81ce 100644 --- a/src/connector/src/parser/protobuf/.gitignore +++ b/src/connector/codec/tests/integration_tests/protobuf/.gitignore @@ -1 +1,2 @@ recursive.rs +all_types.rs diff --git a/src/connector/codec/tests/integration_tests/utils.rs b/src/connector/codec/tests/integration_tests/utils.rs index dd375656c51e3..889dbeffc306f 100644 --- a/src/connector/codec/tests/integration_tests/utils.rs +++ b/src/connector/codec/tests/integration_tests/utils.rs @@ -40,10 +40,15 @@ impl<'a> std::fmt::Debug for DataTypeTestDisplay<'a> { f.finish()?; Ok(()) } - DataType::List(t) => f - .debug_tuple("List") - .field(&DataTypeTestDisplay(t)) - .finish(), + DataType::List(t) => { + if t.is_struct() { + f.debug_tuple("List") + .field(&DataTypeTestDisplay(t)) + .finish() + } else { + write!(f, "List({:?})", &DataTypeTestDisplay(t)) + } + } DataType::Map(m) => { write!( f, @@ -88,6 +93,14 @@ impl<'a> std::fmt::Debug for ScalarRefImplTestDisplay<'a> { .debug_list() .entries(m.inner().iter().map(DatumRefTestDisplay)) .finish(), + ScalarRefImpl::Jsonb(j) => { + let compact_str = format!("{}", j); + if compact_str.len() > 50 { + write!(f, "Jsonb({:#?})", jsonbb::ValueRef::from(j)) + } else { + write!(f, "Jsonb({:#})", j) + } + } _ => { // do not use alternative display for simple types write!(f, "{:?}", self.0) @@ -174,7 +187,13 @@ impl<'a> std::fmt::Debug for ColumnDescTestDisplay<'a> { write!(f, ", type_name: {}", type_name)?; } if !field_descs.is_empty() { - write!(f, ", field_descs: {:?}", field_descs)?; + write!( + f, + ", field_descs: [{}]", + field_descs.iter().format_with(", ", |field_desc, f| { + f(&format_args!("{:?}", 
ColumnDescTestDisplay(field_desc))) + }) + )?; } if let Some(generated_or_default_column) = generated_or_default_column { write!( diff --git a/src/connector/src/test_data/proto_recursive/recursive.pb b/src/connector/codec/tests/test_data/all-types.pb similarity index 76% rename from src/connector/src/test_data/proto_recursive/recursive.pb rename to src/connector/codec/tests/test_data/all-types.pb index 5c611c18d0d30..177976d5244ad 100644 Binary files a/src/connector/src/test_data/proto_recursive/recursive.pb and b/src/connector/codec/tests/test_data/all-types.pb differ diff --git a/src/connector/codec/tests/test_data/all-types.proto b/src/connector/codec/tests/test_data/all-types.proto new file mode 100644 index 0000000000000..5070328dbf5f3 --- /dev/null +++ b/src/connector/codec/tests/test_data/all-types.proto @@ -0,0 +1,79 @@ +syntax = "proto3"; + +import "google/protobuf/timestamp.proto"; +import "google/protobuf/duration.proto"; +import "google/protobuf/any.proto"; +import "google/protobuf/wrappers.proto"; + +package all_types; + +// all-types.pb is generated by `protoc all-types.proto -o all-types.pb --include_imports` in the current directory. + +message AllTypes { + // standard types + double double_field = 1; + float float_field = 2; + int32 int32_field = 3; + int64 int64_field = 4; + uint32 uint32_field = 5; + uint64 uint64_field = 6; + sint32 sint32_field = 7; + sint64 sint64_field = 8; + fixed32 fixed32_field = 9; + fixed64 fixed64_field = 10; + sfixed32 sfixed32_field = 11; + sfixed64 sfixed64_field = 12; + bool bool_field = 13; + string string_field = 14; + + bytes bytes_field = 15; + + // enum + enum EnumType { + DEFAULT = 0; + OPTION1 = 1; + OPTION2 = 2; + } + EnumType enum_field = 16; + + // nested message + message NestedMessage { + int32 id = 1; + string name = 2; + } + NestedMessage nested_message_field = 17; + + // repeated field + repeated int32 repeated_int_field = 18; + + // oneof field + oneof example_oneof { + string oneof_string = 19; + int32 oneof_int32 = 20; + EnumType oneof_enum = 21; + } + + // map field + map map_field = 22; + + // timestamp + google.protobuf.Timestamp timestamp_field = 23; + + // duration + google.protobuf.Duration duration_field = 24; + + // any + google.protobuf.Any any_field = 25; + + // -- Unsupported + // // struct + // import "google/protobuf/struct.proto"; + // google.protobuf.Struct struct_field = 26; + + // wrapper types + google.protobuf.Int32Value int32_value_field = 27; + google.protobuf.StringValue string_value_field = 28; + + map map_struct_field = 29; + map map_enum_field = 30; + } diff --git a/src/connector/src/test_data/any-schema.proto b/src/connector/codec/tests/test_data/any-schema.proto similarity index 99% rename from src/connector/src/test_data/any-schema.proto rename to src/connector/codec/tests/test_data/any-schema.proto index 12a367100ce7d..6bd9dcdf32b8f 100644 --- a/src/connector/src/test_data/any-schema.proto +++ b/src/connector/codec/tests/test_data/any-schema.proto @@ -35,4 +35,4 @@ message StringStringInt32Value { message Float32StringValue { float first = 1; string second = 2; -} \ No newline at end of file +} diff --git a/src/connector/src/test_data/complex-schema.proto b/src/connector/codec/tests/test_data/complex-schema.proto similarity index 100% rename from src/connector/src/test_data/complex-schema.proto rename to src/connector/codec/tests/test_data/complex-schema.proto diff --git a/src/connector/codec/tests/test_data/recursive.proto b/src/connector/codec/tests/test_data/recursive.proto new file mode 
100644 index 0000000000000..a26a6a98e172f --- /dev/null +++ b/src/connector/codec/tests/test_data/recursive.proto @@ -0,0 +1,24 @@ +syntax = "proto3"; + +package recursive; + +message ComplexRecursiveMessage { + string node_name = 1; + int32 node_id = 2; + + message Attributes { + string key = 1; + string value = 2; + } + + repeated Attributes attributes = 3; + + message Parent { + string parent_name = 1; + int32 parent_id = 2; + repeated ComplexRecursiveMessage siblings = 3; + } + + Parent parent = 4; + repeated ComplexRecursiveMessage children = 5; +} diff --git a/src/connector/src/test_data/simple-schema.proto b/src/connector/codec/tests/test_data/simple-schema.proto similarity index 100% rename from src/connector/src/test_data/simple-schema.proto rename to src/connector/codec/tests/test_data/simple-schema.proto diff --git a/src/connector/src/lib.rs b/src/connector/src/lib.rs index 6ee28a2161aa1..f66b5116c110b 100644 --- a/src/connector/src/lib.rs +++ b/src/connector/src/lib.rs @@ -19,7 +19,6 @@ #![feature(stmt_expr_attributes)] #![feature(box_patterns)] #![feature(trait_alias)] -#![feature(lint_reasons)] #![feature(let_chains)] #![feature(box_into_inner)] #![feature(type_alias_impl_trait)] diff --git a/src/connector/src/parser/mod.rs b/src/connector/src/parser/mod.rs index a49390c2752f4..2142914aa2503 100644 --- a/src/connector/src/parser/mod.rs +++ b/src/connector/src/parser/mod.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; use std::fmt::Debug; use std::sync::LazyLock; @@ -715,6 +716,7 @@ async fn into_chunk_stream_inner( len: usize, } let mut current_transaction = None; + let mut direct_cdc_event_lag_latency_metrics = HashMap::new(); #[for_await] for batch in data_stream { @@ -764,10 +766,15 @@ async fn into_chunk_stream_inner( if let SourceMeta::DebeziumCdc(msg_meta) = &msg.meta { let lag_ms = process_time_ms - msg_meta.source_ts_ms; // report to promethus - GLOBAL_SOURCE_METRICS - .direct_cdc_event_lag_latency - .with_guarded_label_values(&[&msg_meta.full_table_name]) - .observe(lag_ms as f64); + let full_table_name = msg_meta.full_table_name.clone(); + let direct_cdc_event_lag_latency = direct_cdc_event_lag_latency_metrics + .entry(full_table_name) + .or_insert_with(|| { + GLOBAL_SOURCE_METRICS + .direct_cdc_event_lag_latency + .with_guarded_label_values(&[&msg_meta.full_table_name]) + }); + direct_cdc_event_lag_latency.observe(lag_ms as f64); } let old_len = builder.len(); diff --git a/src/connector/src/parser/plain_parser.rs b/src/connector/src/parser/plain_parser.rs index f1ac65d79a654..e9c9436fd295f 100644 --- a/src/connector/src/parser/plain_parser.rs +++ b/src/connector/src/parser/plain_parser.rs @@ -297,10 +297,9 @@ mod tests { .unwrap() .into_iter() .filter(|c| c.cardinality() > 0) - .map(|c| { + .inspect(|c| { // 5 data messages in a single chunk assert_eq!(5, c.cardinality()); - c }) .collect_vec(); diff --git a/src/connector/src/parser/protobuf/mod.rs b/src/connector/src/parser/protobuf/mod.rs index bfcb0adfe1a18..462e478932ee7 100644 --- a/src/connector/src/parser/protobuf/mod.rs +++ b/src/connector/src/parser/protobuf/mod.rs @@ -14,7 +14,3 @@ mod parser; pub use parser::*; - -#[rustfmt::skip] -#[cfg(test)] -mod recursive; diff --git a/src/connector/src/parser/protobuf/parser.rs b/src/connector/src/parser/protobuf/parser.rs index ec8c747cafd5a..93eeb19cc1565 100644 --- a/src/connector/src/parser/protobuf/parser.rs +++ 
b/src/connector/src/parser/protobuf/parser.rs @@ -13,23 +13,14 @@ // limitations under the License. use anyhow::Context; -use itertools::Itertools; -use prost_reflect::{ - Cardinality, DescriptorPool, DynamicMessage, FieldDescriptor, FileDescriptor, Kind, - MessageDescriptor, ReflectMessage, Value, -}; -use risingwave_common::array::{ListValue, StructValue}; -use risingwave_common::types::{ - DataType, DatumCow, Decimal, JsonbVal, ScalarImpl, ToOwnedDatum, F32, F64, -}; +use prost_reflect::{DescriptorPool, DynamicMessage, FileDescriptor, MessageDescriptor}; use risingwave_common::{bail, try_match_expand}; -use risingwave_pb::plan_common::{AdditionalColumn, ColumnDesc, ColumnDescVersion}; -use thiserror::Error; -use thiserror_ext::{AsReport, Macro}; +pub use risingwave_connector_codec::decoder::protobuf::parser::*; +use risingwave_connector_codec::decoder::protobuf::ProtobufAccess; +use risingwave_pb::plan_common::ColumnDesc; use crate::error::ConnectorResult; -use crate::parser::unified::protobuf::ProtobufAccess; -use crate::parser::unified::{uncategorized, AccessError, AccessImpl, AccessResult}; +use crate::parser::unified::AccessImpl; use crate::parser::util::bytes_from_url; use crate::parser::{AccessBuilder, EncodingProperties}; use crate::schema::schema_registry::{extract_schema_id, handle_sr_list, Client, WireFormatError}; @@ -124,207 +115,10 @@ impl ProtobufParserConfig { /// Maps the protobuf schema to relational schema. pub fn map_to_columns(&self) -> ConnectorResult> { - let mut columns = Vec::with_capacity(self.message_descriptor.fields().len()); - let mut index = 0; - let mut parse_trace: Vec = vec![]; - for field in self.message_descriptor.fields() { - columns.push(Self::pb_field_to_col_desc( - &field, - &mut index, - &mut parse_trace, - )?); - } - - Ok(columns) - } - - /// Maps a protobuf field to a RW column. - fn pb_field_to_col_desc( - field_descriptor: &FieldDescriptor, - index: &mut i32, - parse_trace: &mut Vec, - ) -> ConnectorResult { - let field_type = protobuf_type_mapping(field_descriptor, parse_trace) - .context("failed to map protobuf type")?; - if let Kind::Message(m) = field_descriptor.kind() { - let field_descs = if let DataType::List { .. } = field_type { - vec![] - } else { - m.fields() - .map(|f| Self::pb_field_to_col_desc(&f, index, parse_trace)) - .try_collect()? 
- }; - *index += 1; - Ok(ColumnDesc { - column_id: *index, - name: field_descriptor.name().to_string(), - column_type: Some(field_type.to_protobuf()), - field_descs, - type_name: m.full_name().to_string(), - generated_or_default_column: None, - description: None, - additional_column_type: 0, // deprecated - additional_column: Some(AdditionalColumn { column_type: None }), - version: ColumnDescVersion::Pr13707 as i32, - }) - } else { - *index += 1; - Ok(ColumnDesc { - column_id: *index, - name: field_descriptor.name().to_string(), - column_type: Some(field_type.to_protobuf()), - additional_column: Some(AdditionalColumn { column_type: None }), - version: ColumnDescVersion::Pr13707 as i32, - ..Default::default() - }) - } + pb_schema_to_column_descs(&self.message_descriptor).map_err(|e| e.into()) } } -#[derive(Error, Debug, Macro)] -#[error("{0}")] -struct ProtobufTypeError(#[message] String); - -fn detect_loop_and_push( - trace: &mut Vec, - fd: &FieldDescriptor, -) -> std::result::Result<(), ProtobufTypeError> { - let identifier = format!("{}({})", fd.name(), fd.full_name()); - if trace.iter().any(|s| s == identifier.as_str()) { - bail_protobuf_type_error!( - "circular reference detected: {}, conflict with {}, kind {:?}", - trace.iter().format("->"), - identifier, - fd.kind(), - ); - } - trace.push(identifier); - Ok(()) -} - -pub fn from_protobuf_value<'a>( - field_desc: &FieldDescriptor, - value: &'a Value, -) -> AccessResult> { - let kind = field_desc.kind(); - - macro_rules! borrowed { - ($v:expr) => { - return Ok(DatumCow::Borrowed(Some($v.into()))) - }; - } - - let v: ScalarImpl = match value { - Value::Bool(v) => ScalarImpl::Bool(*v), - Value::I32(i) => ScalarImpl::Int32(*i), - Value::U32(i) => ScalarImpl::Int64(*i as i64), - Value::I64(i) => ScalarImpl::Int64(*i), - Value::U64(i) => ScalarImpl::Decimal(Decimal::from(*i)), - Value::F32(f) => ScalarImpl::Float32(F32::from(*f)), - Value::F64(f) => ScalarImpl::Float64(F64::from(*f)), - Value::String(s) => borrowed!(s.as_str()), - Value::EnumNumber(idx) => { - let enum_desc = kind.as_enum().ok_or_else(|| AccessError::TypeError { - expected: "enum".to_owned(), - got: format!("{kind:?}"), - value: value.to_string(), - })?; - let enum_symbol = enum_desc.get_value(*idx).ok_or_else(|| { - uncategorized!("unknown enum index {} of enum {:?}", idx, enum_desc) - })?; - ScalarImpl::Utf8(enum_symbol.name().into()) - } - Value::Message(dyn_msg) => { - if dyn_msg.descriptor().full_name() == "google.protobuf.Any" { - ScalarImpl::Jsonb(JsonbVal::from( - serde_json::to_value(dyn_msg).map_err(AccessError::ProtobufAnyToJson)?, - )) - } else { - let mut rw_values = Vec::with_capacity(dyn_msg.descriptor().fields().len()); - // fields is a btree map in descriptor - // so it's order is the same as datatype - for field_desc in dyn_msg.descriptor().fields() { - // missing field - if !dyn_msg.has_field(&field_desc) - && field_desc.cardinality() == Cardinality::Required - { - return Err(AccessError::Undefined { - name: field_desc.name().to_owned(), - path: dyn_msg.descriptor().full_name().to_owned(), - }); - } - // use default value if dyn_msg doesn't has this field - let value = dyn_msg.get_field(&field_desc); - rw_values.push(from_protobuf_value(&field_desc, &value)?.to_owned_datum()); - } - ScalarImpl::Struct(StructValue::new(rw_values)) - } - } - Value::List(values) => { - let data_type = protobuf_type_mapping(field_desc, &mut vec![]) - .map_err(|e| uncategorized!("{}", e.to_report_string()))?; - let mut builder = 
data_type.as_list().create_array_builder(values.len()); - for value in values { - builder.append(from_protobuf_value(field_desc, value)?); - } - ScalarImpl::List(ListValue::new(builder.finish())) - } - Value::Bytes(value) => borrowed!(&**value), - _ => { - return Err(AccessError::UnsupportedType { - ty: format!("{kind:?}"), - }); - } - }; - Ok(Some(v).into()) -} - -/// Maps protobuf type to RW type. -fn protobuf_type_mapping( - field_descriptor: &FieldDescriptor, - parse_trace: &mut Vec, -) -> std::result::Result { - detect_loop_and_push(parse_trace, field_descriptor)?; - let field_type = field_descriptor.kind(); - let mut t = match field_type { - Kind::Bool => DataType::Boolean, - Kind::Double => DataType::Float64, - Kind::Float => DataType::Float32, - Kind::Int32 | Kind::Sint32 | Kind::Sfixed32 => DataType::Int32, - // Fixed32 represents [0, 2^32 - 1]. It's equal to u32. - Kind::Int64 | Kind::Sint64 | Kind::Sfixed64 | Kind::Uint32 | Kind::Fixed32 => { - DataType::Int64 - } - Kind::Uint64 | Kind::Fixed64 => DataType::Decimal, - Kind::String => DataType::Varchar, - Kind::Message(m) => match m.full_name() { - // Well-Known Types are identified by their full name - "google.protobuf.Any" => DataType::Jsonb, - _ => { - let fields = m - .fields() - .map(|f| protobuf_type_mapping(&f, parse_trace)) - .try_collect()?; - let field_names = m.fields().map(|f| f.name().to_string()).collect_vec(); - DataType::new_struct(fields, field_names) - } - }, - Kind::Enum(_) => DataType::Varchar, - Kind::Bytes => DataType::Bytea, - }; - if field_descriptor.is_map() { - bail_protobuf_type_error!( - "protobuf map type (on field `{}`) is not supported", - field_descriptor.full_name() - ); - } - if field_descriptor.cardinality() == Cardinality::Repeated { - t = DataType::List(Box::new(t)) - } - _ = parse_trace.pop(); - Ok(t) -} - /// A port from the implementation of confluent's Varint Zig-zag deserialization. 
/// See `ReadVarint` in fn decode_varint_zigzag(buffer: &[u8]) -> ConnectorResult<(i32, usize)> { @@ -380,601 +174,7 @@ pub(crate) fn resolve_pb_header(payload: &[u8]) -> ConnectorResult<&[u8]> { #[cfg(test)] mod test { - use std::path::PathBuf; - - use prost::Message; - use risingwave_common::types::StructType; - use risingwave_connector_codec::decoder::AccessExt; - use risingwave_pb::catalog::StreamSourceInfo; - use risingwave_pb::data::data_type::PbTypeName; - use risingwave_pb::plan_common::{PbEncodeType, PbFormatType}; - use serde_json::json; - use super::*; - use crate::parser::protobuf::recursive::all_types::{EnumType, ExampleOneof, NestedMessage}; - use crate::parser::protobuf::recursive::AllTypes; - use crate::parser::SpecificParserConfig; - - fn schema_dir() -> String { - let dir = PathBuf::from("src/test_data"); - format!( - "file://{}", - std::fs::canonicalize(dir).unwrap().to_str().unwrap() - ) - } - - // Id: 123, - // Address: "test address", - // City: "test city", - // Zipcode: 456, - // Rate: 1.2345, - // Date: "2021-01-01" - static PRE_GEN_PROTO_DATA: &[u8] = b"\x08\x7b\x12\x0c\x74\x65\x73\x74\x20\x61\x64\x64\x72\x65\x73\x73\x1a\x09\x74\x65\x73\x74\x20\x63\x69\x74\x79\x20\xc8\x03\x2d\x19\x04\x9e\x3f\x32\x0a\x32\x30\x32\x31\x2d\x30\x31\x2d\x30\x31"; - - #[tokio::test] - async fn test_simple_schema() -> crate::error::ConnectorResult<()> { - let location = schema_dir() + "/simple-schema"; - println!("location: {}", location); - let message_name = "test.TestRecord"; - let info = StreamSourceInfo { - proto_message_name: message_name.to_string(), - row_schema_location: location.to_string(), - use_schema_registry: false, - format: PbFormatType::Plain.into(), - row_encode: PbEncodeType::Protobuf.into(), - ..Default::default() - }; - let parser_config = SpecificParserConfig::new(&info, &Default::default())?; - let conf = ProtobufParserConfig::new(parser_config.encoding_config).await?; - let value = DynamicMessage::decode(conf.message_descriptor, PRE_GEN_PROTO_DATA).unwrap(); - - assert_eq!( - value.get_field_by_name("id").unwrap().into_owned(), - Value::I32(123) - ); - assert_eq!( - value.get_field_by_name("address").unwrap().into_owned(), - Value::String("test address".to_string()) - ); - assert_eq!( - value.get_field_by_name("city").unwrap().into_owned(), - Value::String("test city".to_string()) - ); - assert_eq!( - value.get_field_by_name("zipcode").unwrap().into_owned(), - Value::I64(456) - ); - assert_eq!( - value.get_field_by_name("rate").unwrap().into_owned(), - Value::F32(1.2345) - ); - assert_eq!( - value.get_field_by_name("date").unwrap().into_owned(), - Value::String("2021-01-01".to_string()) - ); - - Ok(()) - } - - #[tokio::test] - async fn test_complex_schema() -> crate::error::ConnectorResult<()> { - let location = schema_dir() + "/complex-schema"; - let message_name = "test.User"; - - let info = StreamSourceInfo { - proto_message_name: message_name.to_string(), - row_schema_location: location.to_string(), - use_schema_registry: false, - format: PbFormatType::Plain.into(), - row_encode: PbEncodeType::Protobuf.into(), - ..Default::default() - }; - let parser_config = SpecificParserConfig::new(&info, &Default::default())?; - let conf = ProtobufParserConfig::new(parser_config.encoding_config).await?; - let columns = conf.map_to_columns().unwrap(); - - assert_eq!(columns[0].name, "id".to_string()); - assert_eq!(columns[1].name, "code".to_string()); - assert_eq!(columns[2].name, "timestamp".to_string()); - - let data_type = columns[3].column_type.as_ref().unwrap(); - 
assert_eq!(data_type.get_type_name().unwrap(), PbTypeName::List); - let inner_field_type = data_type.field_type.clone(); - assert_eq!( - inner_field_type[0].get_type_name().unwrap(), - PbTypeName::Struct - ); - let struct_inner = inner_field_type[0].field_type.clone(); - assert_eq!(struct_inner[0].get_type_name().unwrap(), PbTypeName::Int32); - assert_eq!(struct_inner[1].get_type_name().unwrap(), PbTypeName::Int32); - assert_eq!( - struct_inner[2].get_type_name().unwrap(), - PbTypeName::Varchar - ); - - assert_eq!(columns[4].name, "contacts".to_string()); - let inner_field_type = columns[4].column_type.as_ref().unwrap().field_type.clone(); - assert_eq!( - inner_field_type[0].get_type_name().unwrap(), - PbTypeName::List - ); - assert_eq!( - inner_field_type[1].get_type_name().unwrap(), - PbTypeName::List - ); - Ok(()) - } - - #[tokio::test] - async fn test_refuse_recursive_proto_message() { - let location = schema_dir() + "/proto_recursive/recursive.pb"; - let message_name = "recursive.ComplexRecursiveMessage"; - - let info = StreamSourceInfo { - proto_message_name: message_name.to_string(), - row_schema_location: location.to_string(), - use_schema_registry: false, - format: PbFormatType::Plain.into(), - row_encode: PbEncodeType::Protobuf.into(), - ..Default::default() - }; - let parser_config = SpecificParserConfig::new(&info, &Default::default()).unwrap(); - let conf = ProtobufParserConfig::new(parser_config.encoding_config) - .await - .unwrap(); - let columns = conf.map_to_columns(); - // expect error message: - // "Err(Protocol error: circular reference detected: - // parent(recursive.ComplexRecursiveMessage.parent)->siblings(recursive. - // ComplexRecursiveMessage.Parent.siblings), conflict with - // parent(recursive.ComplexRecursiveMessage.parent), kind - // recursive.ComplexRecursiveMessage.Parent" - assert!(columns.is_err()); - } - - async fn create_recursive_pb_parser_config( - location: &str, - message_name: &str, - ) -> ProtobufParserConfig { - let location = schema_dir() + location; - - let info = StreamSourceInfo { - proto_message_name: message_name.to_string(), - row_schema_location: location.to_string(), - use_schema_registry: false, - format: PbFormatType::Plain.into(), - row_encode: PbEncodeType::Protobuf.into(), - ..Default::default() - }; - let parser_config = SpecificParserConfig::new(&info, &Default::default()).unwrap(); - - ProtobufParserConfig::new(parser_config.encoding_config) - .await - .unwrap() - } - - #[tokio::test] - async fn test_all_types_create_source() { - let conf = create_recursive_pb_parser_config( - "/proto_recursive/recursive.pb", - "recursive.AllTypes", - ) - .await; - - // Ensure that the parser can recognize the schema. 
- let columns = conf - .map_to_columns() - .unwrap() - .into_iter() - .map(|c| DataType::from(&c.column_type.unwrap())) - .collect_vec(); - assert_eq!( - columns, - vec![ - DataType::Float64, // double_field - DataType::Float32, // float_field - DataType::Int32, // int32_field - DataType::Int64, // int64_field - DataType::Int64, // uint32_field - DataType::Decimal, // uint64_field - DataType::Int32, // sint32_field - DataType::Int64, // sint64_field - DataType::Int64, // fixed32_field - DataType::Decimal, // fixed64_field - DataType::Int32, // sfixed32_field - DataType::Int64, // sfixed64_field - DataType::Boolean, // bool_field - DataType::Varchar, // string_field - DataType::Bytea, // bytes_field - DataType::Varchar, // enum_field - DataType::Struct(StructType::new(vec![ - ("id", DataType::Int32), - ("name", DataType::Varchar) - ])), // nested_message_field - DataType::List(DataType::Int32.into()), // repeated_int_field - DataType::Varchar, // oneof_string - DataType::Int32, // oneof_int32 - DataType::Varchar, // oneof_enum - DataType::Struct(StructType::new(vec![ - ("seconds", DataType::Int64), - ("nanos", DataType::Int32) - ])), // timestamp_field - DataType::Struct(StructType::new(vec![ - ("seconds", DataType::Int64), - ("nanos", DataType::Int32) - ])), // duration_field - DataType::Jsonb, // any_field - DataType::Struct(StructType::new(vec![("value", DataType::Int32)])), /* int32_value_field */ - DataType::Struct(StructType::new(vec![("value", DataType::Varchar)])), /* string_value_field */ - ] - ) - } - - #[tokio::test] - async fn test_all_types_data_parsing() { - let m = create_all_types_message(); - let mut payload = Vec::new(); - m.encode(&mut payload).unwrap(); - - let conf = create_recursive_pb_parser_config( - "/proto_recursive/recursive.pb", - "recursive.AllTypes", - ) - .await; - let mut access_builder = ProtobufAccessBuilder::new(conf).unwrap(); - let access = access_builder.generate_accessor(payload).await.unwrap(); - if let AccessImpl::Protobuf(a) = access { - assert_all_types_eq(&a, &m); - } else { - panic!("unexpected") - } - } - - fn assert_all_types_eq(a: &ProtobufAccess, m: &AllTypes) { - type S = ScalarImpl; - - pb_eq(a, "double_field", S::Float64(m.double_field.into())); - pb_eq(a, "float_field", S::Float32(m.float_field.into())); - pb_eq(a, "int32_field", S::Int32(m.int32_field)); - pb_eq(a, "int64_field", S::Int64(m.int64_field)); - pb_eq(a, "uint32_field", S::Int64(m.uint32_field.into())); - pb_eq(a, "uint64_field", S::Decimal(m.uint64_field.into())); - pb_eq(a, "sint32_field", S::Int32(m.sint32_field)); - pb_eq(a, "sint64_field", S::Int64(m.sint64_field)); - pb_eq(a, "fixed32_field", S::Int64(m.fixed32_field.into())); - pb_eq(a, "fixed64_field", S::Decimal(m.fixed64_field.into())); - pb_eq(a, "sfixed32_field", S::Int32(m.sfixed32_field)); - pb_eq(a, "sfixed64_field", S::Int64(m.sfixed64_field)); - pb_eq(a, "bool_field", S::Bool(m.bool_field)); - pb_eq(a, "string_field", S::Utf8(m.string_field.as_str().into())); - pb_eq(a, "bytes_field", S::Bytea(m.bytes_field.clone().into())); - pb_eq(a, "enum_field", S::Utf8("OPTION1".into())); - pb_eq( - a, - "nested_message_field", - S::Struct(StructValue::new(vec![ - Some(ScalarImpl::Int32(100)), - Some(ScalarImpl::Utf8("Nested".into())), - ])), - ); - pb_eq( - a, - "repeated_int_field", - S::List(ListValue::from_iter(m.repeated_int_field.clone())), - ); - pb_eq( - a, - "timestamp_field", - S::Struct(StructValue::new(vec![ - Some(ScalarImpl::Int64(1630927032)), - Some(ScalarImpl::Int32(500000000)), - ])), - ); - pb_eq( - a, 
- "duration_field", - S::Struct(StructValue::new(vec![ - Some(ScalarImpl::Int64(60)), - Some(ScalarImpl::Int32(500000000)), - ])), - ); - pb_eq( - a, - "int32_value_field", - S::Struct(StructValue::new(vec![Some(ScalarImpl::Int32(42))])), - ); - pb_eq( - a, - "string_value_field", - S::Struct(StructValue::new(vec![Some(ScalarImpl::Utf8( - m.string_value_field.as_ref().unwrap().as_str().into(), - ))])), - ); - pb_eq(a, "oneof_string", S::Utf8("".into())); - pb_eq(a, "oneof_int32", S::Int32(123)); - pb_eq(a, "oneof_enum", S::Utf8("DEFAULT".into())); - } - - fn pb_eq(a: &ProtobufAccess, field_name: &str, value: ScalarImpl) { - let dummy_type = DataType::Varchar; - let d = a.access_owned(&[field_name], &dummy_type).unwrap().unwrap(); - assert_eq!(d, value, "field: {} value: {:?}", field_name, d); - } - - fn create_all_types_message() -> AllTypes { - AllTypes { - double_field: 1.2345, - float_field: 1.2345, - int32_field: 42, - int64_field: 1234567890, - uint32_field: 98765, - uint64_field: 9876543210, - sint32_field: -12345, - sint64_field: -987654321, - fixed32_field: 1234, - fixed64_field: 5678, - sfixed32_field: -56789, - sfixed64_field: -123456, - bool_field: true, - string_field: "Hello, Prost!".to_string(), - bytes_field: b"byte data".to_vec(), - enum_field: EnumType::Option1 as i32, - nested_message_field: Some(NestedMessage { - id: 100, - name: "Nested".to_string(), - }), - repeated_int_field: vec![1, 2, 3, 4, 5], - timestamp_field: Some(::prost_types::Timestamp { - seconds: 1630927032, - nanos: 500000000, - }), - duration_field: Some(::prost_types::Duration { - seconds: 60, - nanos: 500000000, - }), - any_field: Some(::prost_types::Any { - type_url: "type.googleapis.com/my_custom_type".to_string(), - value: b"My custom data".to_vec(), - }), - int32_value_field: Some(42), - string_value_field: Some("Hello, Wrapper!".to_string()), - example_oneof: Some(ExampleOneof::OneofInt32(123)), - } - } - - // id: 12345 - // name { - // type_url: "type.googleapis.com/test.StringValue" - // value: "\n\010John Doe" - // } - static ANY_GEN_PROTO_DATA: &[u8] = b"\x08\xb9\x60\x12\x32\x0a\x24\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x53\x74\x72\x69\x6e\x67\x56\x61\x6c\x75\x65\x12\x0a\x0a\x08\x4a\x6f\x68\x6e\x20\x44\x6f\x65"; - - #[tokio::test] - async fn test_any_schema() -> crate::error::ConnectorResult<()> { - let conf = create_recursive_pb_parser_config("/any-schema.pb", "test.TestAny").await; - - println!("Current conf: {:#?}", conf); - println!("---------------------------"); - - let value = - DynamicMessage::decode(conf.message_descriptor.clone(), ANY_GEN_PROTO_DATA).unwrap(); - - println!("Test ANY_GEN_PROTO_DATA, current value: {:#?}", value); - println!("---------------------------"); - - // This is of no use - let field = value.fields().next().unwrap().0; - - if let Some(ret) = from_protobuf_value(&field, &Value::Message(value)) - .unwrap() - .to_owned_datum() - { - println!("Decoded Value for ANY_GEN_PROTO_DATA: {:#?}", ret); - println!("---------------------------"); - - let ScalarImpl::Struct(struct_value) = ret else { - panic!("Expected ScalarImpl::Struct"); - }; - - let fields = struct_value.fields(); - - match fields[0].clone() { - Some(ScalarImpl::Int32(v)) => { - println!("Successfully decode field[0]"); - assert_eq!(v, 12345); - } - _ => panic!("Expected ScalarImpl::Int32"), - } - - match fields[1].clone() { - Some(ScalarImpl::Jsonb(jv)) => { - assert_eq!( - jv, - JsonbVal::from(json!({ - "@type": 
"type.googleapis.com/test.StringValue", - "value": "John Doe" - })) - ); - } - _ => panic!("Expected ScalarImpl::Jsonb"), - } - } - - Ok(()) - } - - // id: 12345 - // name { - // type_url: "type.googleapis.com/test.Int32Value" - // value: "\010\322\376\006" - // } - // Unpacked Int32Value from Any: value: 114514 - static ANY_GEN_PROTO_DATA_1: &[u8] = b"\x08\xb9\x60\x12\x2b\x0a\x23\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x49\x6e\x74\x33\x32\x56\x61\x6c\x75\x65\x12\x04\x08\xd2\xfe\x06"; - - #[tokio::test] - async fn test_any_schema_1() -> crate::error::ConnectorResult<()> { - let conf = create_recursive_pb_parser_config("/any-schema.pb", "test.TestAny").await; - - println!("Current conf: {:#?}", conf); - println!("---------------------------"); - - let value = - DynamicMessage::decode(conf.message_descriptor.clone(), ANY_GEN_PROTO_DATA_1).unwrap(); - - println!("Current Value: {:#?}", value); - println!("---------------------------"); - - // This is of no use - let field = value.fields().next().unwrap().0; - - if let Some(ret) = from_protobuf_value(&field, &Value::Message(value)) - .unwrap() - .to_owned_datum() - { - println!("Decoded Value for ANY_GEN_PROTO_DATA: {:#?}", ret); - println!("---------------------------"); - - let ScalarImpl::Struct(struct_value) = ret else { - panic!("Expected ScalarImpl::Struct"); - }; - - let fields = struct_value.fields(); - - match fields[0].clone() { - Some(ScalarImpl::Int32(v)) => { - println!("Successfully decode field[0]"); - assert_eq!(v, 12345); - } - _ => panic!("Expected ScalarImpl::Int32"), - } - - match fields[1].clone() { - Some(ScalarImpl::Jsonb(jv)) => { - assert_eq!( - jv, - JsonbVal::from(json!({ - "@type": "type.googleapis.com/test.Int32Value", - "value": 114514 - })) - ); - } - _ => panic!("Expected ScalarImpl::Jsonb"), - } - } - - Ok(()) - } - - // "id": 12345, - // "any_value": { - // "type_url": "type.googleapis.com/test.AnyValue", - // "value": { - // "any_value_1": { - // "type_url": "type.googleapis.com/test.StringValue", - // "value": "114514" - // }, - // "any_value_2": { - // "type_url": "type.googleapis.com/test.Int32Value", - // "value": 114514 - // } - // } - // } - static ANY_RECURSIVE_GEN_PROTO_DATA: &[u8] = b"\x08\xb9\x60\x12\x84\x01\x0a\x21\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x41\x6e\x79\x56\x61\x6c\x75\x65\x12\x5f\x0a\x30\x0a\x24\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x53\x74\x72\x69\x6e\x67\x56\x61\x6c\x75\x65\x12\x08\x0a\x06\x31\x31\x34\x35\x31\x34\x12\x2b\x0a\x23\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x49\x6e\x74\x33\x32\x56\x61\x6c\x75\x65\x12\x04\x08\xd2\xfe\x06"; - - #[tokio::test] - async fn test_any_recursive() -> crate::error::ConnectorResult<()> { - let conf = create_recursive_pb_parser_config("/any-schema.pb", "test.TestAny").await; - - println!("Current conf: {:#?}", conf); - println!("---------------------------"); - - let value = DynamicMessage::decode( - conf.message_descriptor.clone(), - ANY_RECURSIVE_GEN_PROTO_DATA, - ) - .unwrap(); - - println!("Current Value: {:#?}", value); - println!("---------------------------"); - - // This is of no use - let field = value.fields().next().unwrap().0; - - if let Some(ret) = from_protobuf_value(&field, &Value::Message(value)) - .unwrap() - .to_owned_datum() - { - println!("Decoded Value for ANY_RECURSIVE_GEN_PROTO_DATA: 
{:#?}", ret); - println!("---------------------------"); - - let ScalarImpl::Struct(struct_value) = ret else { - panic!("Expected ScalarImpl::Struct"); - }; - - let fields = struct_value.fields(); - - match fields[0].clone() { - Some(ScalarImpl::Int32(v)) => { - println!("Successfully decode field[0]"); - assert_eq!(v, 12345); - } - _ => panic!("Expected ScalarImpl::Int32"), - } - - match fields[1].clone() { - Some(ScalarImpl::Jsonb(jv)) => { - assert_eq!( - jv, - JsonbVal::from(json!({ - "@type": "type.googleapis.com/test.AnyValue", - "anyValue1": { - "@type": "type.googleapis.com/test.StringValue", - "value": "114514", - }, - "anyValue2": { - "@type": "type.googleapis.com/test.Int32Value", - "value": 114514, - } - })) - ); - } - _ => panic!("Expected ScalarImpl::Jsonb"), - } - } - - Ok(()) - } - - // id: 12345 - // any_value: { - // type_url: "type.googleapis.com/test.StringXalue" - // value: "\n\010John Doe" - // } - static ANY_GEN_PROTO_DATA_INVALID: &[u8] = b"\x08\xb9\x60\x12\x32\x0a\x24\x74\x79\x70\x65\x2e\x67\x6f\x6f\x67\x6c\x65\x61\x70\x69\x73\x2e\x63\x6f\x6d\x2f\x74\x65\x73\x74\x2e\x53\x74\x72\x69\x6e\x67\x58\x61\x6c\x75\x65\x12\x0a\x0a\x08\x4a\x6f\x68\x6e\x20\x44\x6f\x65"; - - #[tokio::test] - async fn test_any_invalid() -> crate::error::ConnectorResult<()> { - let conf = create_recursive_pb_parser_config("/any-schema.pb", "test.TestAny").await; - - let value = - DynamicMessage::decode(conf.message_descriptor.clone(), ANY_GEN_PROTO_DATA_INVALID) - .unwrap(); - - // The top-level `Value` is not a proto field, but we need a dummy one. - let field = value.fields().next().unwrap().0; - - let err = from_protobuf_value(&field, &Value::Message(value)).unwrap_err(); - - let expected = expect_test::expect![[r#" - Fail to convert protobuf Any into jsonb - - Caused by: - message 'test.StringXalue' not found - "#]]; - expected.assert_eq(err.to_report_string_pretty().as_str()); - - Ok(()) - } #[test] fn test_decode_varint_zigzag() { diff --git a/src/connector/src/parser/unified/mod.rs b/src/connector/src/parser/unified/mod.rs index fdfe3aae6aaee..adf32df572307 100644 --- a/src/connector/src/parser/unified/mod.rs +++ b/src/connector/src/parser/unified/mod.rs @@ -17,11 +17,11 @@ use auto_impl::auto_impl; use risingwave_common::types::{DataType, DatumCow}; use risingwave_connector_codec::decoder::avro::AvroAccess; -pub use risingwave_connector_codec::decoder::{uncategorized, Access, AccessError, AccessResult}; +use risingwave_connector_codec::decoder::protobuf::ProtobufAccess; +pub use risingwave_connector_codec::decoder::{Access, AccessError, AccessResult}; use self::bytes::BytesAccess; use self::json::JsonAccess; -use self::protobuf::ProtobufAccess; use crate::parser::unified::debezium::MongoJsonAccess; use crate::source::SourceColumnDesc; @@ -30,7 +30,6 @@ pub mod debezium; pub mod json; pub mod kv_event; pub mod maxwell; -pub mod protobuf; pub mod util; pub enum AccessImpl<'a> { diff --git a/src/connector/src/schema/protobuf.rs b/src/connector/src/schema/protobuf.rs index d140af83c853f..634d692066ac1 100644 --- a/src/connector/src/schema/protobuf.rs +++ b/src/connector/src/schema/protobuf.rs @@ -13,9 +13,10 @@ // limitations under the License. 
use std::collections::BTreeMap; +use std::path::PathBuf; -use itertools::Itertools as _; use prost_reflect::{DescriptorPool, FileDescriptor, MessageDescriptor}; +use risingwave_connector_codec::common::protobuf::compile_pb; use super::loader::{LoadedSchema, SchemaLoader}; use super::schema_registry::Subject; @@ -98,91 +99,29 @@ pub async fn fetch_from_registry( impl LoadedSchema for FileDescriptor { fn compile(primary: Subject, references: Vec) -> Result { let primary_name = primary.name.clone(); - match compile_pb(primary, references) { - Err(e) => Err(SchemaFetchError::SchemaCompile(e.into())), - Ok(b) => { - let pool = DescriptorPool::decode(b.as_slice()) - .map_err(|e| SchemaFetchError::SchemaCompile(e.into()))?; - pool.get_file_by_name(&primary_name).ok_or_else(|| { - SchemaFetchError::SchemaCompile( - anyhow::anyhow!("{primary_name} lost after compilation").into(), - ) - }) - } - } - } -} - -macro_rules! embed_wkts { - [$( $path:literal ),+ $(,)?] => { - &[$( - ( - concat!("google/protobuf/", $path), - include_bytes!(concat!(env!("PROTO_INCLUDE"), "/google/protobuf/", $path)).as_slice(), + let compiled_pb = compile_pb_subject(primary, references)?; + let pool = DescriptorPool::decode(compiled_pb.as_slice()) + .map_err(|e| SchemaFetchError::SchemaCompile(e.into()))?; + pool.get_file_by_name(&primary_name).ok_or_else(|| { + SchemaFetchError::SchemaCompile( + anyhow::anyhow!("{primary_name} lost after compilation").into(), ) - ),+] - }; -} -const WELL_KNOWN_TYPES: &[(&str, &[u8])] = embed_wkts![ - "any.proto", - "api.proto", - "compiler/plugin.proto", - "descriptor.proto", - "duration.proto", - "empty.proto", - "field_mask.proto", - "source_context.proto", - "struct.proto", - "timestamp.proto", - "type.proto", - "wrappers.proto", -]; - -#[derive(Debug, thiserror::Error)] -pub enum PbCompileError { - #[error("build_file_descriptor_set failed\n{}", errs.iter().map(|e| format!("\t{e}")).join("\n"))] - Build { - errs: Vec, - }, - #[error("serialize descriptor set failed")] - Serialize, + }) + } } -pub fn compile_pb( +fn compile_pb_subject( primary_subject: Subject, dependency_subjects: Vec, -) -> Result, PbCompileError> { - use std::iter; - use std::path::Path; - - use protobuf_native::compiler::{ - SimpleErrorCollector, SourceTreeDescriptorDatabase, VirtualSourceTree, - }; - use protobuf_native::MessageLite; - - let mut source_tree = VirtualSourceTree::new(); - for subject in iter::once(&primary_subject).chain(dependency_subjects.iter()) { - source_tree.as_mut().add_file( - Path::new(&subject.name), - subject.schema.content.as_bytes().to_vec(), - ); - } - for (path, bytes) in WELL_KNOWN_TYPES { - source_tree - .as_mut() - .add_file(Path::new(path), bytes.to_vec()); - } - - let mut error_collector = SimpleErrorCollector::new(); - // `db` needs to be dropped before we can iterate on `error_collector`. 
- let fds = { - let mut db = SourceTreeDescriptorDatabase::new(source_tree.as_mut()); - db.as_mut().record_errors_to(error_collector.as_mut()); - db.as_mut() - .build_file_descriptor_set(&[Path::new(&primary_subject.name)]) - } - .map_err(|_| PbCompileError::Build { - errs: error_collector.as_mut().collect(), - })?; - fds.serialize().map_err(|_| PbCompileError::Serialize) +) -> Result, SchemaFetchError> { + compile_pb( + ( + PathBuf::from(&primary_subject.name), + primary_subject.schema.content.as_bytes().to_vec(), + ), + dependency_subjects + .into_iter() + .map(|s| (PathBuf::from(&s.name), s.schema.content.as_bytes().to_vec())), + ) + .map_err(|e| SchemaFetchError::SchemaCompile(e.into())) } diff --git a/src/connector/src/sink/clickhouse.rs b/src/connector/src/sink/clickhouse.rs index 6b3e78f6a7b9d..07db42790f581 100644 --- a/src/connector/src/sink/clickhouse.rs +++ b/src/connector/src/sink/clickhouse.rs @@ -25,7 +25,6 @@ use risingwave_common::array::{Op, StreamChunk}; use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::Schema; use risingwave_common::row::Row; -use risingwave_common::session_config::sink_decouple::SinkDecouple; use risingwave_common::types::{DataType, Decimal, ScalarRefImpl, Serial}; use serde::ser::{SerializeSeq, SerializeStruct}; use serde::Serialize; @@ -38,12 +37,10 @@ use with_options::WithOptions; use super::decouple_checkpoint_log_sink::{ default_commit_checkpoint_interval, DecoupleCheckpointLogSinkerOf, - DEFAULT_COMMIT_CHECKPOINT_INTERVAL, }; use super::writer::SinkWriter; use super::{DummySinkCommitCoordinator, SinkWriterParam}; use crate::error::ConnectorResult; -use crate::sink::catalog::desc::SinkDesc; use crate::sink::{ Result, Sink, SinkError, SinkParam, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT, }; @@ -497,29 +494,6 @@ impl Sink for ClickHouseSink { const SINK_NAME: &'static str = CLICKHOUSE_SINK; - fn is_sink_decouple(desc: &SinkDesc, user_specified: &SinkDecouple) -> Result { - let commit_checkpoint_interval = - if let Some(interval) = desc.properties.get("commit_checkpoint_interval") { - interval - .parse::() - .unwrap_or(DEFAULT_COMMIT_CHECKPOINT_INTERVAL) - } else { - DEFAULT_COMMIT_CHECKPOINT_INTERVAL - }; - - match user_specified { - SinkDecouple::Default | SinkDecouple::Enable => Ok(true), - SinkDecouple::Disable => { - if commit_checkpoint_interval > 1 { - return Err(SinkError::Config(anyhow!( - "config conflict: Clickhouse config `commit_checkpoint_interval` larger than 1 means that sink decouple must be enabled, but session config sink_decouple is disabled" - ))); - } - Ok(false) - } - } - } - async fn validate(&self) -> Result<()> { // For upsert clickhouse sink, the primary key must be defined. 
if !self.is_append_only && self.pk_indices.is_empty() { diff --git a/src/connector/src/sink/coordinate.rs b/src/connector/src/sink/coordinate.rs index c069167870101..fcfb8c0877d6b 100644 --- a/src/connector/src/sink/coordinate.rs +++ b/src/connector/src/sink/coordinate.rs @@ -15,10 +15,12 @@ use std::sync::Arc; use anyhow::anyhow; +use futures::FutureExt; use risingwave_common::array::StreamChunk; use risingwave_common::bitmap::Bitmap; use risingwave_pb::connector_service::SinkMetadata; use risingwave_rpc_client::CoordinatorStreamHandle; +use thiserror_ext::AsReport; use tracing::warn; use super::SinkCoordinationRpcClientEnum; @@ -81,6 +83,23 @@ impl>> SinkWriter for Coordi } async fn update_vnode_bitmap(&mut self, vnode_bitmap: Arc) -> Result<()> { + self.coordinator_stream_handle + .update_vnode_bitmap(&vnode_bitmap) + .await?; self.inner.update_vnode_bitmap(vnode_bitmap).await } } + +impl>> Drop for CoordinatedSinkWriter { + fn drop(&mut self) { + match self.coordinator_stream_handle.stop().now_or_never() { + None => { + warn!("unable to send stop due to channel full") + } + Some(Err(e)) => { + warn!(e = ?e.as_report(), "failed to stop the coordinator"); + } + Some(Ok(_)) => {} + } + } +} diff --git a/src/connector/src/sink/decouple_checkpoint_log_sink.rs b/src/connector/src/sink/decouple_checkpoint_log_sink.rs index 4ba57e3adda7a..59e3335eb36db 100644 --- a/src/connector/src/sink/decouple_checkpoint_log_sink.rs +++ b/src/connector/src/sink/decouple_checkpoint_log_sink.rs @@ -20,10 +20,12 @@ use async_trait::async_trait; use crate::sink::log_store::{LogStoreReadItem, TruncateOffset}; use crate::sink::writer::SinkWriter; use crate::sink::{LogSinker, Result, SinkLogReader, SinkMetrics}; -pub const DEFAULT_COMMIT_CHECKPOINT_INTERVAL: u64 = 10; +pub const DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITH_SINK_DECOUPLE: u64 = 10; +pub const DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITHOUT_SINK_DECOUPLE: u64 = 1; +pub const COMMIT_CHECKPOINT_INTERVAL: &str = "commit_checkpoint_interval"; pub fn default_commit_checkpoint_interval() -> u64 { - DEFAULT_COMMIT_CHECKPOINT_INTERVAL + DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITH_SINK_DECOUPLE } /// The `LogSinker` implementation used for commit-decoupled sinks (such as `Iceberg`, `DeltaLake` and `StarRocks`). @@ -65,7 +67,7 @@ impl> LogSinker for DecoupleCheckpointLogSink EpochBegun { curr_epoch: u64 }, /// Mark that the consumer has just received a barrier - BarrierReceived { prev_epoch: u64 }, + BarrierReceived { prev_epoch: u64, committed: bool }, } let mut state = LogConsumerState::Uninitialized; @@ -75,15 +77,34 @@ impl> LogSinker for DecoupleCheckpointLogSink loop { let (epoch, item): (u64, LogStoreReadItem) = log_reader.next_item().await?; - if let LogStoreReadItem::UpdateVnodeBitmap(_) = &item { - match &state { - LogConsumerState::BarrierReceived { .. 
} => {} + if let LogStoreReadItem::UpdateVnodeBitmap(vnode_bitmap) = &item { + match &mut state { + LogConsumerState::BarrierReceived { + committed, + prev_epoch, + } => { + if !*committed { + // force commit on update vnode bitmap + let start_time = Instant::now(); + sink_writer.barrier(true).await?; + sink_metrics + .sink_commit_duration_metrics + .observe(start_time.elapsed().as_millis() as f64); + log_reader.truncate(TruncateOffset::Barrier { epoch: *prev_epoch })?; + current_checkpoint = 0; + *committed = true; + } + sink_writer + .update_vnode_bitmap(vnode_bitmap.clone()) + .await?; + } _ => unreachable!( "update vnode bitmap can be accepted only right after \ barrier, but current state is {:?}", state ), } + continue; } // begin_epoch when not previously began state = match state { @@ -100,7 +121,7 @@ impl> LogSinker for DecoupleCheckpointLogSink ); LogConsumerState::EpochBegun { curr_epoch: epoch } } - LogConsumerState::BarrierReceived { prev_epoch } => { + LogConsumerState::BarrierReceived { prev_epoch, .. } => { assert!( epoch > prev_epoch, "new epoch {} should be greater than prev epoch {}", @@ -123,7 +144,7 @@ impl> LogSinker for DecoupleCheckpointLogSink LogConsumerState::EpochBegun { curr_epoch } => curr_epoch, _ => unreachable!("epoch must have begun before handling barrier"), }; - if is_checkpoint { + let committed = if is_checkpoint { current_checkpoint += 1; if current_checkpoint >= commit_checkpoint_interval.get() { let start_time = Instant::now(); @@ -133,16 +154,22 @@ impl> LogSinker for DecoupleCheckpointLogSink .observe(start_time.elapsed().as_millis() as f64); log_reader.truncate(TruncateOffset::Barrier { epoch })?; current_checkpoint = 0; + true } else { sink_writer.barrier(false).await?; + false } } else { sink_writer.barrier(false).await?; + false + }; + state = LogConsumerState::BarrierReceived { + prev_epoch, + committed, } - state = LogConsumerState::BarrierReceived { prev_epoch } } - LogStoreReadItem::UpdateVnodeBitmap(vnode_bitmap) => { - sink_writer.update_vnode_bitmap(vnode_bitmap).await?; + LogStoreReadItem::UpdateVnodeBitmap(_) => { + unreachable!("should have been handled earlier") + } } } } diff --git a/src/connector/src/sink/deltalake.rs b/src/connector/src/sink/deltalake.rs index 2dedffa3469e3..494adb2dd6fed 100644 --- a/src/connector/src/sink/deltalake.rs +++ b/src/connector/src/sink/deltalake.rs @@ -31,7 +31,6 @@ use risingwave_common::array::StreamChunk; use risingwave_common::bail; use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::Schema; -use risingwave_common::session_config::sink_decouple::SinkDecouple; use risingwave_common::types::DataType; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_pb::connector_service::sink_metadata::Metadata::Serialized; @@ -41,11 +40,9 @@ use serde_derive::{Deserialize, Serialize}; use serde_with::{serde_as, DisplayFromStr}; use with_options::WithOptions; -use super::catalog::desc::SinkDesc; use super::coordinate::CoordinatedSinkWriter; use super::decouple_checkpoint_log_sink::{ default_commit_checkpoint_interval, DecoupleCheckpointLogSinkerOf, - DEFAULT_COMMIT_CHECKPOINT_INTERVAL, }; use super::writer::SinkWriter; use super::{ @@ -285,29 +282,6 @@ impl Sink for DeltaLakeSink { const SINK_NAME: &'static str = DELTALAKE_SINK; - fn is_sink_decouple(desc: &SinkDesc, user_specified: &SinkDecouple) -> Result { - let commit_checkpoint_interval = - if let Some(interval) = desc.properties.get("commit_checkpoint_interval") { - interval - .parse::() - 
.unwrap_or(DEFAULT_COMMIT_CHECKPOINT_INTERVAL) - } else { - DEFAULT_COMMIT_CHECKPOINT_INTERVAL - }; - - match user_specified { - SinkDecouple::Default | SinkDecouple::Enable => Ok(true), - SinkDecouple::Disable => { - if commit_checkpoint_interval > 1 { - return Err(SinkError::Config(anyhow!( - "config conflict: DeltaLake config `commit_checkpoint_interval` larger than 1 means that sink decouple must be enabled, but session config sink_decouple is disabled" - ))); - } - Ok(false) - } - } - } - async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result { let inner = DeltaLakeSinkWriter::new( self.config.clone(), diff --git a/src/connector/src/sink/encoder/proto.rs b/src/connector/src/sink/encoder/proto.rs index 8046606b5690c..ce6e8503b624e 100644 --- a/src/connector/src/sink/encoder/proto.rs +++ b/src/connector/src/sink/encoder/proto.rs @@ -440,10 +440,10 @@ mod tests { #[test] fn test_encode_proto_ok() { let pool_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) - .join("src/test_data/proto_recursive/recursive.pb"); + .join("codec/tests/test_data/all-types.pb"); let pool_bytes = std::fs::read(pool_path).unwrap(); let pool = prost_reflect::DescriptorPool::decode(pool_bytes.as_ref()).unwrap(); - let descriptor = pool.get_message_by_name("recursive.AllTypes").unwrap(); + let descriptor = pool.get_message_by_name("all_types.AllTypes").unwrap(); let schema = Schema::new(vec![ Field::with_name(DataType::Boolean, "bool_field"), Field::with_name(DataType::Varchar, "string_field"), @@ -495,7 +495,7 @@ mod tests { // Hint: write the binary output to a file `test.binpb`, and view it with `protoc`: // ``` // protoc --decode_raw < test.binpb - // protoc --decode=recursive.AllTypes recursive.proto < test.binpb + // protoc --decode=all_types.AllTypes all-types.proto < test.binpb // ``` [ 9, 0, 0, 0, 0, 0, 0, 17, 64, 21, 0, 0, 96, 64, 24, 22, 32, 23, 56, 48, 93, 26, 0, @@ -509,10 +509,10 @@ mod tests { #[test] fn test_encode_proto_repeated() { let pool_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) - .join("src/test_data/proto_recursive/recursive.pb"); - let pool_bytes = std::fs::read(pool_path).unwrap(); + .join("codec/tests/test_data/all-types.pb"); + let pool_bytes = fs_err::read(pool_path).unwrap(); let pool = prost_reflect::DescriptorPool::decode(pool_bytes.as_ref()).unwrap(); - let message_descriptor = pool.get_message_by_name("recursive.AllTypes").unwrap(); + let message_descriptor = pool.get_message_by_name("all_types.AllTypes").unwrap(); let schema = Schema::new(vec![Field::with_name( DataType::List(DataType::List(DataType::Int32.into()).into()), @@ -561,10 +561,10 @@ mod tests { #[test] fn test_encode_proto_err() { let pool_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) - .join("src/test_data/proto_recursive/recursive.pb"); + .join("codec/tests/test_data/all-types.pb"); let pool_bytes = std::fs::read(pool_path).unwrap(); let pool = prost_reflect::DescriptorPool::decode(pool_bytes.as_ref()).unwrap(); - let message_descriptor = pool.get_message_by_name("recursive.AllTypes").unwrap(); + let message_descriptor = pool.get_message_by_name("all_types.AllTypes").unwrap(); let err = validate_fields( std::iter::once(("not_exists", &DataType::Int16)), diff --git a/src/connector/src/sink/file_sink/opendal_sink.rs b/src/connector/src/sink/file_sink/opendal_sink.rs index 1fd461015b4ba..65ec46f494345 100644 --- a/src/connector/src/sink/file_sink/opendal_sink.rs +++ b/src/connector/src/sink/file_sink/opendal_sink.rs @@ -97,9 +97,6 @@ impl Sink for FileSink { const SINK_NAME: 
&'static str = S::SINK_NAME; async fn validate(&self) -> Result<()> { - risingwave_common::license::Feature::FileSink - .check_available() - .map_err(|e| anyhow::anyhow!(e))?; if !self.is_append_only { return Err(SinkError::Config(anyhow!( "File sink only supports append-only mode at present. \ diff --git a/src/connector/src/sink/google_pubsub.rs b/src/connector/src/sink/google_pubsub.rs index ea0e0e4776318..ff9079591a2f5 100644 --- a/src/connector/src/sink/google_pubsub.rs +++ b/src/connector/src/sink/google_pubsub.rs @@ -14,11 +14,7 @@ use std::collections::BTreeMap; -use anyhow::{anyhow, Context}; -use futures::future::try_join_all; -use futures::prelude::future::FutureExt; -use futures::prelude::TryFuture; -use futures::TryFutureExt; +use anyhow::anyhow; use google_cloud_gax::conn::Environment; use google_cloud_googleapis::pubsub::v1::PubsubMessage; use google_cloud_pubsub::apiv1; @@ -26,7 +22,7 @@ use google_cloud_pubsub::client::google_cloud_auth::credentials::CredentialsFile use google_cloud_pubsub::client::google_cloud_auth::project; use google_cloud_pubsub::client::google_cloud_auth::token::DefaultTokenSourceProvider; use google_cloud_pubsub::client::{Client, ClientConfig}; -use google_cloud_pubsub::publisher::{Awaiter, Publisher}; +use google_cloud_pubsub::publisher::Publisher; use risingwave_common::array::StreamChunk; use risingwave_common::catalog::Schema; use serde_derive::Deserialize; @@ -46,19 +42,33 @@ use crate::dispatch_sink_formatter_str_key_impl; pub const PUBSUB_SINK: &str = "google_pubsub"; const PUBSUB_SEND_FUTURE_BUFFER_MAX_SIZE: usize = 65536; -fn may_delivery_future(awaiter: Vec) -> GooglePubSubSinkDeliveryFuture { - try_join_all(awaiter.into_iter().map(|awaiter| { - awaiter.get().map(|result| { - result - .context("Google Pub/Sub sink error") - .map_err(SinkError::GooglePubSub) - .map(|_| ()) - }) - })) - .map_ok(|_: Vec<()>| ()) - .boxed() +mod delivery_future { + use anyhow::Context; + use futures::future::try_join_all; + use futures::{FutureExt, TryFuture, TryFutureExt}; + use google_cloud_pubsub::publisher::Awaiter; + + use crate::sink::SinkError; + + pub type GooglePubSubSinkDeliveryFuture = + impl TryFuture + Unpin + 'static; + + pub(super) fn may_delivery_future(awaiter: Vec) -> GooglePubSubSinkDeliveryFuture { + try_join_all(awaiter.into_iter().map(|awaiter| { + awaiter.get().map(|result| { + result + .context("Google Pub/Sub sink error") + .map_err(SinkError::GooglePubSub) + .map(|_| ()) + }) + })) + .map_ok(|_: Vec<()>| ()) + .boxed() + } } +use delivery_future::*; + #[serde_as] #[derive(Clone, Debug, Deserialize, WithOptions)] pub struct GooglePubSubConfig { @@ -172,9 +182,6 @@ struct GooglePubSubPayloadWriter<'w> { add_future: DeliveryFutureManagerAddFuture<'w, GooglePubSubSinkDeliveryFuture>, } -pub type GooglePubSubSinkDeliveryFuture = - impl TryFuture + Unpin + 'static; - impl GooglePubSubSinkWriter { pub async fn new( config: GooglePubSubConfig, diff --git a/src/connector/src/sink/iceberg/jni_catalog.rs b/src/connector/src/sink/iceberg/jni_catalog.rs index b80a6a305870f..6529ea733428d 100644 --- a/src/connector/src/sink/iceberg/jni_catalog.rs +++ b/src/connector/src/sink/iceberg/jni_catalog.rs @@ -288,7 +288,7 @@ impl CatalogV2 for JniCatalog { "Failed to crete iceberg table.", ) .with_source(e) - }) + })? } /// Load table from the catalog. @@ -338,7 +338,7 @@ impl CatalogV2 for JniCatalog { "Failed to load iceberg table.", ) .with_source(e) - }) + })? } /// Drop a table from the catalog. 
diff --git a/src/connector/src/sink/iceberg/mod.rs b/src/connector/src/sink/iceberg/mod.rs index b68e74b1f5d95..9e87694539f0c 100644 --- a/src/connector/src/sink/iceberg/mod.rs +++ b/src/connector/src/sink/iceberg/mod.rs @@ -43,11 +43,10 @@ use icelake::io_v2::{ DataFileWriterBuilder, EqualityDeltaWriterBuilder, IcebergWriterBuilder, DELETE_OP, INSERT_OP, }; use icelake::transaction::Transaction; -use icelake::types::{data_file_from_json, data_file_to_json, Any, DataFile, COLUMN_ID_META_KEY}; +use icelake::types::{data_file_from_json, data_file_to_json, Any, DataFile}; use icelake::{Table, TableIdentifier}; use itertools::Itertools; -use parquet::arrow::PARQUET_FIELD_ID_META_KEY; -use risingwave_common::array::arrow::IcebergArrowConvert; +use risingwave_common::array::arrow::{IcebergArrowConvert, IcebergCreateTableArrowConvert}; use risingwave_common::array::{Op, StreamChunk}; use risingwave_common::bail; use risingwave_common::bitmap::Bitmap; @@ -65,10 +64,8 @@ use with_options::WithOptions; use self::mock_catalog::MockCatalog; use self::prometheus::monitored_base_file_writer::MonitoredBaseFileWriterBuilder; use self::prometheus::monitored_position_delete_writer::MonitoredPositionDeleteWriterBuilder; -use super::catalog::desc::SinkDesc; use super::decouple_checkpoint_log_sink::{ default_commit_checkpoint_interval, DecoupleCheckpointLogSinkerOf, - DEFAULT_COMMIT_CHECKPOINT_INTERVAL, }; use super::{ Sink, SinkError, SinkWriterParam, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT, @@ -76,7 +73,7 @@ use super::{ use crate::error::ConnectorResult; use crate::sink::coordinate::CoordinatedSinkWriter; use crate::sink::writer::SinkWriter; -use crate::sink::{Result, SinkCommitCoordinator, SinkDecouple, SinkParam}; +use crate::sink::{Result, SinkCommitCoordinator, SinkParam}; use crate::{ deserialize_bool_from_string, deserialize_optional_bool_from_string, deserialize_optional_string_seq_from_string, @@ -672,7 +669,7 @@ impl IcebergConfig { .file_io(storage_catalog.file_io().clone()) // Only support readonly table for storage catalog now. .readonly(true) - .build()) + .build()?) } _ => self.load_table_v2().await, } @@ -747,30 +744,20 @@ impl IcebergSink { bail!("database name must be set if you want to create table") }; + let iceberg_create_table_arrow_convert = IcebergCreateTableArrowConvert::default(); // convert risingwave schema -> arrow schema -> iceberg schema let arrow_fields = self .param .columns .iter() .map(|column| { - let mut arrow_field = IcebergArrowConvert + Ok(iceberg_create_table_arrow_convert .to_arrow_field(&column.name, &column.data_type) .map_err(|e| SinkError::Iceberg(anyhow!(e))) .context(format!( "failed to convert {}: {} to arrow type", &column.name, &column.data_type - ))?; - let mut metadata = HashMap::new(); - metadata.insert( - PARQUET_FIELD_ID_META_KEY.to_string(), - column.column_id.get_id().to_string(), - ); - metadata.insert( - COLUMN_ID_META_KEY.to_string(), - column.column_id.get_id().to_string(), - ); - arrow_field.set_metadata(metadata); - Ok(arrow_field) + ))?) 
}) .collect::>>()?; let arrow_schema = arrow_schema_iceberg::Schema::new(arrow_fields); @@ -843,31 +830,6 @@ impl Sink for IcebergSink { const SINK_NAME: &'static str = ICEBERG_SINK; - fn is_sink_decouple(desc: &SinkDesc, user_specified: &SinkDecouple) -> Result { - let commit_checkpoint_interval = - desc.properties - .get("commit_checkpoint_interval") - .map(|interval| { - interval - .parse::() - .unwrap_or(DEFAULT_COMMIT_CHECKPOINT_INTERVAL) - }); - - match user_specified { - SinkDecouple::Default | SinkDecouple::Enable => Ok(true), - SinkDecouple::Disable => { - if let Some(commit_checkpoint_interval) = commit_checkpoint_interval - && commit_checkpoint_interval > 1 - { - return Err(SinkError::Config(anyhow!( - "config conflict: Iceberg config `commit_checkpoint_interval` larger than 1 means that sink decouple must be enabled, but session config sink_decouple is disabled" - ))); - } - Ok(false) - } - } - } - async fn validate(&self) -> Result<()> { if "glue".eq_ignore_ascii_case(self.config.catalog_type()) { risingwave_common::license::Feature::IcebergSinkWithGlue @@ -1375,15 +1337,21 @@ pub fn try_matches_arrow_schema( (ArrowDataType::Decimal128(_, _), ArrowDataType::Decimal128(_, _)) => true, (ArrowDataType::Binary, ArrowDataType::LargeBinary) => true, (ArrowDataType::LargeBinary, ArrowDataType::Binary) => true, - (left, right) => left == right, + // cases where left != right (metadata, field name mismatch) + // + // all nested types: in iceberg `field_id` will always be present, but RW doesn't have it: + // {"PARQUET:field_id": ".."} + // + // map: The standard name in arrow is "entries", "key", "value". + // in iceberg-rs, it's called "key_value" + (left, right) => left.equals_datatype(right), }; if !compatible { - bail!("Field {}'s type not compatible, risingwave converted data type {}, iceberg's data type: {}", + bail!("field {}'s type is incompatible\nRisingWave converted data type: {}\niceberg's data type: {}", arrow_field.name(), converted_arrow_data_type, arrow_field.data_type() ); } } - Ok(()) } @@ -1393,7 +1361,7 @@ mod test { use risingwave_common::catalog::Field; - use crate::sink::decouple_checkpoint_log_sink::DEFAULT_COMMIT_CHECKPOINT_INTERVAL; + use crate::sink::decouple_checkpoint_log_sink::DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITH_SINK_DECOUPLE; use crate::sink::iceberg::IcebergConfig; use crate::source::DataType; @@ -1476,7 +1444,7 @@ mod test { .into_iter() .map(|(k, v)| (k.to_string(), v.to_string())) .collect(), - commit_checkpoint_interval: DEFAULT_COMMIT_CHECKPOINT_INTERVAL, + commit_checkpoint_interval: DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITH_SINK_DECOUPLE, create_table_if_not_exists: false, }; diff --git a/src/connector/src/sink/iceberg/prometheus/monitored_partition_writer.rs b/src/connector/src/sink/iceberg/prometheus/monitored_partition_writer.rs index d85d712c41ac3..463b1f3c9dbd4 100644 --- a/src/connector/src/sink/iceberg/prometheus/monitored_partition_writer.rs +++ b/src/connector/src/sink/iceberg/prometheus/monitored_partition_writer.rs @@ -27,7 +27,6 @@ pub struct MonitoredFanoutPartitionedWriterBuilder { } impl MonitoredFanoutPartitionedWriterBuilder { - #[expect(dead_code)] pub fn new( inner: FanoutPartitionedWriterBuilder, partition_num: LabelGuardedIntGauge<2>, diff --git a/src/connector/src/sink/iceberg/prometheus/monitored_write_writer.rs b/src/connector/src/sink/iceberg/prometheus/monitored_write_writer.rs index dc44434e5d9c2..aebb5939ff143 100644 --- a/src/connector/src/sink/iceberg/prometheus/monitored_write_writer.rs +++ 
b/src/connector/src/sink/iceberg/prometheus/monitored_write_writer.rs @@ -28,7 +28,6 @@ pub struct MonitoredWriteWriterBuilder { impl MonitoredWriteWriterBuilder { /// Create writer context. - #[expect(dead_code)] pub fn new( inner: B, write_qps: LabelGuardedIntCounter<2>, diff --git a/src/connector/src/sink/iceberg/storage_catalog.rs b/src/connector/src/sink/iceberg/storage_catalog.rs index 01adb510882a2..18e2ff0e036ff 100644 --- a/src/connector/src/sink/iceberg/storage_catalog.rs +++ b/src/connector/src/sink/iceberg/storage_catalog.rs @@ -249,11 +249,11 @@ impl Catalog for StorageCatalog { let version_hint_output = self.file_io.new_output(&version_hint_path)?; version_hint_output.write("1".into()).await?; - Ok(Table::builder() + Table::builder() .metadata(table_metadata) .identifier(table_ident) .file_io(self.file_io.clone()) - .build()) + .build() } /// Load table from the catalog. @@ -283,13 +283,13 @@ impl Catalog for StorageCatalog { let metadata_file_content = metadata_file.read().await?; let table_metadata = serde_json::from_slice::(&metadata_file_content)?; - Ok(Table::builder() + Table::builder() .metadata(table_metadata) .identifier(table.clone()) .file_io(self.file_io.clone()) // Only support readonly table for storage catalog now. .readonly(true) - .build()) + .build() } /// Drop a table from the catalog. diff --git a/src/connector/src/sink/mod.rs b/src/connector/src/sink/mod.rs index dafbc856207a9..b453af53cca41 100644 --- a/src/connector/src/sink/mod.rs +++ b/src/connector/src/sink/mod.rs @@ -53,6 +53,13 @@ use ::deltalake::DeltaTableError; use ::redis::RedisError; use anyhow::anyhow; use async_trait::async_trait; +use clickhouse::CLICKHOUSE_SINK; +use decouple_checkpoint_log_sink::{ + COMMIT_CHECKPOINT_INTERVAL, DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITHOUT_SINK_DECOUPLE, + DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITH_SINK_DECOUPLE, +}; +use deltalake::DELTALAKE_SINK; +use iceberg::ICEBERG_SINK; use opendal::Error as OpendalError; use risingwave_common::array::ArrayError; use risingwave_common::bitmap::Bitmap; @@ -66,6 +73,7 @@ use risingwave_pb::catalog::PbSinkType; use risingwave_pb::connector_service::{PbSinkParam, SinkMetadata, TableSchema}; use risingwave_rpc_client::error::RpcError; use risingwave_rpc_client::MetaClient; +use starrocks::STARROCKS_SINK; use thiserror::Error; use thiserror_ext::AsReport; pub use tracing; @@ -366,13 +374,54 @@ impl SinkWriterParam { } } +fn is_sink_support_commit_checkpoint_interval(sink_name: &str) -> bool { + matches!( + sink_name, + ICEBERG_SINK | CLICKHOUSE_SINK | STARROCKS_SINK | DELTALAKE_SINK + ) +} pub trait Sink: TryFrom { const SINK_NAME: &'static str; type LogSinker: LogSinker; type Coordinator: SinkCommitCoordinator; + fn set_default_commit_checkpoint_interval( + desc: &mut SinkDesc, + user_specified: &SinkDecouple, + ) -> Result<()> { + if is_sink_support_commit_checkpoint_interval(Self::SINK_NAME) { + match desc.properties.get(COMMIT_CHECKPOINT_INTERVAL) { + Some(commit_checkpoint_interval) => { + let commit_checkpoint_interval = commit_checkpoint_interval + .parse::() + .map_err(|e| SinkError::Config(anyhow!(e)))?; + if matches!(user_specified, SinkDecouple::Disable) + && commit_checkpoint_interval > 1 + { + return Err(SinkError::Config(anyhow!("config conflict: `commit_checkpoint_interval` larger than 1 means that sink decouple must be enabled, but session config sink_decouple is disabled"))); + } + } + None => match user_specified { + SinkDecouple::Default | SinkDecouple::Enable => { + desc.properties.insert( + 
COMMIT_CHECKPOINT_INTERVAL.to_string(), + DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITH_SINK_DECOUPLE.to_string(), + ); + } + SinkDecouple::Disable => { + desc.properties.insert( + COMMIT_CHECKPOINT_INTERVAL.to_string(), + DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITHOUT_SINK_DECOUPLE.to_string(), + ); + } + }, + } + } + Ok(()) + } + /// `user_specified` is the value of `sink_decouple` config. - fn is_sink_decouple(_desc: &SinkDesc, user_specified: &SinkDecouple) -> Result { + fn is_sink_decouple(user_specified: &SinkDecouple) -> Result { match user_specified { SinkDecouple::Default | SinkDecouple::Enable => Ok(true), SinkDecouple::Disable => Ok(false), diff --git a/src/connector/src/sink/redis.rs b/src/connector/src/sink/redis.rs index 49207e668e41b..763d7e9bba49a 100644 --- a/src/connector/src/sink/redis.rs +++ b/src/connector/src/sink/redis.rs @@ -288,7 +288,7 @@ impl RedisSinkPayloadWriter { return Ok(()); } } - self.pipe.query(self.conn.as_mut().unwrap()).await?; + self.pipe.query::<()>(self.conn.as_mut().unwrap()).await?; self.pipe.clear(); Ok(()) } diff --git a/src/connector/src/sink/remote.rs b/src/connector/src/sink/remote.rs index 6fcef5d41b654..aa8ca0625d05f 100644 --- a/src/connector/src/sink/remote.rs +++ b/src/connector/src/sink/remote.rs @@ -23,7 +23,6 @@ use async_trait::async_trait; use await_tree::InstrumentAwait; use futures::future::select; use futures::TryStreamExt; -use itertools::Itertools; use jni::JavaVM; use prost::Message; use risingwave_common::array::StreamChunk; @@ -60,7 +59,6 @@ use tracing::warn; use super::elasticsearch::{is_es_sink, StreamChunkConverter, ES_OPTION_DELIMITER}; use crate::error::ConnectorResult; -use crate::sink::catalog::desc::SinkDesc; use crate::sink::coordinate::CoordinatedSinkWriter; use crate::sink::log_store::{LogStoreReadItem, LogStoreResult, TruncateOffset}; use crate::sink::writer::{LogSinkerOf, SinkWriter, SinkWriterExt}; @@ -116,7 +114,7 @@ def_remote_sink!(); pub trait RemoteSinkTrait: Send + Sync + 'static { const SINK_NAME: &'static str; - fn default_sink_decouple(_desc: &SinkDesc) -> bool { + fn default_sink_decouple() -> bool { true } } @@ -144,9 +142,9 @@ impl Sink for RemoteSink { const SINK_NAME: &'static str = R::SINK_NAME; - fn is_sink_decouple(desc: &SinkDesc, user_specified: &SinkDecouple) -> Result { + fn is_sink_decouple(user_specified: &SinkDecouple) -> Result { match user_specified { - SinkDecouple::Default => Ok(R::default_sink_decouple(desc)), + SinkDecouple::Default => Ok(R::default_sink_decouple()), SinkDecouple::Enable => Ok(true), SinkDecouple::Disable => Ok(false), } @@ -175,7 +173,7 @@ async fn validate_remote_sink(param: &SinkParam, sink_name: &str) -> ConnectorRe bail!("Es sink only supports single pk or pk with delimiter option"); } // FIXME: support struct and array in stream sink - param.columns.iter().map(|col| { + param.columns.iter().try_for_each(|col| { match &col.data_type { DataType::Int16 | DataType::Int32 @@ -218,7 +216,7 @@ async fn validate_remote_sink(param: &SinkParam, sink_name: &str) -> ConnectorRe "remote sink supports Int16, Int32, Int64, Float32, Float64, Boolean, Decimal, Time, Date, Interval, Jsonb, Timestamp, Timestamptz, Bytea, List and Varchar, (Es sink support Struct) got {:?}: {:?}", col.name, col.data_type, - )))}}).try_collect()?; + )))}})?; let jvm = JVM.get_or_init()?; let sink_param = param.to_proto(); diff --git a/src/connector/src/sink/starrocks.rs b/src/connector/src/sink/starrocks.rs index 21a4fc371b940..5c3e724721d18 100644 --- a/src/connector/src/sink/starrocks.rs +++ 
b/src/connector/src/sink/starrocks.rs @@ -24,7 +24,6 @@ use mysql_async::Opts; use risingwave_common::array::{Op, StreamChunk}; use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::Schema; -use risingwave_common::session_config::sink_decouple::SinkDecouple; use risingwave_common::types::DataType; use risingwave_pb::connector_service::sink_metadata::Metadata::Serialized; use risingwave_pb::connector_service::sink_metadata::SerializedMetadata; @@ -38,7 +37,7 @@ use tokio::task::JoinHandle; use url::form_urlencoded; use with_options::WithOptions; -use super::decouple_checkpoint_log_sink::DEFAULT_COMMIT_CHECKPOINT_INTERVAL; +use super::decouple_checkpoint_log_sink::DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITH_SINK_DECOUPLE; use super::doris_starrocks_connector::{ HeaderBuilder, InserterInner, StarrocksTxnRequestBuilder, STARROCKS_DELETE_SIGN, STARROCKS_SUCCESS_STATUS, @@ -48,7 +47,6 @@ use super::{ SinkCommitCoordinator, SinkError, SinkParam, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT, }; -use crate::sink::catalog::desc::SinkDesc; use crate::sink::coordinate::CoordinatedSinkWriter; use crate::sink::decouple_checkpoint_log_sink::DecoupleCheckpointLogSinkerOf; use crate::sink::{Result, Sink, SinkWriter, SinkWriterParam}; @@ -118,7 +116,7 @@ pub struct StarrocksConfig { } fn default_commit_checkpoint_interval() -> u64 { - DEFAULT_COMMIT_CHECKPOINT_INTERVAL + DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITH_SINK_DECOUPLE } impl StarrocksConfig { @@ -264,29 +262,6 @@ impl Sink for StarrocksSink { const SINK_NAME: &'static str = STARROCKS_SINK; - fn is_sink_decouple(desc: &SinkDesc, user_specified: &SinkDecouple) -> Result { - let commit_checkpoint_interval = - if let Some(interval) = desc.properties.get("commit_checkpoint_interval") { - interval - .parse::() - .unwrap_or(DEFAULT_COMMIT_CHECKPOINT_INTERVAL) - } else { - DEFAULT_COMMIT_CHECKPOINT_INTERVAL - }; - - match user_specified { - SinkDecouple::Default | SinkDecouple::Enable => Ok(true), - SinkDecouple::Disable => { - if commit_checkpoint_interval > 1 { - return Err(SinkError::Config(anyhow!( - "config conflict: Starrocks config `commit_checkpoint_interval` larger than 1 means that sink decouple must be enabled, but session config sink_decouple is disabled" - ))); - } - Ok(false) - } - } - } - async fn validate(&self) -> Result<()> { if !self.is_append_only && self.pk_indices.is_empty() { return Err(SinkError::Config(anyhow!( diff --git a/src/connector/src/sink/trivial.rs b/src/connector/src/sink/trivial.rs index 5c5e093c8e0f0..e19f99943338c 100644 --- a/src/connector/src/sink/trivial.rs +++ b/src/connector/src/sink/trivial.rs @@ -17,7 +17,6 @@ use std::marker::PhantomData; use async_trait::async_trait; use risingwave_common::session_config::sink_decouple::SinkDecouple; -use super::catalog::desc::SinkDesc; use crate::sink::log_store::{LogStoreReadItem, TruncateOffset}; use crate::sink::{ DummySinkCommitCoordinator, LogSinker, Result, Sink, SinkError, SinkLogReader, SinkParam, @@ -67,7 +66,7 @@ impl Sink for TrivialSink { const SINK_NAME: &'static str = T::SINK_NAME; // Disable sink decoupling for all trivial sinks because it introduces overhead without any benefit - fn is_sink_decouple(_desc: &SinkDesc, _user_specified: &SinkDecouple) -> Result { + fn is_sink_decouple(_user_specified: &SinkDecouple) -> Result { Ok(false) } diff --git a/src/connector/src/source/cdc/external/mod.rs b/src/connector/src/source/cdc/external/mod.rs index be1c891b8d078..7a73f9b9bce98 100644 --- a/src/connector/src/source/cdc/external/mod.rs +++ 
b/src/connector/src/source/cdc/external/mod.rs @@ -237,7 +237,12 @@ pub struct ExternalTableConfig { /// Choices include `disabled`, `preferred`, and `required`. /// This field is optional. #[serde(rename = "ssl.mode", default = "Default::default")] - pub sslmode: SslMode, + #[serde(alias = "debezium.database.sslmode")] + pub ssl_mode: SslMode, + + #[serde(rename = "ssl.root.cert")] + #[serde(alias = "debezium.database.sslrootcert")] + pub ssl_root_cert: Option, } impl ExternalTableConfig { @@ -253,7 +258,7 @@ impl ExternalTableConfig { } } -#[derive(Debug, Clone, Deserialize)] +#[derive(Debug, Clone, PartialEq, Deserialize)] #[serde(rename_all = "lowercase")] pub enum SslMode { #[serde(alias = "disable")] @@ -262,6 +267,14 @@ pub enum SslMode { Preferred, #[serde(alias = "require")] Required, + /// verify that the server is trustworthy by checking the certificate chain + /// up to the root certificate stored on the client. + #[serde(alias = "verify-ca")] + VerifyCa, + /// Besides verify the certificate, will also verify that the serverhost name + /// matches the name stored in the server certificate. + #[serde(alias = "verify-full")] + VerifyFull, } impl Default for SslMode { @@ -277,6 +290,8 @@ impl fmt::Display for SslMode { SslMode::Disabled => "disabled", SslMode::Preferred => "preferred", SslMode::Required => "required", + SslMode::VerifyCa => "verify-ca", + SslMode::VerifyFull => "verify-full", }) } } diff --git a/src/connector/src/source/cdc/external/mysql.rs b/src/connector/src/source/cdc/external/mysql.rs index 0e7ec02cfac27..59971f8761068 100644 --- a/src/connector/src/source/cdc/external/mysql.rs +++ b/src/connector/src/source/cdc/external/mysql.rs @@ -85,9 +85,12 @@ impl MySqlExternalTable { .host(&config.host) .port(config.port.parse::().unwrap()) .database(&config.database) - .ssl_mode(match config.sslmode { + .ssl_mode(match config.ssl_mode { SslMode::Disabled | SslMode::Preferred => sqlx::mysql::MySqlSslMode::Disabled, SslMode::Required => sqlx::mysql::MySqlSslMode::Required, + _ => { + return Err(anyhow!("unsupported SSL mode").into()); + } }); let connection = MySqlPool::connect_with(options).await?; @@ -308,9 +311,10 @@ impl MySqlExternalTableReader { .tcp_port(config.port.parse::().unwrap()) .db_name(Some(config.database)); - opts_builder = match config.sslmode { + opts_builder = match config.ssl_mode { SslMode::Disabled | SslMode::Preferred => opts_builder.ssl_opts(None), - SslMode::Required => { + // verify-ca and verify-full are same as required for mysql now + SslMode::Required | SslMode::VerifyCa | SslMode::VerifyFull => { let ssl_without_verify = mysql_async::SslOpts::default() .with_danger_accept_invalid_certs(true) .with_danger_skip_domain_validation(true); @@ -529,7 +533,8 @@ mod tests { database: "mydb".to_string(), schema: "".to_string(), table: "part".to_string(), - sslmode: Default::default(), + ssl_mode: Default::default(), + ssl_root_cert: None, }; let table = MySqlExternalTable::connect(config).await.unwrap(); diff --git a/src/connector/src/source/cdc/external/postgres.rs b/src/connector/src/source/cdc/external/postgres.rs index ca0caf46d6125..9123c7451b74e 100644 --- a/src/connector/src/source/cdc/external/postgres.rs +++ b/src/connector/src/source/cdc/external/postgres.rs @@ -86,18 +86,26 @@ pub struct PostgresExternalTable { impl PostgresExternalTable { pub async fn connect(config: ExternalTableConfig) -> ConnectorResult { tracing::debug!("connect to postgres external table"); - let options = PgConnectOptions::new() + let mut options = 
PgConnectOptions::new() .username(&config.username) .password(&config.password) .host(&config.host) .port(config.port.parse::().unwrap()) .database(&config.database) - .ssl_mode(match config.sslmode { + .ssl_mode(match config.ssl_mode { SslMode::Disabled => PgSslMode::Disable, SslMode::Preferred => PgSslMode::Prefer, SslMode::Required => PgSslMode::Require, + SslMode::VerifyCa => PgSslMode::VerifyCa, + SslMode::VerifyFull => PgSslMode::VerifyFull, }); + if config.ssl_mode == SslMode::VerifyCa || config.ssl_mode == SslMode::VerifyFull { + if let Some(ref root_cert) = config.ssl_root_cert { + options = options.ssl_root_cert(root_cert.as_str()); + } + } + let connection = PgPool::connect_with(options).await?; let schema_discovery = SchemaDiscovery::new(connection, config.schema.as_str()); // fetch column schema and primary key @@ -288,8 +296,14 @@ impl PostgresExternalTableReader { .port(config.port.parse::().unwrap()) .dbname(&config.database); + let (_verify_ca, verify_hostname) = match config.ssl_mode { + SslMode::VerifyCa => (true, false), + SslMode::VerifyFull => (true, true), + _ => (false, false), + }; + #[cfg(not(madsim))] - let connector = match config.sslmode { + let connector = match config.ssl_mode { SslMode::Disabled => { pg_config.ssl_mode(tokio_postgres::config::SslMode::Disable); MaybeMakeTlsConnector::NoTls(NoTls) @@ -315,6 +329,24 @@ impl PostgresExternalTableReader { builder.set_verify(SslVerifyMode::NONE); MaybeMakeTlsConnector::Tls(MakeTlsConnector::new(builder.build())) } + + SslMode::VerifyCa | SslMode::VerifyFull => { + pg_config.ssl_mode(tokio_postgres::config::SslMode::Require); + let mut builder = SslConnector::builder(SslMethod::tls())?; + if let Some(ssl_root_cert) = config.ssl_root_cert { + builder.set_ca_file(ssl_root_cert).map_err(|e| { + anyhow!(format!("bad ssl root cert error: {}", e.to_report_string())) + })?; + } + let mut connector = MakeTlsConnector::new(builder.build()); + if !verify_hostname { + connector.set_callback(|config, _| { + config.set_verify_hostname(false); + Ok(()) + }); + } + MaybeMakeTlsConnector::Tls(connector) + } }; #[cfg(madsim)] let connector = NoTls; @@ -482,7 +514,8 @@ mod tests { database: "mydb".to_string(), schema: "public".to_string(), table: "mytest".to_string(), - sslmode: Default::default(), + ssl_mode: Default::default(), + ssl_root_cert: None, }; let table = PostgresExternalTable::connect(config).await.unwrap(); diff --git a/src/connector/src/source/cdc/source/reader.rs b/src/connector/src/source/cdc/source/reader.rs index b29ef1312bbd9..e2fc405cd6297 100644 --- a/src/connector/src/source/cdc/source/reader.rs +++ b/src/connector/src/source/cdc/source/reader.rs @@ -213,15 +213,15 @@ impl CdcSplitReader { let mut rx = self.rx; let source_id = self.source_id.to_string(); let metrics = self.source_ctx.metrics.clone(); + let connector_source_rows_received_metrics = metrics + .connector_source_rows_received + .with_guarded_label_values(&[source_type.as_str_name(), &source_id]); while let Some(result) = rx.recv().await { match result { Ok(GetEventStreamResponse { events, .. 
}) => { tracing::trace!("receive {} cdc events ", events.len()); - metrics - .connector_source_rows_received - .with_guarded_label_values(&[source_type.as_str_name(), &source_id]) - .inc_by(events.len() as u64); + connector_source_rows_received_metrics.inc_by(events.len() as u64); let msgs = events.into_iter().map(SourceMessage::from).collect_vec(); yield msgs; } diff --git a/src/connector/src/source/common.rs b/src/connector/src/source/common.rs index 3acb85a87150e..80aacff2899c7 100644 --- a/src/connector/src/source/common.rs +++ b/src/connector/src/source/common.rs @@ -11,6 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; use futures::{Stream, StreamExt, TryStreamExt}; use futures_async_stream::try_stream; @@ -33,6 +34,8 @@ pub(crate) async fn into_chunk_stream( let source_id = source_ctx.source_id.to_string(); let source_name = source_ctx.source_name.to_string(); let metrics = source_ctx.metrics.clone(); + let mut partition_input_count = HashMap::new(); + let mut partition_bytes_count = HashMap::new(); // add metrics to the data stream let data_stream = data_stream @@ -40,22 +43,38 @@ pub(crate) async fn into_chunk_stream( let mut by_split_id = std::collections::HashMap::new(); for msg in data_batch { + let split_id: String = msg.split_id.as_ref().to_string(); by_split_id - .entry(msg.split_id.as_ref()) + .entry(split_id.clone()) .or_insert_with(Vec::new) .push(msg); + partition_input_count + .entry(split_id.clone()) + .or_insert_with(|| { + metrics.partition_input_count.with_guarded_label_values(&[ + &actor_id, + &source_id, + &split_id.clone(), + &source_name, + &fragment_id, + ]) + }); + partition_bytes_count + .entry(split_id.clone()) + .or_insert_with(|| { + metrics.partition_input_bytes.with_guarded_label_values(&[ + &actor_id, + &source_id, + &split_id, + &source_name, + &fragment_id, + ]) + }); } - for (split_id, msgs) in by_split_id { - metrics - .partition_input_count - .with_guarded_label_values(&[ - &actor_id, - &source_id, - split_id, - &source_name, - &fragment_id, - ]) + partition_input_count + .get_mut(&split_id) + .unwrap() .inc_by(msgs.len() as u64); let sum_bytes = msgs @@ -63,15 +82,9 @@ pub(crate) async fn into_chunk_stream( .flat_map(|msg| msg.payload.as_ref().map(|p| p.len() as u64)) .sum(); - metrics - .partition_input_bytes - .with_guarded_label_values(&[ - &actor_id, - &source_id, - split_id, - &source_name, - &fragment_id, - ]) + partition_input_count + .get_mut(&split_id) + .unwrap() .inc_by(sum_bytes); } }) diff --git a/src/connector/src/source/datagen/source/reader.rs b/src/connector/src/source/datagen/source/reader.rs index e6c6db6af5a71..33c0c4ea29261 100644 --- a/src/connector/src/source/datagen/source/reader.rs +++ b/src/connector/src/source/datagen/source/reader.rs @@ -18,6 +18,7 @@ use anyhow::Context; use async_trait::async_trait; use futures::{Stream, StreamExt, TryStreamExt}; use risingwave_common::field_generator::{FieldGeneratorImpl, VarcharProperty}; +use risingwave_common_estimate_size::EstimateSize; use thiserror_ext::AsReport; use super::generator::DatagenEventGenerator; @@ -156,20 +157,30 @@ impl SplitReader for DatagenSplitReader { let source_name = self.source_ctx.source_name.to_string(); let split_id = self.split_id.to_string(); let metrics = self.source_ctx.metrics.clone(); + let partition_input_count_metric = + 
metrics.partition_input_count.with_guarded_label_values(&[ + &actor_id, + &source_id, + &split_id, + &source_name, + &fragment_id, + ]); + let partition_input_bytes_metric = + metrics.partition_input_bytes.with_guarded_label_values(&[ + &actor_id, + &source_id, + &split_id, + &source_name, + &fragment_id, + ]); + spawn_data_generation_stream( self.generator .into_native_stream() .inspect_ok(move |stream_chunk| { - metrics - .partition_input_count - .with_guarded_label_values(&[ - &actor_id, - &source_id, - &split_id, - &source_name, - &fragment_id, - ]) - .inc_by(stream_chunk.cardinality() as u64); + partition_input_count_metric.inc_by(stream_chunk.cardinality() as u64); + partition_input_bytes_metric + .inc_by(stream_chunk.estimated_size() as u64); }), BUFFER_SIZE, ) diff --git a/src/connector/src/source/filesystem/opendal_source/azblob_source.rs b/src/connector/src/source/filesystem/opendal_source/azblob_source.rs index 2ee050f21f812..8c6dac01ab87b 100644 --- a/src/connector/src/source/filesystem/opendal_source/azblob_source.rs +++ b/src/connector/src/source/filesystem/opendal_source/azblob_source.rs @@ -66,7 +66,6 @@ impl OpendalEnumerator { }; let compression_format = azblob_properties.compression_format; - Ok(Self { op, prefix, diff --git a/src/connector/src/source/filesystem/opendal_source/gcs_source.rs b/src/connector/src/source/filesystem/opendal_source/gcs_source.rs index 768f19fc36722..9a6d883f3c922 100644 --- a/src/connector/src/source/filesystem/opendal_source/gcs_source.rs +++ b/src/connector/src/source/filesystem/opendal_source/gcs_source.rs @@ -60,7 +60,6 @@ impl OpendalEnumerator { }; let compression_format = gcs_properties.compression_format; - Ok(Self { op, prefix, diff --git a/src/connector/src/source/filesystem/opendal_source/mod.rs b/src/connector/src/source/filesystem/opendal_source/mod.rs index cbb3c2a9c7b85..c0b4898758a79 100644 --- a/src/connector/src/source/filesystem/opendal_source/mod.rs +++ b/src/connector/src/source/filesystem/opendal_source/mod.rs @@ -48,7 +48,6 @@ pub struct FsSourceCommon { #[serde_as(as = "Option")] pub refresh_interval_sec: Option, } - #[derive(Clone, Debug, Deserialize, PartialEq, WithOptions)] pub struct GcsProperties { #[serde(rename = "gcs.bucket_name")] diff --git a/src/connector/src/source/filesystem/opendal_source/opendal_enumerator.rs b/src/connector/src/source/filesystem/opendal_source/opendal_enumerator.rs index 7396eac2ea38e..c9788aed28e69 100644 --- a/src/connector/src/source/filesystem/opendal_source/opendal_enumerator.rs +++ b/src/connector/src/source/filesystem/opendal_source/opendal_enumerator.rs @@ -72,7 +72,7 @@ impl OpendalEnumerator { let object_lister = self .op .lister_with(prefix) - .recursive(false) + .recursive(true) .metakey(Metakey::ContentLength | Metakey::LastModified) .await?; let stream = stream::unfold(object_lister, |mut object_lister| async move { @@ -108,5 +108,9 @@ impl OpendalEnumerator { pub fn get_matcher(&self) -> &Option { &self.matcher } + + pub fn get_prefix(&self) -> &str { + self.prefix.as_deref().unwrap_or("/") + } } pub type ObjectMetadataIter = BoxStream<'static, ConnectorResult>; diff --git a/src/connector/src/source/filesystem/opendal_source/opendal_reader.rs b/src/connector/src/source/filesystem/opendal_source/opendal_reader.rs index 5757452d2b4cd..1cfc9c1355167 100644 --- a/src/connector/src/source/filesystem/opendal_source/opendal_reader.rs +++ b/src/connector/src/source/filesystem/opendal_source/opendal_reader.rs @@ -176,6 +176,16 @@ impl OpendalReader { let mut offset: usize = 
split.offset; let mut batch_size: usize = 0; let mut batch = Vec::new(); + let partition_input_bytes_metrics = source_ctx + .metrics + .partition_input_bytes + .with_guarded_label_values(&[ + &actor_id, + &source_id, + &split_id, + &source_name, + &fragment_id, + ]); let stream = ReaderStream::with_capacity(buf_reader, STREAM_READER_CAPACITY); #[for_await] for read in stream { @@ -193,34 +203,14 @@ impl OpendalReader { batch.push(msg); if batch.len() >= max_chunk_size { - source_ctx - .metrics - .partition_input_bytes - .with_guarded_label_values(&[ - &actor_id, - &source_id, - &split_id, - &source_name, - &fragment_id, - ]) - .inc_by(batch_size as u64); + partition_input_bytes_metrics.inc_by(batch_size as u64); let yield_batch = std::mem::take(&mut batch); batch_size = 0; yield yield_batch; } } if !batch.is_empty() { - source_ctx - .metrics - .partition_input_bytes - .with_guarded_label_values(&[ - &actor_id, - &source_id, - &split_id, - &source_name, - &fragment_id, - ]) - .inc_by(batch_size as u64); + partition_input_bytes_metrics.inc_by(batch_size as u64); yield batch; } } diff --git a/src/connector/src/source/filesystem/s3/source/reader.rs b/src/connector/src/source/filesystem/s3/source/reader.rs index 7e02102686d00..910c98c1a5dae 100644 --- a/src/connector/src/source/filesystem/s3/source/reader.rs +++ b/src/connector/src/source/filesystem/s3/source/reader.rs @@ -106,6 +106,16 @@ impl S3FileReader { let mut offset: usize = split.offset; let mut batch_size: usize = 0; let mut batch = Vec::new(); + let partition_input_bytes_metrics = source_ctx + .metrics + .partition_input_bytes + .with_guarded_label_values(&[ + &actor_id, + &source_id, + &split_id, + &source_name, + &fragment_id, + ]); #[for_await] for read in stream { let bytes = read?; @@ -121,34 +131,14 @@ impl S3FileReader { batch_size += len; batch.push(msg); if batch.len() >= max_chunk_size { - source_ctx - .metrics - .partition_input_bytes - .with_guarded_label_values(&[ - &actor_id, - &source_id, - &split_id, - &source_name, - &fragment_id, - ]) - .inc_by(batch_size as u64); + partition_input_bytes_metrics.inc_by(batch_size as u64); let yield_batch = std::mem::take(&mut batch); batch_size = 0; yield yield_batch; } } if !batch.is_empty() { - source_ctx - .metrics - .partition_input_bytes - .with_guarded_label_values(&[ - &actor_id, - &source_id, - &split_id, - &source_name, - &fragment_id, - ]) - .inc_by(batch_size as u64); + partition_input_bytes_metrics.inc_by(batch_size as u64); yield batch; } } diff --git a/src/connector/src/source/iceberg/mod.rs b/src/connector/src/source/iceberg/mod.rs index d65929faafba1..845ffb66804d3 100644 --- a/src/connector/src/source/iceberg/mod.rs +++ b/src/connector/src/source/iceberg/mod.rs @@ -21,6 +21,7 @@ use async_trait::async_trait; use futures_async_stream::for_await; use iceberg::scan::FileScanTask; use iceberg::spec::TableMetadata; +use iceberg::table::Table; use itertools::Itertools; pub use parquet_file_reader::*; use risingwave_common::bail; @@ -28,7 +29,7 @@ use risingwave_common::catalog::Schema; use risingwave_common::types::JsonbVal; use serde::{Deserialize, Serialize}; -use crate::error::ConnectorResult; +use crate::error::{ConnectorError, ConnectorResult}; use crate::parser::ParserConfig; use crate::sink::iceberg::IcebergConfig; use crate::source::{ @@ -144,6 +145,7 @@ pub struct IcebergSplit { pub snapshot_id: i64, pub table_meta: TableMetadataJsonStr, pub files: Vec, + pub eq_delete_files: Vec, } impl SplitMetaData for IcebergSplit { @@ -206,6 +208,7 @@ impl 
IcebergSplitEnumerator { bail!("Batch parallelism is 0. Cannot split the iceberg files."); } let table = self.config.load_table_v2().await?; + let current_snapshot = table.metadata().current_snapshot(); if current_snapshot.is_none() { // If there is no snapshot, we will return a mock `IcebergSplit` with empty files. @@ -214,6 +217,7 @@ impl IcebergSplitEnumerator { snapshot_id: 0, // unused table_meta: TableMetadataJsonStr::serialize(table.metadata()), files: vec![], + eq_delete_files: vec![], }]); } @@ -228,10 +232,13 @@ impl IcebergSplitEnumerator { let snapshot = table .metadata() .snapshots() - .filter(|snapshot| snapshot.timestamp().timestamp_millis() <= timestamp) - .max_by_key(|snapshot| snapshot.timestamp().timestamp_millis()); + .map(|snapshot| snapshot.timestamp().map(|ts| ts.timestamp_millis())) + .collect::, _>>()? + .into_iter() + .filter(|&snapshot_millis| snapshot_millis <= timestamp) + .max_by_key(|&snapshot_millis| snapshot_millis); match snapshot { - Some(snapshot) => snapshot.snapshot_id(), + Some(snapshot) => snapshot, None => { // convert unix time to human readable time let time = chrono::DateTime::from_timestamp_millis(timestamp); @@ -248,12 +255,15 @@ impl IcebergSplitEnumerator { current_snapshot.unwrap().snapshot_id() } }; - let mut files = vec![]; + let require_names = Self::get_require_field_names(&table, snapshot_id, schema).await?; + + let mut data_files = vec![]; + let mut eq_delete_files = vec![]; let scan = table .scan() .snapshot_id(snapshot_id) - .select(schema.names()) + .select(require_names) .build() .map_err(|e| anyhow!(e))?; @@ -261,16 +271,27 @@ impl IcebergSplitEnumerator { #[for_await] for task in file_scan_stream { - let task = task.map_err(|e| anyhow!(e))?; - files.push(IcebergFileScanTaskJsonStr::serialize(&task)); + let mut task: FileScanTask = task.map_err(|e| anyhow!(e))?; + match task.data_file_content { + iceberg::spec::DataContentType::Data => { + data_files.push(IcebergFileScanTaskJsonStr::serialize(&task)); + } + iceberg::spec::DataContentType::EqualityDeletes => { + task.project_field_ids = task.equality_ids.clone(); + eq_delete_files.push(IcebergFileScanTaskJsonStr::serialize(&task)); + } + iceberg::spec::DataContentType::PositionDeletes => { + bail!("Position delete file is not supported") + } + } } let table_meta = TableMetadataJsonStr::serialize(table.metadata()); let split_num = batch_parallelism; // evenly split the files into splits based on the parallelism. 
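The hunk below spreads the data files evenly across batch_parallelism splits and then hands the len % parallelism leftovers to the first splits, one file each, dropping any split that ends up empty. A standalone sketch of that arithmetic, with plain strings standing in for the serialized FileScanTasks and a hypothetical distribute_files helper:

    fn distribute_files(files: &[String], parallelism: usize) -> Vec<Vec<String>> {
        assert!(parallelism > 0, "batch parallelism must be positive");
        let split_size = files.len() / parallelism;
        let remaining = files.len() % parallelism;
        // Every split starts with `split_size` files ...
        let mut splits: Vec<Vec<String>> = (0..parallelism)
            .map(|i| files[i * split_size..(i + 1) * split_size].to_vec())
            .collect();
        // ... and the first `remaining` splits each take one leftover file.
        for i in 0..remaining {
            splits[i].push(files[parallelism * split_size + i].clone());
        }
        // Mirrors the `filter(|split| !split.files.is_empty())` at the end of the hunk.
        splits.retain(|split| !split.is_empty());
        splits
    }

    fn main() {
        let files: Vec<String> = (0..7).map(|i| format!("data-{i}.parquet")).collect();
        let sizes: Vec<usize> = distribute_files(&files, 3).iter().map(|s| s.len()).collect();
        assert_eq!(sizes, vec![3, 2, 2]);
    }

With fewer files than parallelism, split_size is 0 and every file lands in the remainder loop, so each retained split holds exactly one file.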
- let split_size = files.len() / split_num; - let remaining = files.len() % split_num; + let split_size = data_files.len() / split_num; + let remaining = data_files.len() % split_num; let mut splits = vec![]; for i in 0..split_num { let start = i * split_size; @@ -279,20 +300,62 @@ impl IcebergSplitEnumerator { split_id: i as i64, snapshot_id, table_meta: table_meta.clone(), - files: files[start..end].to_vec(), + files: data_files[start..end].to_vec(), + eq_delete_files: eq_delete_files.clone(), }; splits.push(split); } for i in 0..remaining { splits[i] .files - .push(files[split_num * split_size + i].clone()); + .push(data_files[split_num * split_size + i].clone()); } Ok(splits .into_iter() .filter(|split| !split.files.is_empty()) .collect_vec()) } + + async fn get_require_field_names( + table: &Table, + snapshot_id: i64, + rw_schema: Schema, + ) -> ConnectorResult> { + let scan = table + .scan() + .snapshot_id(snapshot_id) + .build() + .map_err(|e| anyhow!(e))?; + let file_scan_stream = scan.plan_files().await.map_err(|e| anyhow!(e))?; + let schema = scan.snapshot().schema(table.metadata())?; + let mut equality_ids = vec![]; + #[for_await] + for task in file_scan_stream { + let task: FileScanTask = task.map_err(|e| anyhow!(e))?; + if task.data_file_content == iceberg::spec::DataContentType::EqualityDeletes { + if equality_ids.is_empty() { + equality_ids = task.equality_ids; + } else if equality_ids != task.equality_ids { + bail!("The schema of iceberg equality delete file must be consistent"); + } + } + } + let delete_columns = equality_ids + .into_iter() + .map(|id| match schema.name_by_field_id(id) { + Some(name) => Ok::(name.to_string()), + None => bail!("Delete field id {} not found in schema", id), + }) + .collect::>>()?; + let mut require_field_names: Vec<_> = rw_schema.names().to_vec(); + // Add the delete columns to the required field names + for names in delete_columns { + if !require_field_names.contains(&names) { + require_field_names.push(names); + } + } + Ok(require_field_names) + } } #[derive(Debug)] diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 5551c12b433b3..a425de418ef4a 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -17,10 +17,12 @@ use std::time::Duration; use anyhow::{anyhow, Context}; use async_trait::async_trait; +use prometheus::core::{AtomicI64, GenericGauge}; use rdkafka::consumer::{BaseConsumer, Consumer}; use rdkafka::error::KafkaResult; use rdkafka::{Offset, TopicPartitionList}; use risingwave_common::bail; +use risingwave_common::metrics::LabelGuardedMetric; use crate::error::ConnectorResult; use crate::source::base::SplitEnumerator; @@ -49,6 +51,7 @@ pub struct KafkaSplitEnumerator { stop_offset: KafkaEnumeratorOffset, sync_call_timeout: Duration, + high_watermark_metrics: HashMap, 2>>, } impl KafkaSplitEnumerator {} @@ -124,6 +127,7 @@ impl SplitEnumerator for KafkaSplitEnumerator { start_offset: scan_start_offset, stop_offset: KafkaEnumeratorOffset::None, sync_call_timeout: properties.common.sync_call_timeout, + high_watermark_metrics: HashMap::new(), }) } @@ -160,7 +164,10 @@ impl SplitEnumerator for KafkaSplitEnumerator { } impl KafkaSplitEnumerator { - async fn get_watermarks(&self, partitions: &[i32]) -> KafkaResult> { + async fn get_watermarks( + &mut self, + partitions: &[i32], + ) -> KafkaResult> { let mut map = HashMap::new(); for partition in partitions { let (low, high) = self @@ -358,15 
+365,20 @@ impl KafkaSplitEnumerator { } #[inline] - fn report_high_watermark(&self, partition: i32, offset: i64) { - self.context - .metrics - .high_watermark - .with_guarded_label_values(&[ - &self.context.info.source_id.to_string(), - &partition.to_string(), - ]) - .set(offset); + fn report_high_watermark(&mut self, partition: i32, offset: i64) { + let high_watermark_metrics = + self.high_watermark_metrics + .entry(partition) + .or_insert_with(|| { + self.context + .metrics + .high_watermark + .with_guarded_label_values(&[ + &self.context.info.source_id.to_string(), + &partition.to_string(), + ]) + }); + high_watermark_metrics.set(offset); } pub async fn check_reachability(&self) -> bool { diff --git a/src/connector/src/source/kafka/source/reader.rs b/src/connector/src/source/kafka/source/reader.rs index 72d4c36377c81..d58f1b70dd9fc 100644 --- a/src/connector/src/source/kafka/source/reader.rs +++ b/src/connector/src/source/kafka/source/reader.rs @@ -21,10 +21,12 @@ use anyhow::Context; use async_trait::async_trait; use futures::StreamExt; use futures_async_stream::try_stream; +use prometheus::core::{AtomicI64, GenericGauge}; use rdkafka::config::RDKafkaLogLevel; use rdkafka::consumer::{Consumer, StreamConsumer}; use rdkafka::error::KafkaError; use rdkafka::{ClientConfig, Message, Offset, TopicPartitionList}; +use risingwave_common::metrics::LabelGuardedMetric; use risingwave_pb::plan_common::additional_column::ColumnType as AdditionalColumnType; use crate::error::ConnectorResult as Result; @@ -185,21 +187,6 @@ impl SplitReader for KafkaSplitReader { } } -impl KafkaSplitReader { - fn report_latest_message_id(&self, split_id: &str, offset: i64) { - self.source_ctx - .metrics - .latest_message_id - .with_guarded_label_values(&[ - // source name is not available here - &self.source_ctx.source_id.to_string(), - &self.source_ctx.actor_id.to_string(), - split_id, - ]) - .set(offset); - } -} - impl KafkaSplitReader { #[try_stream(ok = Vec, error = crate::error::ConnectorError)] async fn into_data_stream(self) { @@ -236,6 +223,11 @@ impl KafkaSplitReader { ) }); + let mut latest_message_id_metrics: HashMap< + String, + LabelGuardedMetric, 3>, + > = HashMap::new(); + #[for_await] 'for_outer_loop: for msgs in self.consumer.stream().ready_chunks(max_chunk_size) { let msgs: Vec<_> = msgs @@ -250,7 +242,20 @@ impl KafkaSplitReader { for (partition, offset) in split_msg_offsets { let split_id = partition.to_string(); - self.report_latest_message_id(&split_id, offset); + latest_message_id_metrics + .entry(split_id.clone()) + .or_insert_with(|| { + self.source_ctx + .metrics + .latest_message_id + .with_guarded_label_values(&[ + // source name is not available here + &self.source_ctx.source_id.to_string(), + &self.source_ctx.actor_id.to_string(), + &split_id, + ]) + }) + .set(offset); } for msg in msgs { diff --git a/src/connector/src/source/kafka/stats.rs b/src/connector/src/source/kafka/stats.rs index 679f5c24bd2a1..7a36c4d1fffea 100644 --- a/src/connector/src/source/kafka/stats.rs +++ b/src/connector/src/source/kafka/stats.rs @@ -12,34 +12,37 @@ // See the License for the specific language governing permissions and // limitations under the License. 
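The recurring change in the Kafka enumerator and reader hunks above (and in source/common.rs) is to resolve a label-guarded metric handle once per label set and cache it, instead of calling with_guarded_label_values on every event. A minimal sketch of that caching pattern, using a stand-in Gauge type rather than the real LabelGuardedMetric handle:

    use std::collections::HashMap;

    // Stand-in for a cached metric handle; the real code stores the value
    // returned by `with_guarded_label_values`.
    #[derive(Default)]
    struct Gauge(i64);

    impl Gauge {
        fn set(&mut self, v: i64) {
            self.0 = v;
        }
    }

    #[derive(Default)]
    struct WatermarkReporter {
        // One cached handle per partition, created on first use.
        high_watermark_metrics: HashMap<i32, Gauge>,
    }

    impl WatermarkReporter {
        fn report_high_watermark(&mut self, partition: i32, offset: i64) {
            self.high_watermark_metrics
                .entry(partition)
                .or_insert_with(Gauge::default)
                .set(offset);
        }
    }

    fn main() {
        let mut reporter = WatermarkReporter::default();
        reporter.report_high_watermark(0, 100);
        reporter.report_high_watermark(0, 150); // reuses the cached handle
        assert_eq!(reporter.high_watermark_metrics[&0].0, 150);
    }

The entry().or_insert_with() lookup keeps the steady state to a single HashMap probe; only the first report for a partition pays for label resolution and registration.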
-use prometheus::core::{AtomicU64, GenericGaugeVec}; -use prometheus::{register_int_gauge_vec_with_registry, IntGaugeVec, Registry}; +use prometheus::core::AtomicU64; +use prometheus::Registry; use rdkafka::statistics::{Broker, ConsumerGroup, Partition, Topic, Window}; use rdkafka::Statistics; -use risingwave_common::metrics::register_uint_gauge_vec_with_registry; +use risingwave_common::metrics::{LabelGuardedIntGaugeVec, LabelGuardedUintGaugeVec}; +use risingwave_common::{ + register_guarded_int_gauge_vec_with_registry, register_guarded_uint_gauge_vec_with_registry, +}; #[derive(Debug, Clone)] pub struct RdKafkaStats { pub registry: Registry, - pub ts: IntGaugeVec, - pub time: IntGaugeVec, - pub age: IntGaugeVec, - pub replyq: IntGaugeVec, - pub msg_cnt: GenericGaugeVec, - pub msg_size: GenericGaugeVec, - pub msg_max: GenericGaugeVec, - pub msg_size_max: GenericGaugeVec, - pub tx: IntGaugeVec, - pub tx_bytes: IntGaugeVec, - pub rx: IntGaugeVec, - pub rx_bytes: IntGaugeVec, - pub tx_msgs: IntGaugeVec, - pub tx_msgs_bytes: IntGaugeVec, - pub rx_msgs: IntGaugeVec, - pub rx_msgs_bytes: IntGaugeVec, - pub simple_cnt: IntGaugeVec, - pub metadata_cache_cnt: IntGaugeVec, + pub ts: LabelGuardedIntGaugeVec<2>, + pub time: LabelGuardedIntGaugeVec<2>, + pub age: LabelGuardedIntGaugeVec<2>, + pub replyq: LabelGuardedIntGaugeVec<2>, + pub msg_cnt: LabelGuardedUintGaugeVec<2>, + pub msg_size: LabelGuardedUintGaugeVec<2>, + pub msg_max: LabelGuardedUintGaugeVec<2>, + pub msg_size_max: LabelGuardedUintGaugeVec<2>, + pub tx: LabelGuardedIntGaugeVec<2>, + pub tx_bytes: LabelGuardedIntGaugeVec<2>, + pub rx: LabelGuardedIntGaugeVec<2>, + pub rx_bytes: LabelGuardedIntGaugeVec<2>, + pub tx_msgs: LabelGuardedIntGaugeVec<2>, + pub tx_msgs_bytes: LabelGuardedIntGaugeVec<2>, + pub rx_msgs: LabelGuardedIntGaugeVec<2>, + pub rx_msgs_bytes: LabelGuardedIntGaugeVec<2>, + pub simple_cnt: LabelGuardedIntGaugeVec<2>, + pub metadata_cache_cnt: LabelGuardedIntGaugeVec<2>, pub broker_stats: BrokerStats, pub topic_stats: TopicStats, @@ -50,29 +53,29 @@ pub struct RdKafkaStats { pub struct BrokerStats { pub registry: Registry, - pub state_age: IntGaugeVec, - pub outbuf_cnt: IntGaugeVec, - pub outbuf_msg_cnt: IntGaugeVec, - pub waitresp_cnt: IntGaugeVec, - pub waitresp_msg_cnt: IntGaugeVec, - pub tx: GenericGaugeVec, - pub tx_bytes: GenericGaugeVec, - pub tx_errs: GenericGaugeVec, - pub tx_retries: GenericGaugeVec, - pub tx_idle: IntGaugeVec, - pub req_timeouts: GenericGaugeVec, - pub rx: GenericGaugeVec, - pub rx_bytes: GenericGaugeVec, - pub rx_errs: GenericGaugeVec, - pub rx_corriderrs: GenericGaugeVec, - pub rx_partial: GenericGaugeVec, - pub rx_idle: IntGaugeVec, - pub req: IntGaugeVec, - pub zbuf_grow: GenericGaugeVec, - pub buf_grow: GenericGaugeVec, - pub wakeups: GenericGaugeVec, - pub connects: IntGaugeVec, - pub disconnects: IntGaugeVec, + pub state_age: LabelGuardedIntGaugeVec<4>, + pub outbuf_cnt: LabelGuardedIntGaugeVec<4>, + pub outbuf_msg_cnt: LabelGuardedIntGaugeVec<4>, + pub waitresp_cnt: LabelGuardedIntGaugeVec<4>, + pub waitresp_msg_cnt: LabelGuardedIntGaugeVec<4>, + pub tx: LabelGuardedUintGaugeVec<4>, + pub tx_bytes: LabelGuardedUintGaugeVec<4>, + pub tx_errs: LabelGuardedUintGaugeVec<4>, + pub tx_retries: LabelGuardedUintGaugeVec<4>, + pub tx_idle: LabelGuardedIntGaugeVec<4>, + pub req_timeouts: LabelGuardedUintGaugeVec<4>, + pub rx: LabelGuardedUintGaugeVec<4>, + pub rx_bytes: LabelGuardedUintGaugeVec<4>, + pub rx_errs: LabelGuardedUintGaugeVec<4>, + pub rx_corriderrs: LabelGuardedUintGaugeVec<4>, + pub 
rx_partial: LabelGuardedUintGaugeVec<4>, + pub rx_idle: LabelGuardedIntGaugeVec<4>, + pub req: LabelGuardedIntGaugeVec<5>, + pub zbuf_grow: LabelGuardedUintGaugeVec<4>, + pub buf_grow: LabelGuardedUintGaugeVec<4>, + pub wakeups: LabelGuardedUintGaugeVec<4>, + pub connects: LabelGuardedIntGaugeVec<4>, + pub disconnects: LabelGuardedIntGaugeVec<4>, pub int_latency: StatsWindow, pub outbuf_latency: StatsWindow, pub rtt: StatsWindow, @@ -83,7 +86,7 @@ pub struct BrokerStats { pub struct TopicStats { pub registry: Registry, - pub metadata_age: IntGaugeVec, + pub metadata_age: LabelGuardedIntGaugeVec<3>, pub batch_size: StatsWindow, pub batch_cnt: StatsWindow, pub partitions: PartitionStats, @@ -93,58 +96,58 @@ pub struct TopicStats { pub struct StatsWindow { pub registry: Registry, - pub min: IntGaugeVec, - pub max: IntGaugeVec, - pub avg: IntGaugeVec, - pub sum: IntGaugeVec, - pub cnt: IntGaugeVec, - pub stddev: IntGaugeVec, - pub hdr_size: IntGaugeVec, - pub p50: IntGaugeVec, - pub p75: IntGaugeVec, - pub p90: IntGaugeVec, - pub p95: IntGaugeVec, - pub p99: IntGaugeVec, - pub p99_99: IntGaugeVec, - pub out_of_range: IntGaugeVec, + pub min: LabelGuardedIntGaugeVec<4>, + pub max: LabelGuardedIntGaugeVec<4>, + pub avg: LabelGuardedIntGaugeVec<4>, + pub sum: LabelGuardedIntGaugeVec<4>, + pub cnt: LabelGuardedIntGaugeVec<4>, + pub stddev: LabelGuardedIntGaugeVec<4>, + pub hdr_size: LabelGuardedIntGaugeVec<4>, + pub p50: LabelGuardedIntGaugeVec<4>, + pub p75: LabelGuardedIntGaugeVec<4>, + pub p90: LabelGuardedIntGaugeVec<4>, + pub p95: LabelGuardedIntGaugeVec<4>, + pub p99: LabelGuardedIntGaugeVec<4>, + pub p99_99: LabelGuardedIntGaugeVec<4>, + pub out_of_range: LabelGuardedIntGaugeVec<4>, } #[derive(Debug, Clone)] pub struct ConsumerGroupStats { pub registry: Registry, - pub state_age: IntGaugeVec, + pub state_age: LabelGuardedIntGaugeVec<3>, // todo: (do not know value set) join_state: IntGaugeVec, - pub rebalance_age: IntGaugeVec, - pub rebalance_cnt: IntGaugeVec, + pub rebalance_age: LabelGuardedIntGaugeVec<3>, + pub rebalance_cnt: LabelGuardedIntGaugeVec<3>, // todo: (cannot handle string) rebalance_reason, - pub assignment_size: IntGaugeVec, + pub assignment_size: LabelGuardedIntGaugeVec<3>, } impl ConsumerGroupStats { pub fn new(registry: Registry) -> Self { - let state_age = register_int_gauge_vec_with_registry!( + let state_age = register_guarded_int_gauge_vec_with_registry!( "rdkafka_consumer_group_state_age", "Age of the consumer group state in seconds", &["id", "client_id", "state"], registry ) .unwrap(); - let rebalance_age = register_int_gauge_vec_with_registry!( + let rebalance_age = register_guarded_int_gauge_vec_with_registry!( "rdkafka_consumer_group_rebalance_age", "Age of the last rebalance in seconds", &["id", "client_id", "state"], registry ) .unwrap(); - let rebalance_cnt = register_int_gauge_vec_with_registry!( + let rebalance_cnt = register_guarded_int_gauge_vec_with_registry!( "rdkafka_consumer_group_rebalance_cnt", "Number of rebalances", &["id", "client_id", "state"], registry ) .unwrap(); - let assignment_size = register_int_gauge_vec_with_registry!( + let assignment_size = register_guarded_int_gauge_vec_with_registry!( "rdkafka_consumer_group_assignment_size", "Number of assigned partitions", &["id", "client_id", "state"], @@ -164,16 +167,16 @@ impl ConsumerGroupStats { pub fn report(&self, id: &str, client_id: &str, stats: &ConsumerGroup) { let state = stats.state.as_str(); self.state_age - .with_label_values(&[id, client_id, state]) + .with_guarded_label_values(&[id, 
client_id, state]) .set(stats.stateage); self.rebalance_age - .with_label_values(&[id, client_id, state]) + .with_guarded_label_values(&[id, client_id, state]) .set(stats.rebalance_age); self.rebalance_cnt - .with_label_values(&[id, client_id, state]) + .with_guarded_label_values(&[id, client_id, state]) .set(stats.rebalance_cnt); self.assignment_size - .with_label_values(&[id, client_id, state]) + .with_guarded_label_values(&[id, client_id, state]) .set(stats.assignment_size as i64); } } @@ -181,98 +184,98 @@ impl ConsumerGroupStats { impl StatsWindow { pub fn new(registry: Registry, path: &str) -> Self { let get_metric_name = |name: &str| format!("rdkafka_{}_{}", path, name); - let min = register_int_gauge_vec_with_registry!( + let min = register_guarded_int_gauge_vec_with_registry!( get_metric_name("min"), "Minimum value", &["id", "client_id", "broker", "topic"], registry ) .unwrap(); - let max = register_int_gauge_vec_with_registry!( + let max = register_guarded_int_gauge_vec_with_registry!( get_metric_name("max"), "Maximum value", &["id", "client_id", "broker", "topic"], registry ) .unwrap(); - let avg = register_int_gauge_vec_with_registry!( + let avg = register_guarded_int_gauge_vec_with_registry!( get_metric_name("avg"), "Average value", &["id", "client_id", "broker", "topic"], registry ) .unwrap(); - let sum = register_int_gauge_vec_with_registry!( + let sum = register_guarded_int_gauge_vec_with_registry!( get_metric_name("sum"), "Sum of values", &["id", "client_id", "broker", "topic"], registry ) .unwrap(); - let cnt = register_int_gauge_vec_with_registry!( + let cnt = register_guarded_int_gauge_vec_with_registry!( get_metric_name("cnt"), "Count of values", &["id", "client_id", "broker", "topic"], registry ) .unwrap(); - let stddev = register_int_gauge_vec_with_registry!( + let stddev = register_guarded_int_gauge_vec_with_registry!( get_metric_name("stddev"), "Standard deviation", &["id", "client_id", "broker", "topic"], registry ) .unwrap(); - let hdr_size = register_int_gauge_vec_with_registry!( + let hdr_size = register_guarded_int_gauge_vec_with_registry!( get_metric_name("hdrsize"), "Size of the histogram header", &["id", "client_id", "broker", "topic"], registry ) .unwrap(); - let p50 = register_int_gauge_vec_with_registry!( + let p50 = register_guarded_int_gauge_vec_with_registry!( get_metric_name("p50"), "50th percentile", &["id", "client_id", "broker", "topic"], registry ) .unwrap(); - let p75 = register_int_gauge_vec_with_registry!( + let p75 = register_guarded_int_gauge_vec_with_registry!( get_metric_name("p75"), "75th percentile", &["id", "client_id", "broker", "topic"], registry ) .unwrap(); - let p90 = register_int_gauge_vec_with_registry!( + let p90 = register_guarded_int_gauge_vec_with_registry!( get_metric_name("p90"), "90th percentile", &["id", "client_id", "broker", "topic"], registry ) .unwrap(); - let p95 = register_int_gauge_vec_with_registry!( + let p95 = register_guarded_int_gauge_vec_with_registry!( get_metric_name("p95"), "95th percentile", &["id", "client_id", "broker", "topic"], registry ) .unwrap(); - let p99 = register_int_gauge_vec_with_registry!( + let p99 = register_guarded_int_gauge_vec_with_registry!( get_metric_name("p99"), "99th percentile", &["id", "client_id", "broker", "topic"], registry ) .unwrap(); - let p99_99 = register_int_gauge_vec_with_registry!( + let p99_99 = register_guarded_int_gauge_vec_with_registry!( get_metric_name("p99_99"), "99.99th percentile", &["id", "client_id", "broker", "topic"], registry ) .unwrap(); - let 
out_of_range = register_int_gauge_vec_with_registry!( + let out_of_range = register_guarded_int_gauge_vec_with_registry!( get_metric_name("out_of_range"), "Out of range values", &["id", "client_id", "broker", "topic"], @@ -302,26 +305,32 @@ impl StatsWindow { pub fn report(&self, id: &str, client_id: &str, broker: &str, topic: &str, stats: &Window) { let labels = [id, client_id, broker, topic]; - self.min.with_label_values(&labels).set(stats.min); - self.max.with_label_values(&labels).set(stats.max); - self.avg.with_label_values(&labels).set(stats.avg); - self.sum.with_label_values(&labels).set(stats.sum); - self.cnt.with_label_values(&labels).set(stats.cnt); - self.stddev.with_label_values(&labels).set(stats.stddev); - self.hdr_size.with_label_values(&labels).set(stats.hdrsize); - self.p50.with_label_values(&labels).set(stats.p50); - self.p75.with_label_values(&labels).set(stats.p75); - self.p90.with_label_values(&labels).set(stats.p90); - self.p99_99.with_label_values(&labels).set(stats.p99_99); + self.min.with_guarded_label_values(&labels).set(stats.min); + self.max.with_guarded_label_values(&labels).set(stats.max); + self.avg.with_guarded_label_values(&labels).set(stats.avg); + self.sum.with_guarded_label_values(&labels).set(stats.sum); + self.cnt.with_guarded_label_values(&labels).set(stats.cnt); + self.stddev + .with_guarded_label_values(&labels) + .set(stats.stddev); + self.hdr_size + .with_guarded_label_values(&labels) + .set(stats.hdrsize); + self.p50.with_guarded_label_values(&labels).set(stats.p50); + self.p75.with_guarded_label_values(&labels).set(stats.p75); + self.p90.with_guarded_label_values(&labels).set(stats.p90); + self.p99_99 + .with_guarded_label_values(&labels) + .set(stats.p99_99); self.out_of_range - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.outofrange); } } impl TopicStats { pub fn new(registry: Registry) -> Self { - let metadata_age = register_int_gauge_vec_with_registry!( + let metadata_age = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_metadata_age", "Age of the topic metadata in milliseconds", &["id", "client_id", "topic"], @@ -348,7 +357,7 @@ impl TopicStats { fn report_inner(&self, id: &str, client_id: &str, topic: &str, stats: &Topic) { self.metadata_age - .with_label_values(&[id, client_id, topic]) + .with_guarded_label_values(&[id, client_id, topic]) .set(stats.metadata_age); self.batch_size .report(id, client_id, "", topic, &stats.batchsize); @@ -362,212 +371,212 @@ impl TopicStats { pub struct PartitionStats { pub registry: Registry, - pub msgq_cnt: IntGaugeVec, - pub msgq_bytes: GenericGaugeVec, - pub xmit_msgq_cnt: IntGaugeVec, - pub xmit_msgq_bytes: GenericGaugeVec, - pub fetchq_cnt: IntGaugeVec, - pub fetchq_size: GenericGaugeVec, - pub query_offset: IntGaugeVec, - pub next_offset: IntGaugeVec, - pub app_offset: IntGaugeVec, - pub stored_offset: IntGaugeVec, - pub committed_offset: IntGaugeVec, - pub eof_offset: IntGaugeVec, - pub lo_offset: IntGaugeVec, - pub hi_offset: IntGaugeVec, - pub consumer_lag: IntGaugeVec, - pub consumer_lag_store: IntGaugeVec, - pub txmsgs: GenericGaugeVec, - pub txbytes: GenericGaugeVec, - pub rxmsgs: GenericGaugeVec, - pub rxbytes: GenericGaugeVec, - pub msgs: GenericGaugeVec, - pub rx_ver_drops: GenericGaugeVec, - pub msgs_inflight: IntGaugeVec, - pub next_ack_seq: IntGaugeVec, - pub next_err_seq: IntGaugeVec, - pub acked_msgid: GenericGaugeVec, + pub msgq_cnt: LabelGuardedIntGaugeVec<4>, + pub msgq_bytes: LabelGuardedUintGaugeVec<4>, + pub xmit_msgq_cnt: 
LabelGuardedIntGaugeVec<4>, + pub xmit_msgq_bytes: LabelGuardedUintGaugeVec<4>, + pub fetchq_cnt: LabelGuardedIntGaugeVec<4>, + pub fetchq_size: LabelGuardedUintGaugeVec<4>, + pub query_offset: LabelGuardedIntGaugeVec<4>, + pub next_offset: LabelGuardedIntGaugeVec<4>, + pub app_offset: LabelGuardedIntGaugeVec<4>, + pub stored_offset: LabelGuardedIntGaugeVec<4>, + pub committed_offset: LabelGuardedIntGaugeVec<4>, + pub eof_offset: LabelGuardedIntGaugeVec<4>, + pub lo_offset: LabelGuardedIntGaugeVec<4>, + pub hi_offset: LabelGuardedIntGaugeVec<4>, + pub consumer_lag: LabelGuardedIntGaugeVec<4>, + pub consumer_lag_store: LabelGuardedIntGaugeVec<4>, + pub txmsgs: LabelGuardedUintGaugeVec<4>, + pub txbytes: LabelGuardedUintGaugeVec<4>, + pub rxmsgs: LabelGuardedUintGaugeVec<4>, + pub rxbytes: LabelGuardedUintGaugeVec<4>, + pub msgs: LabelGuardedUintGaugeVec<4>, + pub rx_ver_drops: LabelGuardedUintGaugeVec<4>, + pub msgs_inflight: LabelGuardedIntGaugeVec<4>, + pub next_ack_seq: LabelGuardedIntGaugeVec<4>, + pub next_err_seq: LabelGuardedIntGaugeVec<4>, + pub acked_msgid: LabelGuardedUintGaugeVec<4>, } impl PartitionStats { pub fn new(registry: Registry) -> Self { - let msgq_cnt = register_int_gauge_vec_with_registry!( + let msgq_cnt = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_msgq_cnt", "Number of messages in the producer queue", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let msgq_bytes = register_uint_gauge_vec_with_registry!( + let msgq_bytes = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_topic_partition_msgq_bytes", "Size of messages in the producer queue", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let xmit_msgq_cnt = register_int_gauge_vec_with_registry!( + let xmit_msgq_cnt = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_xmit_msgq_cnt", "Number of messages in the transmit queue", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let xmit_msgq_bytes = register_uint_gauge_vec_with_registry!( + let xmit_msgq_bytes = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_topic_partition_xmit_msgq_bytes", "Size of messages in the transmit queue", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let fetchq_cnt = register_int_gauge_vec_with_registry!( + let fetchq_cnt = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_fetchq_cnt", "Number of messages in the fetch queue", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let fetchq_size = register_uint_gauge_vec_with_registry!( + let fetchq_size = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_topic_partition_fetchq_size", "Size of messages in the fetch queue", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let query_offset = register_int_gauge_vec_with_registry!( + let query_offset = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_query_offset", "Current query offset", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let next_offset = register_int_gauge_vec_with_registry!( + let next_offset = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_next_offset", "Next offset to query", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let app_offset = register_int_gauge_vec_with_registry!( + let app_offset = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_app_offset", "Last acknowledged offset", &["id", "client_id", 
"topic", "partition"], registry ) .unwrap(); - let stored_offset = register_int_gauge_vec_with_registry!( + let stored_offset = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_stored_offset", "Last stored offset", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let committed_offset = register_int_gauge_vec_with_registry!( + let committed_offset = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_committed_offset", "Last committed offset", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let eof_offset = register_int_gauge_vec_with_registry!( + let eof_offset = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_eof_offset", "Last offset in broker log", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let lo_offset = register_int_gauge_vec_with_registry!( + let lo_offset = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_lo_offset", "Low offset", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let hi_offset = register_int_gauge_vec_with_registry!( + let hi_offset = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_hi_offset", "High offset", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let consumer_lag = register_int_gauge_vec_with_registry!( + let consumer_lag = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_consumer_lag", "Consumer lag", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let consumer_lag_store = register_int_gauge_vec_with_registry!( + let consumer_lag_store = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_consumer_lag_store", "Consumer lag stored", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let txmsgs = register_uint_gauge_vec_with_registry!( + let txmsgs = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_topic_partition_txmsgs", "Number of transmitted messages", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let txbytes = register_uint_gauge_vec_with_registry!( + let txbytes = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_topic_partition_txbytes", "Number of transmitted bytes", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let rxmsgs = register_uint_gauge_vec_with_registry!( + let rxmsgs = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_topic_partition_rxmsgs", "Number of received messages", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let rxbytes = register_uint_gauge_vec_with_registry!( + let rxbytes = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_topic_partition_rxbytes", "Number of received bytes", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let msgs = register_uint_gauge_vec_with_registry!( + let msgs = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_topic_partition_msgs", "Number of messages in partition", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let rx_ver_drops = register_uint_gauge_vec_with_registry!( + let rx_ver_drops = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_topic_partition_rx_ver_drops", "Number of received messages dropped due to version mismatch", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let msgs_inflight = register_int_gauge_vec_with_registry!( + let msgs_inflight = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_msgs_inflight", 
"Number of messages in-flight", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let next_ack_seq = register_int_gauge_vec_with_registry!( + let next_ack_seq = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_next_ack_seq", "Next ack sequence number", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let next_err_seq = register_int_gauge_vec_with_registry!( + let next_err_seq = register_guarded_int_gauge_vec_with_registry!( "rdkafka_topic_partition_next_err_seq", "Next error sequence number", &["id", "client_id", "topic", "partition"], registry ) .unwrap(); - let acked_msgid = register_uint_gauge_vec_with_registry!( + let acked_msgid = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_topic_partition_acked_msgid", "Acknowledged message ID", &["id", "client_id", "topic", "partition"], @@ -615,78 +624,88 @@ impl PartitionStats { fn report_inner(&self, id: &str, client_id: &str, topic: &str, stats: &Partition) { let labels = [id, client_id, topic, &stats.partition.to_string()]; - self.msgq_cnt.with_label_values(&labels).set(stats.msgq_cnt); + self.msgq_cnt + .with_guarded_label_values(&labels) + .set(stats.msgq_cnt); self.msgq_bytes - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.msgq_bytes); self.xmit_msgq_cnt - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.xmit_msgq_cnt); self.xmit_msgq_bytes - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.xmit_msgq_bytes); self.fetchq_cnt - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.fetchq_cnt); self.fetchq_size - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.fetchq_size); self.query_offset - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.query_offset); self.next_offset - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.next_offset); self.app_offset - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.app_offset); self.stored_offset - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.stored_offset); self.committed_offset - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.committed_offset); self.eof_offset - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.eof_offset); self.lo_offset - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.lo_offset); self.hi_offset - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.hi_offset); self.consumer_lag - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.consumer_lag); self.consumer_lag_store - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.consumer_lag_stored); - self.txmsgs.with_label_values(&labels).set(stats.txmsgs); - self.txbytes.with_label_values(&labels).set(stats.txbytes); - self.rxmsgs.with_label_values(&labels).set(stats.rxmsgs); - self.rxbytes.with_label_values(&labels).set(stats.rxbytes); - self.msgs.with_label_values(&labels).set(stats.msgs); + self.txmsgs + .with_guarded_label_values(&labels) + .set(stats.txmsgs); + self.txbytes + .with_guarded_label_values(&labels) + .set(stats.txbytes); + self.rxmsgs + .with_guarded_label_values(&labels) + .set(stats.rxmsgs); + self.rxbytes + .with_guarded_label_values(&labels) + .set(stats.rxbytes); + self.msgs.with_guarded_label_values(&labels).set(stats.msgs); 
self.rx_ver_drops - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.rx_ver_drops); self.msgs_inflight - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.msgs_inflight); self.next_ack_seq - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.next_ack_seq); self.next_err_seq - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.next_err_seq); self.acked_msgid - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.acked_msgid); } } impl RdKafkaStats { pub fn new(registry: Registry) -> Self { - let ts = register_int_gauge_vec_with_registry!( + let ts = register_guarded_int_gauge_vec_with_registry!( "rdkafka_top_ts", "librdkafka's internal monotonic clock (microseconds)", // we cannot tell whether it is for consumer or producer, @@ -695,119 +714,119 @@ impl RdKafkaStats { registry ) .unwrap(); - let time = register_int_gauge_vec_with_registry!( + let time = register_guarded_int_gauge_vec_with_registry!( "rdkafka_top_time", "Wall clock time in seconds since the epoch", &["id", "client_id"], registry ) .unwrap(); - let age = register_int_gauge_vec_with_registry!( + let age = register_guarded_int_gauge_vec_with_registry!( "rdkafka_top_age", "Age of the topic metadata in milliseconds", &["id", "client_id"], registry ) .unwrap(); - let replyq = register_int_gauge_vec_with_registry!( + let replyq = register_guarded_int_gauge_vec_with_registry!( "rdkafka_top_replyq", "Number of replies waiting to be served", &["id", "client_id"], registry ) .unwrap(); - let msg_cnt = register_uint_gauge_vec_with_registry!( + let msg_cnt = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_top_msg_cnt", "Number of messages in all topics", &["id", "client_id"], registry ) .unwrap(); - let msg_size = register_uint_gauge_vec_with_registry!( + let msg_size = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_top_msg_size", "Size of messages in all topics", &["id", "client_id"], registry ) .unwrap(); - let msg_max = register_uint_gauge_vec_with_registry!( + let msg_max = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_top_msg_max", "Maximum message size in all topics", &["id", "client_id"], registry ) .unwrap(); - let msg_size_max = register_uint_gauge_vec_with_registry!( + let msg_size_max = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_top_msg_size_max", "Maximum message size in all topics", &["id", "client_id"], registry ) .unwrap(); - let tx = register_int_gauge_vec_with_registry!( + let tx = register_guarded_int_gauge_vec_with_registry!( "rdkafka_top_tx", "Number of transmitted messages", &["id", "client_id"], registry ) .unwrap(); - let tx_bytes = register_int_gauge_vec_with_registry!( + let tx_bytes = register_guarded_int_gauge_vec_with_registry!( "rdkafka_top_tx_bytes", "Number of transmitted bytes", &["id", "client_id"], registry ) .unwrap(); - let rx = register_int_gauge_vec_with_registry!( + let rx = register_guarded_int_gauge_vec_with_registry!( "rdkafka_top_rx", "Number of received messages", &["id", "client_id"], registry ) .unwrap(); - let rx_bytes = register_int_gauge_vec_with_registry!( + let rx_bytes = register_guarded_int_gauge_vec_with_registry!( "rdkafka_top_rx_bytes", "Number of received bytes", &["id", "client_id"], registry ) .unwrap(); - let tx_msgs = register_int_gauge_vec_with_registry!( + let tx_msgs = register_guarded_int_gauge_vec_with_registry!( "rdkafka_top_tx_msgs", "Number of transmitted messages", &["id", "client_id"], registry ) 
.unwrap(); - let tx_msgs_bytes = register_int_gauge_vec_with_registry!( + let tx_msgs_bytes = register_guarded_int_gauge_vec_with_registry!( "rdkafka_top_tx_msgs_bytes", "Number of transmitted bytes", &["id", "client_id"], registry ) .unwrap(); - let rx_msgs = register_int_gauge_vec_with_registry!( + let rx_msgs = register_guarded_int_gauge_vec_with_registry!( "rdkafka_top_rx_msgs", "Number of received messages", &["id", "client_id"], registry ) .unwrap(); - let rx_msgs_bytes = register_int_gauge_vec_with_registry!( + let rx_msgs_bytes = register_guarded_int_gauge_vec_with_registry!( "rdkafka_top_rx_msgs_bytes", "Number of received bytes", &["id", "client_id"], registry ) .unwrap(); - let simple_cnt = register_int_gauge_vec_with_registry!( + let simple_cnt = register_guarded_int_gauge_vec_with_registry!( "rdkafka_top_simple_cnt", "Number of simple consumer queues", &["id", "client_id"], registry ) .unwrap(); - let metadata_cache_cnt = register_int_gauge_vec_with_registry!( + let metadata_cache_cnt = register_guarded_int_gauge_vec_with_registry!( "rdkafka_top_metadata_cache_cnt", "Number of entries in the metadata cache", &["id", "client_id"], @@ -846,51 +865,59 @@ impl RdKafkaStats { pub fn report(&self, id: &str, stats: &Statistics) { let client_id = stats.name.as_str(); - self.ts.with_label_values(&[id, client_id]).set(stats.ts); + self.ts + .with_guarded_label_values(&[id, client_id]) + .set(stats.ts); self.time - .with_label_values(&[id, client_id]) + .with_guarded_label_values(&[id, client_id]) .set(stats.time); - self.age.with_label_values(&[id, client_id]).set(stats.age); + self.age + .with_guarded_label_values(&[id, client_id]) + .set(stats.age); self.replyq - .with_label_values(&[id, client_id]) + .with_guarded_label_values(&[id, client_id]) .set(stats.replyq); self.msg_cnt - .with_label_values(&[id, client_id]) + .with_guarded_label_values(&[id, client_id]) .set(stats.msg_cnt); self.msg_size - .with_label_values(&[id, client_id]) + .with_guarded_label_values(&[id, client_id]) .set(stats.msg_size); self.msg_max - .with_label_values(&[id, client_id]) + .with_guarded_label_values(&[id, client_id]) .set(stats.msg_max); self.msg_size_max - .with_label_values(&[id, client_id]) + .with_guarded_label_values(&[id, client_id]) .set(stats.msg_size_max); - self.tx.with_label_values(&[id, client_id]).set(stats.tx); + self.tx + .with_guarded_label_values(&[id, client_id]) + .set(stats.tx); self.tx_bytes - .with_label_values(&[id, client_id]) + .with_guarded_label_values(&[id, client_id]) .set(stats.tx_bytes); - self.rx.with_label_values(&[id, client_id]).set(stats.rx); + self.rx + .with_guarded_label_values(&[id, client_id]) + .set(stats.rx); self.rx_bytes - .with_label_values(&[id, client_id]) + .with_guarded_label_values(&[id, client_id]) .set(stats.rx_bytes); self.tx_msgs - .with_label_values(&[id, client_id]) + .with_guarded_label_values(&[id, client_id]) .set(stats.txmsgs); self.tx_msgs_bytes - .with_label_values(&[id, client_id]) + .with_guarded_label_values(&[id, client_id]) .set(stats.txmsg_bytes); self.rx_msgs - .with_label_values(&[id, client_id]) + .with_guarded_label_values(&[id, client_id]) .set(stats.rxmsgs); self.rx_msgs_bytes - .with_label_values(&[id, client_id]) + .with_guarded_label_values(&[id, client_id]) .set(stats.rxmsg_bytes); self.simple_cnt - .with_label_values(&[id, client_id]) + .with_guarded_label_values(&[id, client_id]) .set(stats.simple_cnt); self.metadata_cache_cnt - .with_label_values(&[id, client_id]) + .with_guarded_label_values(&[id, client_id]) 
.set(stats.metadata_cache_cnt); self.broker_stats.report(id, client_id, stats); @@ -903,161 +930,161 @@ impl RdKafkaStats { impl BrokerStats { pub fn new(registry: Registry) -> Self { - let state_age = register_int_gauge_vec_with_registry!( + let state_age = register_guarded_int_gauge_vec_with_registry!( "rdkafka_broker_state_age", "Age of the broker state in seconds", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let outbuf_cnt = register_int_gauge_vec_with_registry!( + let outbuf_cnt = register_guarded_int_gauge_vec_with_registry!( "rdkafka_broker_outbuf_cnt", "Number of messages waiting to be sent to broker", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let outbuf_msg_cnt = register_int_gauge_vec_with_registry!( + let outbuf_msg_cnt = register_guarded_int_gauge_vec_with_registry!( "rdkafka_broker_outbuf_msg_cnt", "Number of messages waiting to be sent to broker", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let waitresp_cnt = register_int_gauge_vec_with_registry!( + let waitresp_cnt = register_guarded_int_gauge_vec_with_registry!( "rdkafka_broker_waitresp_cnt", "Number of requests waiting for response", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let waitresp_msg_cnt = register_int_gauge_vec_with_registry!( + let waitresp_msg_cnt = register_guarded_int_gauge_vec_with_registry!( "rdkafka_broker_waitresp_msg_cnt", "Number of messages waiting for response", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let tx = register_uint_gauge_vec_with_registry!( + let tx = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_broker_tx", "Number of transmitted messages", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let tx_bytes = register_uint_gauge_vec_with_registry!( + let tx_bytes = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_broker_tx_bytes", "Number of transmitted bytes", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let tx_errs = register_uint_gauge_vec_with_registry!( + let tx_errs = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_broker_tx_errs", "Number of failed transmitted messages", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let tx_retries = register_uint_gauge_vec_with_registry!( + let tx_retries = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_broker_tx_retries", "Number of message retries", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let tx_idle = register_int_gauge_vec_with_registry!( + let tx_idle = register_guarded_int_gauge_vec_with_registry!( "rdkafka_broker_tx_idle", "Number of idle transmit connections", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let req_timeouts = register_uint_gauge_vec_with_registry!( + let req_timeouts = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_broker_req_timeouts", "Number of request timeouts", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let rx = register_uint_gauge_vec_with_registry!( + let rx = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_broker_rx", "Number of received messages", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let rx_bytes = register_uint_gauge_vec_with_registry!( + let rx_bytes = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_broker_rx_bytes", "Number of received bytes", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let rx_errs = register_uint_gauge_vec_with_registry!( + let rx_errs = 
register_guarded_uint_gauge_vec_with_registry!( "rdkafka_broker_rx_errs", "Number of failed received messages", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let rx_corriderrs = register_uint_gauge_vec_with_registry!( + let rx_corriderrs = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_broker_rx_corriderrs", "Number of received messages with invalid correlation id", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let rx_partial = register_uint_gauge_vec_with_registry!( + let rx_partial = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_broker_rx_partial", "Number of partial messages received", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let rx_idle = register_int_gauge_vec_with_registry!( + let rx_idle = register_guarded_int_gauge_vec_with_registry!( "rdkafka_broker_rx_idle", "Number of idle receive connections", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let req = register_int_gauge_vec_with_registry!( + let req = register_guarded_int_gauge_vec_with_registry!( "rdkafka_broker_req", "Number of requests in flight", &["id", "client_id", "broker", "state", "type"], registry ) .unwrap(); - let zbuf_grow = register_uint_gauge_vec_with_registry!( + let zbuf_grow = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_broker_zbuf_grow", "Number of times the broker's output buffer has been reallocated", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let buf_grow = register_uint_gauge_vec_with_registry!( + let buf_grow = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_broker_buf_grow", "Number of times the broker's input buffer has been reallocated", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let wakeups = register_uint_gauge_vec_with_registry!( + let wakeups = register_guarded_uint_gauge_vec_with_registry!( "rdkafka_broker_wakeups", "Number of wakeups", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let connects = register_int_gauge_vec_with_registry!( + let connects = register_guarded_int_gauge_vec_with_registry!( "rdkafka_broker_connects", "Number of connection attempts", &["id", "client_id", "broker", "state"], registry ) .unwrap(); - let disconnects = register_int_gauge_vec_with_registry!( + let disconnects = register_guarded_int_gauge_vec_with_registry!( "rdkafka_broker_disconnects", "Number of disconnects", &["id", "client_id", "broker", "state"], @@ -1113,57 +1140,75 @@ impl BrokerStats { let labels = [id, client_id, broker, state]; self.state_age - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.stateage); self.outbuf_cnt - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.outbuf_cnt); self.outbuf_msg_cnt - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.outbuf_msg_cnt); self.waitresp_cnt - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.waitresp_cnt); self.waitresp_msg_cnt - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.waitresp_msg_cnt); - self.tx.with_label_values(&labels).set(stats.tx); - self.tx_bytes.with_label_values(&labels).set(stats.txbytes); - self.tx_errs.with_label_values(&labels).set(stats.txerrs); + self.tx.with_guarded_label_values(&labels).set(stats.tx); + self.tx_bytes + .with_guarded_label_values(&labels) + .set(stats.txbytes); + self.tx_errs + .with_guarded_label_values(&labels) + .set(stats.txerrs); self.tx_retries - .with_label_values(&labels) + 
.with_guarded_label_values(&labels) .set(stats.txretries); - self.tx_idle.with_label_values(&labels).set(stats.txidle); + self.tx_idle + .with_guarded_label_values(&labels) + .set(stats.txidle); self.req_timeouts - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.req_timeouts); - self.rx.with_label_values(&labels).set(stats.rx); - self.rx_bytes.with_label_values(&labels).set(stats.rxbytes); - self.rx_errs.with_label_values(&labels).set(stats.rxerrs); + self.rx.with_guarded_label_values(&labels).set(stats.rx); + self.rx_bytes + .with_guarded_label_values(&labels) + .set(stats.rxbytes); + self.rx_errs + .with_guarded_label_values(&labels) + .set(stats.rxerrs); self.rx_corriderrs - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.rxcorriderrs); self.rx_partial - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.rxpartial); - self.rx_idle.with_label_values(&labels).set(stats.rxidle); + self.rx_idle + .with_guarded_label_values(&labels) + .set(stats.rxidle); for (req_type, req_cnt) in &stats.req { self.req - .with_label_values(&[id, client_id, broker, state, req_type]) + .with_guarded_label_values(&[id, client_id, broker, state, req_type]) .set(*req_cnt); } self.zbuf_grow - .with_label_values(&labels) + .with_guarded_label_values(&labels) .set(stats.zbuf_grow); - self.buf_grow.with_label_values(&labels).set(stats.buf_grow); + self.buf_grow + .with_guarded_label_values(&labels) + .set(stats.buf_grow); if let Some(wakeups) = stats.wakeups { - self.wakeups.with_label_values(&labels).set(wakeups); + self.wakeups.with_guarded_label_values(&labels).set(wakeups); } if let Some(connects) = stats.connects { - self.connects.with_label_values(&labels).set(connects); + self.connects + .with_guarded_label_values(&labels) + .set(connects); } if let Some(disconnects) = stats.disconnects { - self.disconnects.with_label_values(&labels).set(disconnects); + self.disconnects + .with_guarded_label_values(&labels) + .set(disconnects); } if let Some(int_latency) = &stats.int_latency { self.int_latency diff --git a/src/connector/src/source/nexmark/source/reader.rs b/src/connector/src/source/nexmark/source/reader.rs index ebcbc0b0aaf32..aea85c5c551cf 100644 --- a/src/connector/src/source/nexmark/source/reader.rs +++ b/src/connector/src/source/nexmark/source/reader.rs @@ -115,31 +115,30 @@ impl SplitReader for NexmarkSplitReader { let split_id = self.split_id.clone(); let metrics = self.source_ctx.metrics.clone(); + let partition_input_count_metric = + metrics.partition_input_count.with_guarded_label_values(&[ + &actor_id, + &source_id, + &split_id, + &source_name, + &fragment_id, + ]); + let partition_input_bytes_metric = + metrics.partition_input_bytes.with_guarded_label_values(&[ + &actor_id, + &source_id, + &split_id, + &source_name, + &fragment_id, + ]); + // Will buffer at most 4 event chunks. 
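The metric hunks above make two related moves: every broker gauge switches from the plain `register_*_vec_with_registry!` macros to RisingWave's guarded variants (`register_guarded_*_vec_with_registry!` plus `with_guarded_label_values`), and the Nexmark reader resolves its labelled children once, up front, instead of on every chunk. The sketch below shows the hoisting half of that pattern with the vanilla `prometheus` crate, since the guarded wrappers are internal to RisingWave; the metric name and label values are made up, and the idea that guarded handles also unregister their series on drop is my reading of the change, not something shown in this excerpt.

```rust
use prometheus::{register_int_gauge_vec_with_registry, Registry};

fn main() {
    let registry = Registry::new();
    let tx_vec = register_int_gauge_vec_with_registry!(
        "rdkafka_broker_tx",
        "Number of transmitted messages",
        &["id", "client_id", "broker", "state"],
        registry
    )
    .unwrap();

    // Resolve the label set once, outside the hot path, and keep the child handle;
    // each `set` afterwards skips the per-call label hashing and lookup.
    let tx = tx_vec.with_label_values(&["1", "rw-client", "broker-0", "UP"]);
    for observed in [10, 20, 30] {
        tx.set(observed);
    }
    assert_eq!(tx.get(), 30);
}
```

This is the same shape the Nexmark reader ends up with: `partition_input_count_metric` and `partition_input_bytes_metric` are created before the stream is spawned, and only `inc_by` runs per chunk.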
const BUFFER_SIZE: usize = 4; spawn_data_generation_stream( self.into_native_stream() .inspect_ok(move |chunk: &StreamChunk| { - metrics - .partition_input_count - .with_guarded_label_values(&[ - &actor_id, - &source_id, - &split_id, - &source_name, - &fragment_id, - ]) - .inc_by(chunk.cardinality() as u64); - metrics - .partition_input_bytes - .with_guarded_label_values(&[ - &actor_id, - &source_id, - &split_id, - &source_name, - &fragment_id, - ]) - .inc_by(chunk.estimated_size() as u64); + partition_input_count_metric.inc_by(chunk.cardinality() as u64); + partition_input_bytes_metric.inc_by(chunk.estimated_size() as u64); }), BUFFER_SIZE, ) diff --git a/src/connector/src/source/pulsar/mod.rs b/src/connector/src/source/pulsar/mod.rs index 5d6d111b13bff..ffbc3be495bf9 100644 --- a/src/connector/src/source/pulsar/mod.rs +++ b/src/connector/src/source/pulsar/mod.rs @@ -74,6 +74,16 @@ pub struct PulsarProperties { #[serde(rename = "iceberg.bucket", default)] pub iceberg_bucket: Option, + /// Specify a custom consumer group id prefix for the source. + /// Defaults to `rw-consumer`. + /// + /// Notes: + /// - Each job (materialized view) will have multiple subscriptions and + /// contains a generated suffix in the subscription name. + /// The subscription name will be `{subscription_name_prefix}-{fragment_id}-{actor_id}`. + #[serde(rename = "subscription.name.prefix")] + pub subscription_name_prefix: Option, + #[serde(flatten)] pub unknown_fields: HashMap, } diff --git a/src/connector/src/source/pulsar/source/reader.rs b/src/connector/src/source/pulsar/source/reader.rs index 212c459388b25..20f6872474e88 100644 --- a/src/connector/src/source/pulsar/source/reader.rs +++ b/src/connector/src/source/pulsar/source/reader.rs @@ -42,6 +42,8 @@ use crate::source::{ SplitMetaData, SplitReader, }; +const PULSAR_DEFAULT_SUBSCRIPTION_PREFIX: &str = "rw-consumer"; + pub enum PulsarSplitReader { Broker(PulsarBrokerReader), Iceberg(PulsarIcebergReader), @@ -174,8 +176,12 @@ impl SplitReader for PulsarBrokerReader { .with_topic(&topic) .with_subscription_type(SubType::Exclusive) .with_subscription(format!( - "rw-consumer-{}-{}", - source_ctx.fragment_id, source_ctx.actor_id + "{}-{}-{}", + props + .subscription_name_prefix + .unwrap_or(PULSAR_DEFAULT_SUBSCRIPTION_PREFIX.to_string()), + source_ctx.fragment_id, + source_ctx.actor_id )); let builder = match split.start_offset.clone() { diff --git a/src/connector/src/source/reader/reader.rs b/src/connector/src/source/reader/reader.rs index 95764792c0025..b3a1cb5380d8c 100644 --- a/src/connector/src/source/reader/reader.rs +++ b/src/connector/src/source/reader/reader.rs @@ -279,7 +279,6 @@ async fn build_opendal_fs_list_stream( { yield res } else { - // Currrntly due to the lack of prefix list, we just skip the unmatched files. 
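The new `subscription.name.prefix` property only changes how the Pulsar subscription name is assembled; the fragment and actor ids are still appended, and the old `rw-consumer` prefix remains the default. A standalone sketch of that naming logic (the id types are simplified to plain `u32`s here):

```rust
const PULSAR_DEFAULT_SUBSCRIPTION_PREFIX: &str = "rw-consumer";

fn subscription_name(prefix: Option<&str>, fragment_id: u32, actor_id: u32) -> String {
    format!(
        "{}-{}-{}",
        prefix.unwrap_or(PULSAR_DEFAULT_SUBSCRIPTION_PREFIX),
        fragment_id,
        actor_id
    )
}

fn main() {
    // Without the new option, the old behaviour is preserved.
    assert_eq!(subscription_name(None, 3, 7), "rw-consumer-3-7");
    // With `subscription.name.prefix = 'analytics'` set on the source.
    assert_eq!(subscription_name(Some("analytics"), 3, 7), "analytics-3-7");
}
```

So existing sources keep their `rw-consumer-{fragment_id}-{actor_id}` subscriptions unless the property is set.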
continue; } } diff --git a/src/connector/src/test_data/any-schema.pb b/src/connector/src/test_data/any-schema.pb deleted file mode 100644 index 977f64cec3775..0000000000000 --- a/src/connector/src/test_data/any-schema.pb +++ /dev/null @@ -1,30 +0,0 @@ - -ä -google/protobuf/any.protogoogle.protobuf"6 -Any -type_url ( RtypeUrl -value ( RvalueBv -com.google.protobufBAnyProtoPZ,google.golang.org/protobuf/types/known/anypb¢GPBªGoogle.Protobuf.WellKnownTypesbproto3 -á -any-schema.prototestgoogle/protobuf/any.proto"L -TestAny -id (Rid1 - any_value ( 2.google.protobuf.AnyRanyValue"# - StringValue -value ( Rvalue"" - -Int32Value -value (Rvalue"v -AnyValue4 - any_value_1 ( 2.google.protobuf.AnyR anyValue14 - any_value_2 ( 2.google.protobuf.AnyR anyValue2"@ -StringInt32Value -first ( Rfirst -second (Rsecond"Ž -StringStringInt32Value -first ( Rfirst. -second ( 2.test.StringInt32ValueRsecond. -third ( 2.test.Float32StringValueRthird"B -Float32StringValue -first (Rfirst -second ( Rsecondbproto3 \ No newline at end of file diff --git a/src/connector/src/test_data/complex-schema b/src/connector/src/test_data/complex-schema deleted file mode 100644 index ff7cd64120883..0000000000000 Binary files a/src/connector/src/test_data/complex-schema and /dev/null differ diff --git a/src/connector/src/test_data/proto_recursive/recursive.proto b/src/connector/src/test_data/proto_recursive/recursive.proto deleted file mode 100644 index 93f177055788c..0000000000000 --- a/src/connector/src/test_data/proto_recursive/recursive.proto +++ /dev/null @@ -1,95 +0,0 @@ -syntax = "proto3"; - -import "google/protobuf/timestamp.proto"; -import "google/protobuf/duration.proto"; -import "google/protobuf/any.proto"; -import "google/protobuf/wrappers.proto"; - -package recursive; - -message ComplexRecursiveMessage { - string node_name = 1; - int32 node_id = 2; - - message Attributes { - string key = 1; - string value = 2; - } - - repeated Attributes attributes = 3; - - message Parent { - string parent_name = 1; - int32 parent_id = 2; - repeated ComplexRecursiveMessage siblings = 3; - } - - Parent parent = 4; - repeated ComplexRecursiveMessage children = 5; -} - -message AllTypes { - // standard types - double double_field = 1; - float float_field = 2; - int32 int32_field = 3; - int64 int64_field = 4; - uint32 uint32_field = 5; - uint64 uint64_field = 6; - sint32 sint32_field = 7; - sint64 sint64_field = 8; - fixed32 fixed32_field = 9; - fixed64 fixed64_field = 10; - sfixed32 sfixed32_field = 11; - sfixed64 sfixed64_field = 12; - bool bool_field = 13; - string string_field = 14; - - bytes bytes_field = 15; - - // enum - enum EnumType { - DEFAULT = 0; - OPTION1 = 1; - OPTION2 = 2; - } - EnumType enum_field = 16; - - // nested message - message NestedMessage { - int32 id = 1; - string name = 2; - } - NestedMessage nested_message_field = 17; - - // repeated field - repeated int32 repeated_int_field = 18; - - // oneof field - oneof example_oneof { - string oneof_string = 19; - int32 oneof_int32 = 20; - EnumType oneof_enum = 21; - } - - // // map field - // map map_field = 22; - - // timestamp - google.protobuf.Timestamp timestamp_field = 23; - - // duration - google.protobuf.Duration duration_field = 24; - - // any - google.protobuf.Any any_field = 25; - - // -- Unsupported - // // struct - // import "google/protobuf/struct.proto"; - // google.protobuf.Struct struct_field = 26; - - // wrapper types - google.protobuf.Int32Value int32_value_field = 27; - google.protobuf.StringValue string_value_field = 28; -} \ No newline at end of file diff 
--git a/src/connector/src/test_data/simple-schema b/src/connector/src/test_data/simple-schema deleted file mode 100644 index 97686ce9c478d..0000000000000 --- a/src/connector/src/test_data/simple-schema +++ /dev/null @@ -1,11 +0,0 @@ - -² -simple-schema.prototest"Œ - -TestRecord -id (Rid -address ( Raddress -city ( Rcity -zipcode (Rzipcode -rate (Rrate -date ( Rdatebproto3 \ No newline at end of file diff --git a/src/connector/with_options_sink.yaml b/src/connector/with_options_sink.yaml index e8a8efff68801..1af3435eaea24 100644 --- a/src/connector/with_options_sink.yaml +++ b/src/connector/with_options_sink.yaml @@ -115,7 +115,7 @@ ClickHouseConfig: field_type: u64 comments: Commit every n(>0) checkpoints, default is 10. required: false - default: DEFAULT_COMMIT_CHECKPOINT_INTERVAL + default: DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITH_SINK_DECOUPLE - name: r#type field_type: String required: true @@ -143,7 +143,7 @@ DeltaLakeConfig: field_type: u64 comments: Commit every n(>0) checkpoints, default is 10. required: false - default: DEFAULT_COMMIT_CHECKPOINT_INTERVAL + default: DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITH_SINK_DECOUPLE - name: r#type field_type: String required: true @@ -339,7 +339,7 @@ IcebergConfig: field_type: u64 comments: Commit every n(>0) checkpoints, default is 10. required: false - default: DEFAULT_COMMIT_CHECKPOINT_INTERVAL + default: DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITH_SINK_DECOUPLE - name: create_table_if_not_exists field_type: bool required: false @@ -1021,7 +1021,7 @@ StarrocksConfig: also, in this time, the `sink_decouple` option should be enabled as well. Defaults to 10 if commit_checkpoint_interval <= 0 required: false - default: DEFAULT_COMMIT_CHECKPOINT_INTERVAL + default: DEFAULT_COMMIT_CHECKPOINT_INTERVAL_WITH_SINK_DECOUPLE - name: starrocks.partial_update field_type: String comments: Enable partial update diff --git a/src/connector/with_options_source.yaml b/src/connector/with_options_source.yaml index a6a19e80c89a3..92208f5d83257 100644 --- a/src/connector/with_options_source.yaml +++ b/src/connector/with_options_source.yaml @@ -1000,6 +1000,17 @@ PulsarProperties: field_type: String required: false default: Default::default + - name: subscription.name.prefix + field_type: String + comments: |- + Specify a custom consumer group id prefix for the source. + Defaults to `rw-consumer`. + + Notes: + - Each job (materialized view) will have multiple subscriptions and + contains a generated suffix in the subscription name. + The subscription name will be `{subscription_name_prefix}-{fragment_id}-{actor_id}`. + required: false S3Properties: fields: - name: s3.region_name diff --git a/src/ctl/src/cmd_impl/hummock/compaction_group.rs b/src/ctl/src/cmd_impl/hummock/compaction_group.rs index a0395d236d504..c41b4c6e25b9e 100644 --- a/src/ctl/src/cmd_impl/hummock/compaction_group.rs +++ b/src/ctl/src/cmd_impl/hummock/compaction_group.rs @@ -131,10 +131,11 @@ pub async fn split_compaction_group( context: &CtlContext, group_id: CompactionGroupId, table_ids_to_new_group: &[StateTableId], + partition_vnode_count: u32, ) -> anyhow::Result<()> { let meta_client = context.meta_client().await?; let new_group_id = meta_client - .split_compaction_group(group_id, table_ids_to_new_group) + .split_compaction_group(group_id, table_ids_to_new_group, partition_vnode_count) .await?; println!( "Succeed: split compaction group {}. 
tables {:#?} are moved to new group {}.", @@ -284,3 +285,15 @@ pub async fn cancel_compact_task(context: &CtlContext, task_id: u64) -> anyhow:: Ok(()) } + +pub async fn merge_compaction_group( + context: &CtlContext, + left_group_id: CompactionGroupId, + right_group_id: CompactionGroupId, +) -> anyhow::Result<()> { + let meta_client = context.meta_client().await?; + meta_client + .merge_compaction_group(left_group_id, right_group_id) + .await?; + Ok(()) +} diff --git a/src/ctl/src/lib.rs b/src/ctl/src/lib.rs index 34c5be6ace21b..b35b8d1e42cb2 100644 --- a/src/ctl/src/lib.rs +++ b/src/ctl/src/lib.rs @@ -276,6 +276,8 @@ enum HummockCommands { compaction_group_id: u64, #[clap(long, value_delimiter = ',')] table_ids: Vec, + #[clap(long, default_value_t = 0)] + partition_vnode_count: u32, }, /// Pause version checkpoint, which subsequently pauses GC of delta log and SST object. PauseVersionCheckpoint, @@ -340,6 +342,12 @@ enum HummockCommands { #[clap(long)] record_hybrid_fetch_threshold_ms: Option, }, + MergeCompactionGroup { + #[clap(long)] + left_group_id: u64, + #[clap(long)] + right_group_id: u64, + }, } #[derive(Subcommand)] @@ -711,9 +719,15 @@ async fn start_impl(opts: CliOpts, context: &CtlContext) -> Result<()> { Commands::Hummock(HummockCommands::SplitCompactionGroup { compaction_group_id, table_ids, + partition_vnode_count, }) => { - cmd_impl::hummock::split_compaction_group(context, compaction_group_id, &table_ids) - .await?; + cmd_impl::hummock::split_compaction_group( + context, + compaction_group_id, + &table_ids, + partition_vnode_count, + ) + .await?; } Commands::Hummock(HummockCommands::PauseVersionCheckpoint) => { cmd_impl::hummock::pause_version_checkpoint(context).await?; @@ -790,6 +804,13 @@ async fn start_impl(opts: CliOpts, context: &CtlContext) -> Result<()> { ) .await? } + Commands::Hummock(HummockCommands::MergeCompactionGroup { + left_group_id, + right_group_id, + }) => { + cmd_impl::hummock::merge_compaction_group(context, left_group_id, right_group_id) + .await? + } Commands::Table(TableCommands::Scan { mv_name, data_dir, diff --git a/src/dml/src/lib.rs b/src/dml/src/lib.rs index a15a4dfb3fba9..f0034a630a823 100644 --- a/src/dml/src/lib.rs +++ b/src/dml/src/lib.rs @@ -14,7 +14,6 @@ #![allow(clippy::derive_partial_eq_without_eq)] #![feature(trait_alias)] -#![feature(lint_reasons)] #![feature(coroutines)] #![feature(hash_extract_if)] #![feature(type_alias_impl_trait)] diff --git a/src/error/src/lib.rs b/src/error/src/lib.rs index 4dde816be458b..010308bf95cc8 100644 --- a/src/error/src/lib.rs +++ b/src/error/src/lib.rs @@ -21,7 +21,6 @@ //! access if `risingwave_common` is already a dependency. #![feature(error_generic_member_access)] -#![feature(lint_reasons)] #![feature(register_tool)] #![register_tool(rw)] #![feature(trait_alias)] diff --git a/src/expr/core/src/lib.rs b/src/expr/core/src/lib.rs index d45d4ca11f80a..73e3b6a6ed2e3 100644 --- a/src/expr/core/src/lib.rs +++ b/src/expr/core/src/lib.rs @@ -13,7 +13,6 @@ // limitations under the License. 
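With these ctl changes, `split-compaction-group` grows a `--partition-vnode-count` flag (default 0) and a new `merge-compaction-group` subcommand is wired through to the meta client; with clap's default kebab-case naming the invocations should look like `hummock split-compaction-group --compaction-group-id <G> --table-ids <T1,T2> --partition-vnode-count <N>` and `hummock merge-compaction-group --left-group-id <L> --right-group-id <R>` (the exact binary entry point is not shown in this excerpt). A rough sketch of calling the two helpers directly, assuming it lives inside the ctl crate; the `CtlContext` setup and module paths are assumptions:

```rust
// Assumed to sit next to the code above, inside the ctl crate.
use crate::cmd_impl::hummock::{merge_compaction_group, split_compaction_group};
use crate::common::CtlContext; // path assumed

async fn rebalance(context: &CtlContext) -> anyhow::Result<()> {
    // Move state tables 1001 and 1002 out of group 2 into a new group,
    // pre-partitioned into 16 vnode partitions.
    split_compaction_group(context, 2, &[1001, 1002], 16).await?;
    // Later, fold compaction group 3 back into group 2.
    merge_compaction_group(context, 2, 3).await?;
    Ok(())
}
```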
#![feature(let_chains)] -#![feature(lint_reasons)] #![feature(iterator_try_collect)] #![feature(coroutines)] #![feature(never_type)] diff --git a/src/expr/impl/Cargo.toml b/src/expr/impl/Cargo.toml index e493037c200b7..c0e506889ef77 100644 --- a/src/expr/impl/Cargo.toml +++ b/src/expr/impl/Cargo.toml @@ -51,7 +51,7 @@ itertools = { workspace = true } jsonbb = { workspace = true } linkme = { version = "0.3", features = ["used_linker"] } md5 = "0.7" -moka = { version = "0.12", features = ["sync"] } +moka = { version = "0.12.0", features = ["sync"] } num-traits = "0.2" openssl = "0.10" regex = "1" diff --git a/src/expr/impl/src/lib.rs b/src/expr/impl/src/lib.rs index e5c69c2660eeb..e710749a122d6 100644 --- a/src/expr/impl/src/lib.rs +++ b/src/expr/impl/src/lib.rs @@ -23,7 +23,6 @@ #![allow(non_snake_case)] // for `ctor` generated code #![feature(let_chains)] #![feature(assert_matches)] -#![feature(lint_reasons)] #![feature(iterator_try_collect)] #![feature(coroutines)] #![feature(test)] diff --git a/src/expr/impl/src/scalar/array.rs b/src/expr/impl/src/scalar/array.rs index d5f53213bf277..7b7d272000597 100644 --- a/src/expr/impl/src/scalar/array.rs +++ b/src/expr/impl/src/scalar/array.rs @@ -15,7 +15,7 @@ use risingwave_common::array::{ListValue, StructValue}; use risingwave_common::row::Row; use risingwave_common::types::{ - DataType, ListRef, MapRef, MapType, MapValue, ScalarRefImpl, ToOwnedDatum, + DataType, ListRef, MapRef, MapType, MapValue, ScalarRef, ScalarRefImpl, ToOwnedDatum, }; use risingwave_expr::expr::Context; use risingwave_expr::{function, ExprError}; @@ -241,6 +241,60 @@ fn map_delete(map: MapRef<'_>, key: Option>) -> MapValue { MapValue::delete(map, key) } +/// # Example +/// +/// ```slt +/// query T +/// select map_keys(map{'a':1, 'b':2, 'c':3}); +/// ---- +/// {a,b,c} +/// ``` +#[function( + "map_keys(anymap) -> anyarray", + type_infer = "|args|{ + Ok(DataType::List(Box::new(args[0].as_map().key().clone()))) + }" +)] +fn map_keys(map: MapRef<'_>) -> ListValue { + map.into_kv().0.to_owned_scalar() +} + +/// # Example +/// +/// ```slt +/// query T +/// select map_values(map{'a':1, 'b':2, 'c':3}); +/// ---- +/// {1,2,3} +/// ``` +#[function( + "map_values(anymap) -> anyarray", + type_infer = "|args|{ + Ok(DataType::List(Box::new(args[0].as_map().value().clone()))) + }" +)] +fn map_values(map: MapRef<'_>) -> ListValue { + map.into_kv().1.to_owned_scalar() +} + +/// # Example +/// +/// ```slt +/// query T +/// select map_entries(map{'a':1, 'b':2, 'c':3}); +/// ---- +/// {"(a,1)","(b,2)","(c,3)"} +/// ``` +#[function( + "map_entries(anymap) -> anyarray", + type_infer = "|args|{ + Ok(args[0].as_map().clone().into_list()) + }" +)] +fn map_entries(map: MapRef<'_>) -> ListValue { + map.into_inner().to_owned() +} + #[cfg(test)] mod tests { use risingwave_common::array::DataChunk; diff --git a/src/expr/impl/src/udf/external.rs b/src/expr/impl/src/udf/external.rs index 5c400df26c179..0d6ba0e409386 100644 --- a/src/expr/impl/src/udf/external.rs +++ b/src/expr/impl/src/udf/external.rs @@ -25,6 +25,7 @@ use ginepro::{LoadBalancedChannel, ResolutionStrategy}; use risingwave_common::array::arrow::{ToArrow, UdfArrowConvert}; use risingwave_common::util::addr::HostAddr; use thiserror_ext::AsReport; +use tokio::runtime::Runtime; use super::*; @@ -174,9 +175,16 @@ fn get_or_create_flight_client(link: &str) -> Result> { // reuse existing client Ok(client) } else { + static RUNTIME: LazyLock = LazyLock::new(|| { + tokio::runtime::Builder::new_multi_thread() + .thread_name("rw-udf") + 
.enable_all() + .build() + .expect("failed to build udf runtime") + }); // create new client let client = Arc::new(tokio::task::block_in_place(|| { - tokio::runtime::Handle::current().block_on(async { + RUNTIME.block_on(async { let channel = connect_tonic(link).await?; Ok(Client::new(channel).await?) as Result<_> }) diff --git a/src/expr/macro/src/lib.rs b/src/expr/macro/src/lib.rs index 8fd03e344db89..630c82a87701b 100644 --- a/src/expr/macro/src/lib.rs +++ b/src/expr/macro/src/lib.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![feature(lint_reasons)] #![feature(let_chains)] use std::vec; diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_iceberg_snapshots.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_iceberg_snapshots.rs index e2bbcb486b926..3c60236f96e66 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_iceberg_snapshots.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_iceberg_snapshots.rs @@ -17,6 +17,7 @@ use std::ops::Deref; use iceberg::table::Table; use jsonbb::{Value, ValueRef}; use risingwave_common::types::{Fields, JsonbVal, Timestamptz}; +use risingwave_connector::error::ConnectorResult; use risingwave_connector::sink::iceberg::IcebergConfig; use risingwave_connector::source::ConnectorProperties; use risingwave_connector::WithPropertiesExt; @@ -62,25 +63,32 @@ async fn read(reader: &SysCatalogReaderImpl) -> Result> let iceberg_config: IcebergConfig = iceberg_properties.to_iceberg_config(); let table: Table = iceberg_config.load_table_v2().await?; - result.extend(table.metadata().snapshots().map(|snapshot| { - RwIcebergSnapshots { - source_id: source.id as i32, - schema_name: schema_name.clone(), - source_name: source.name.clone(), - sequence_number: snapshot.sequence_number(), - snapshot_id: snapshot.snapshot_id(), - timestamp_ms: Timestamptz::from_millis(snapshot.timestamp().timestamp_millis()), - manifest_list: snapshot.manifest_list().to_string(), - summary: Value::object( - snapshot - .summary() - .other - .iter() - .map(|(k, v)| (k.as_str(), ValueRef::String(v))), - ) - .into(), - } - })); + let snapshots: ConnectorResult> = table + .metadata() + .snapshots() + .map(|snapshot| { + Ok(RwIcebergSnapshots { + source_id: source.id as i32, + schema_name: schema_name.clone(), + source_name: source.name.clone(), + sequence_number: snapshot.sequence_number(), + snapshot_id: snapshot.snapshot_id(), + timestamp_ms: Timestamptz::from_millis( + snapshot.timestamp()?.timestamp_millis(), + ), + manifest_list: snapshot.manifest_list().to_string(), + summary: Value::object( + snapshot + .summary() + .other + .iter() + .map(|(k, v)| (k.as_str(), ValueRef::String(v))), + ) + .into(), + }) + }) + .collect(); + result.extend(snapshots?); } } Ok(result) diff --git a/src/frontend/src/expr/mod.rs b/src/frontend/src/expr/mod.rs index f650fa3cb521b..c7acdfa5c4a3c 100644 --- a/src/frontend/src/expr/mod.rs +++ b/src/frontend/src/expr/mod.rs @@ -988,10 +988,9 @@ impl ExprImpl { _ => return None, }; let list: Vec<_> = inputs - .map(|expr| { + .inspect(|expr| { // Non constant IN will be bound to OR assert!(expr.is_const()); - expr }) .collect(); diff --git a/src/frontend/src/lib.rs b/src/frontend/src/lib.rs index d8b484e3d6fa2..d3d5d1623bd58 100644 --- a/src/frontend/src/lib.rs +++ b/src/frontend/src/lib.rs @@ -23,7 +23,6 @@ #![feature(if_let_guard)] #![feature(let_chains)] #![feature(assert_matches)] -#![feature(lint_reasons)] #![feature(box_patterns)] 
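The UDF client change above stops blocking on the caller's runtime (`tokio::runtime::Handle::current().block_on`) and instead lazily builds a dedicated multi-thread runtime for connection setup. The pattern in isolation, with a made-up async body standing in for `connect_tonic`/`Client::new`:

```rust
use std::sync::LazyLock;

use tokio::runtime::Runtime;

// Built once on first use; connection setup is driven here instead of on the
// caller's runtime.
static RUNTIME: LazyLock<Runtime> = LazyLock::new(|| {
    tokio::runtime::Builder::new_multi_thread()
        .thread_name("rw-udf")
        .enable_all()
        .build()
        .expect("failed to build udf runtime")
});

fn connect_blocking(link: &str) -> String {
    // `block_in_place` tells the outer runtime this worker will block, then the
    // dedicated runtime drives the async connection to completion.
    tokio::task::block_in_place(|| RUNTIME.block_on(async move { format!("connected to {link}") }))
}

#[tokio::main]
async fn main() {
    // `block_in_place` requires a multi-thread outer runtime, which
    // `#[tokio::main]` provides by default.
    println!("{}", connect_blocking("http://localhost:8815"));
}
```

`block_in_place` is still needed so the outer worker thread is allowed to block while the inner runtime runs; it only works when the outer runtime is multi-threaded.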
#![feature(macro_metavar_expr)] #![feature(min_specialization)] @@ -142,8 +141,9 @@ pub struct FrontendOpts { pub config_path: String, /// Used for control the metrics level, similar to log level. - /// 0 = disable metrics - /// >0 = enable metrics + /// + /// level = 0: disable metrics + /// level > 0: enable metrics #[clap(long, hide = true, env = "RW_METRICS_LEVEL")] #[override_opts(path = server.metrics_level)] pub metrics_level: Option, diff --git a/src/frontend/src/optimizer/delta_join_solver.rs b/src/frontend/src/optimizer/delta_join_solver.rs index 5dc1bb30cc9f9..470fc0426d7d5 100644 --- a/src/frontend/src/optimizer/delta_join_solver.rs +++ b/src/frontend/src/optimizer/delta_join_solver.rs @@ -66,7 +66,8 @@ //! possible that every lookup path produces different distribution. We need to shuffle them //! before feeding data to union. -#![expect(dead_code)] +// FIXME: https://github.com/rust-lang/rust-analyzer/issues/17685 +#![allow(dead_code)] use std::collections::{BTreeMap, BTreeSet}; diff --git a/src/frontend/src/optimizer/plan_node/logical_over_window.rs b/src/frontend/src/optimizer/plan_node/logical_over_window.rs index 7a81b164fbafe..bb78380482752 100644 --- a/src/frontend/src/optimizer/plan_node/logical_over_window.rs +++ b/src/frontend/src/optimizer/plan_node/logical_over_window.rs @@ -548,11 +548,10 @@ impl ColPrunable for LogicalOverWindow { let new_window_functions = req_cols_win_func_part .indices() .map(|idx| self.window_functions()[idx - input_len].clone()) - .map(|func| { + .inspect(|func| { tmp.extend(func.args.iter().map(|x| x.index())); tmp.extend(func.partition_by.iter().map(|x| x.index())); tmp.extend(func.order_by.iter().map(|x| x.column_index)); - func }) .collect_vec(); (tmp, new_window_functions) diff --git a/src/frontend/src/optimizer/plan_node/stream_sink.rs b/src/frontend/src/optimizer/plan_node/stream_sink.rs index 2717c454e6435..3e34475c8d4bb 100644 --- a/src/frontend/src/optimizer/plan_node/stream_sink.rs +++ b/src/frontend/src/optimizer/plan_node/stream_sink.rs @@ -212,7 +212,7 @@ impl StreamSink { partition_info: Option, ) -> Result { let columns = derive_columns(input.schema(), out_names, &user_cols)?; - let (input, sink) = Self::derive_sink_desc( + let (input, mut sink) = Self::derive_sink_desc( input, user_distributed_by, name, @@ -241,8 +241,11 @@ impl StreamSink { if connector == TABLE_SINK && sink.target_table.is_none() { unsupported_sink(TABLE_SINK) } else { + SinkType::set_default_commit_checkpoint_interval( + &mut sink, + &input.ctx().session_ctx().config().sink_decouple(), + )?; SinkType::is_sink_decouple( - &sink, &input.ctx().session_ctx().config().sink_decouple(), ) } diff --git a/src/frontend/src/optimizer/rule/index_selection_rule.rs b/src/frontend/src/optimizer/rule/index_selection_rule.rs index 548fda7b92af4..a995dd9878620 100644 --- a/src/frontend/src/optimizer/rule/index_selection_rule.rs +++ b/src/frontend/src/optimizer/rule/index_selection_rule.rs @@ -48,7 +48,7 @@ use std::cmp::min; use std::collections::hash_map::Entry::{Occupied, Vacant}; -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap}; use std::rc::Rc; use itertools::Itertools; @@ -962,17 +962,6 @@ impl ExprVisitor for TableScanIoEstimator<'_> { } } -#[derive(Default)] -struct ExprInputRefFinder { - pub input_ref_index_set: HashSet, -} - -impl ExprVisitor for ExprInputRefFinder { - fn visit_input_ref(&mut self, input_ref: &InputRef) { - self.input_ref_index_set.insert(input_ref.index); - } -} - struct ShiftInputRefRewriter { 
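A recurring cleanup in these frontend hunks (`expr/mod.rs` above, `LogicalOverWindow` column pruning here, and the query manager just below) replaces `map`/`map_err` closures that only perform a side effect and then hand the element back with `inspect`/`inspect_err`. A tiny self-contained illustration of the idiom:

```rust
fn main() {
    let exprs = ["a", "bb", "ccc"];

    // Before: `.map(|e| { side_effect(e); e })` just to keep the element.
    // After: `Iterator::inspect` keeps the element implicitly.
    let mut total_len = 0;
    let kept: Vec<_> = exprs.iter().inspect(|e| total_len += e.len()).collect();
    assert_eq!((kept.len(), total_len), (3, 6));

    // The same idea for fallible results: `inspect_err` observes the error
    // without rebuilding and returning it from a `map_err` closure.
    let res: Result<u32, String> = Err("boom".to_string());
    let _ = res.inspect_err(|e| eprintln!("query failed: {e}"));
}
```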
offset: usize, } diff --git a/src/frontend/src/scheduler/distributed/query_manager.rs b/src/frontend/src/scheduler/distributed/query_manager.rs index 86a54cf9c0f98..2d977cfb675e6 100644 --- a/src/frontend/src/scheduler/distributed/query_manager.rs +++ b/src/frontend/src/scheduler/distributed/query_manager.rs @@ -230,14 +230,13 @@ impl QueryManager { self.query_metrics.clone(), ) .await - .map_err(|err| { + .inspect_err(|_| { // Clean up query execution on error. context .session() .env() .query_manager() .delete_query(&query_id); - err })?; Ok(query_result_fetcher.stream_from_channel()) } diff --git a/src/license/src/feature.rs b/src/license/src/feature.rs index 583ef93a45863..0b888986db5c2 100644 --- a/src/license/src/feature.rs +++ b/src/license/src/feature.rs @@ -57,7 +57,6 @@ macro_rules! for_all_features { { SqlServerCdcSource, Paid, "CDC source connector for Sql Server." }, { CdcAutoSchemaChange, Paid, "Auto replicate upstream DDL to CDC Table." }, { IcebergSinkWithGlue, Paid, "Delivering data to Iceberg with Glue catalog." }, - { FileSink, Paid, "Delivering data to object storage."}, } }; } diff --git a/src/meta/model_v2/migration/src/lib.rs b/src/meta/model_v2/migration/src/lib.rs index 08291e5b163d5..0b09f3c4d4e11 100644 --- a/src/meta/model_v2/migration/src/lib.rs +++ b/src/meta/model_v2/migration/src/lib.rs @@ -20,6 +20,7 @@ mod m20240702_080451_system_param_value; mod m20240702_084927_unnecessary_fk; mod m20240726_063833_auto_schema_change; mod m20240806_143329_add_rate_limit_to_source_catalog; +mod m20240820_081248_add_time_travel_per_table_epoch; pub struct Migrator; @@ -45,6 +46,7 @@ impl MigratorTrait for Migrator { Box::new(m20240702_084927_unnecessary_fk::Migration), Box::new(m20240726_063833_auto_schema_change::Migration), Box::new(m20240806_143329_add_rate_limit_to_source_catalog::Migration), + Box::new(m20240820_081248_add_time_travel_per_table_epoch::Migration), ] } } diff --git a/src/meta/model_v2/migration/src/m20240820_081248_add_time_travel_per_table_epoch.rs b/src/meta/model_v2/migration/src/m20240820_081248_add_time_travel_per_table_epoch.rs new file mode 100644 index 0000000000000..85d9475aa8f01 --- /dev/null +++ b/src/meta/model_v2/migration/src/m20240820_081248_add_time_travel_per_table_epoch.rs @@ -0,0 +1,197 @@ +use sea_orm_migration::prelude::*; + +#[derive(DeriveMigrationName)] +pub struct Migration; + +const TABLE_NAME: &str = "hummock_epoch_to_version"; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + // modify PK + match manager.get_database_backend() { + sea_orm::DatabaseBackend::MySql => { + manager + .alter_table( + Table::alter() + .table(HummockEpochToVersion::Table) + .add_column( + ColumnDef::new(HummockEpochToVersion::TableId).big_integer(), + ) + .to_owned(), + ) + .await?; + manager + .get_connection() + .execute(sea_orm::Statement::from_string( + sea_orm::DatabaseBackend::MySql, + format!("ALTER TABLE {TABLE_NAME} DROP PRIMARY KEY, ADD PRIMARY KEY (epoch, table_id)"), + )) + .await?; + } + sea_orm::DatabaseBackend::Postgres => { + manager + .alter_table( + Table::alter() + .table(HummockEpochToVersion::Table) + .add_column( + ColumnDef::new(HummockEpochToVersion::TableId).big_integer(), + ) + .to_owned(), + ) + .await?; + manager + .get_connection() + .execute(sea_orm::Statement::from_string( + sea_orm::DatabaseBackend::Postgres, + format!("ALTER TABLE {TABLE_NAME} DROP CONSTRAINT {TABLE_NAME}_pkey"), + )) + .await?; + manager + .get_connection() + 
.execute(sea_orm::Statement::from_string( + sea_orm::DatabaseBackend::Postgres, + format!("ALTER TABLE {TABLE_NAME} ADD PRIMARY KEY (epoch, table_id)"), + )) + .await?; + } + sea_orm::DatabaseBackend::Sqlite => { + // sqlite is not for prod usage, so recreating the table is fine. + manager + .drop_table( + sea_orm_migration::prelude::Table::drop() + .table(HummockEpochToVersion::Table) + .if_exists() + .cascade() + .to_owned(), + ) + .await?; + + manager + .create_table( + Table::create() + .table(HummockEpochToVersion::Table) + .if_not_exists() + .col( + ColumnDef::new(HummockEpochToVersion::Epoch) + .big_integer() + .not_null(), + ) + .col( + ColumnDef::new(HummockEpochToVersion::TableId) + .big_integer() + .not_null(), + ) + .col( + ColumnDef::new(HummockEpochToVersion::VersionId) + .big_integer() + .not_null(), + ) + .primary_key( + Index::create() + .col(HummockEpochToVersion::Epoch) + .col(HummockEpochToVersion::TableId), + ) + .to_owned(), + ) + .await?; + } + } + Ok(()) + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + // The downgrade for MySql and Postgres may not work due to PK confliction. + match manager.get_database_backend() { + sea_orm::DatabaseBackend::MySql => { + manager + .get_connection() + .execute(sea_orm::Statement::from_string( + sea_orm::DatabaseBackend::MySql, + format!("ALTER TABLE {TABLE_NAME} DROP PRIMARY KEY"), + )) + .await?; + manager + .alter_table( + Table::alter() + .table(HummockEpochToVersion::Table) + .drop_column(HummockEpochToVersion::TableId) + .to_owned(), + ) + .await?; + manager + .get_connection() + .execute(sea_orm::Statement::from_string( + sea_orm::DatabaseBackend::MySql, + format!("ALTER TABLE {TABLE_NAME} ADD PRIMARY KEY (epoch)"), + )) + .await?; + } + sea_orm::DatabaseBackend::Postgres => { + manager + .get_connection() + .execute(sea_orm::Statement::from_string( + sea_orm::DatabaseBackend::Postgres, + format!("ALTER TABLE {TABLE_NAME} DROP CONSTRAINT {TABLE_NAME}_pkey"), + )) + .await?; + manager + .alter_table( + Table::alter() + .table(HummockEpochToVersion::Table) + .drop_column(HummockEpochToVersion::TableId) + .to_owned(), + ) + .await?; + manager + .get_connection() + .execute(sea_orm::Statement::from_string( + sea_orm::DatabaseBackend::Postgres, + format!("ALTER TABLE {TABLE_NAME} ADD PRIMARY KEY (epoch)"), + )) + .await?; + } + sea_orm::DatabaseBackend::Sqlite => { + manager + .drop_table( + sea_orm_migration::prelude::Table::drop() + .table(HummockEpochToVersion::Table) + .if_exists() + .cascade() + .to_owned(), + ) + .await?; + + manager + .create_table( + Table::create() + .table(HummockEpochToVersion::Table) + .if_not_exists() + .col( + ColumnDef::new(HummockEpochToVersion::Epoch) + .big_integer() + .not_null() + .primary_key(), + ) + .col( + ColumnDef::new(HummockEpochToVersion::VersionId) + .big_integer() + .not_null(), + ) + .to_owned(), + ) + .await?; + } + } + + Ok(()) + } +} + +#[derive(DeriveIden)] +enum HummockEpochToVersion { + Table, + Epoch, + TableId, + VersionId, +} diff --git a/src/meta/model_v2/src/hummock_epoch_to_version.rs b/src/meta/model_v2/src/hummock_epoch_to_version.rs index 181b1b320bc54..f54551aa80178 100644 --- a/src/meta/model_v2/src/hummock_epoch_to_version.rs +++ b/src/meta/model_v2/src/hummock_epoch_to_version.rs @@ -22,6 +22,8 @@ use crate::{Epoch, HummockVersionId}; pub struct Model { #[sea_orm(primary_key, auto_increment = false)] pub epoch: Epoch, + #[sea_orm(primary_key, auto_increment = false)] + pub table_id: i64, pub version_id: HummockVersionId, } diff --git 
a/src/meta/node/src/lib.rs b/src/meta/node/src/lib.rs index 88a76d1a1c706..6fa88fd412e31 100644 --- a/src/meta/node/src/lib.rs +++ b/src/meta/node/src/lib.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![feature(lint_reasons)] #![feature(let_chains)] #![cfg_attr(coverage, feature(coverage_attribute))] diff --git a/src/meta/service/src/hummock_service.rs b/src/meta/service/src/hummock_service.rs index 21e203d8440bd..c3fc2da229585 100644 --- a/src/meta/service/src/hummock_service.rs +++ b/src/meta/service/src/hummock_service.rs @@ -457,7 +457,7 @@ impl HummockManagerService for HummockServiceImpl { let req = request.into_inner(); let new_group_id = self .hummock_manager - .split_compaction_group(req.group_id, &req.table_ids) + .split_compaction_group(req.group_id, &req.table_ids, req.partition_vnode_count) .await?; Ok(Response::new(SplitCompactionGroupResponse { new_group_id })) } @@ -710,12 +710,26 @@ impl HummockManagerService for HummockServiceImpl { &self, request: Request, ) -> Result, Status> { - let GetVersionByEpochRequest { epoch } = request.into_inner(); - let version = self.hummock_manager.epoch_to_version(epoch).await?; + let GetVersionByEpochRequest { epoch, table_id } = request.into_inner(); + let version = self + .hummock_manager + .epoch_to_version(epoch, table_id) + .await?; Ok(Response::new(GetVersionByEpochResponse { version: Some(version.to_protobuf()), })) } + + async fn merge_compaction_group( + &self, + request: Request, + ) -> Result, Status> { + let req = request.into_inner(); + self.hummock_manager + .merge_compaction_group(req.left_group_id, req.right_group_id) + .await?; + Ok(Response::new(MergeCompactionGroupResponse {})) + } } #[cfg(test)] diff --git a/src/meta/service/src/lib.rs b/src/meta/service/src/lib.rs index e2f57d4a26bbb..2e327dc47a59e 100644 --- a/src/meta/service/src/lib.rs +++ b/src/meta/service/src/lib.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![feature(lint_reasons)] #![feature(let_chains)] #![feature(impl_trait_in_assoc_type)] #![cfg_attr(coverage, feature(coverage_attribute))] diff --git a/src/meta/src/barrier/command.rs b/src/meta/src/barrier/command.rs index 577a0bef25360..927374da31586 100644 --- a/src/meta/src/barrier/command.rs +++ b/src/meta/src/barrier/command.rs @@ -524,14 +524,8 @@ impl Command { } Command::SourceSplitAssignment(change) => { - let mut checked_assignment = change.clone(); - checked_assignment - .iter_mut() - .for_each(|(_, assignment)| validate_assignment(assignment)); - let mut diff = HashMap::new(); - - for actor_splits in checked_assignment.values() { + for actor_splits in change.values() { diff.extend(actor_splits.clone()); } @@ -583,7 +577,11 @@ impl Command { let mut checked_split_assignment = split_assignment.clone(); checked_split_assignment .iter_mut() - .for_each(|(_, assignment)| validate_assignment(assignment)); + .for_each(|(_, assignment)| { + // No related actor running before, we don't need to check the mutation + // should be wrapped with pause. + validate_assignment(assignment); + }); let actor_splits = checked_split_assignment .values() .flat_map(build_actor_connector_splits) @@ -791,7 +789,8 @@ impl Command { for reschedule in reschedules.values() { let mut checked_assignment = reschedule.actor_splits.clone(); - validate_assignment(&mut checked_assignment); + // Update mutation always wrapped by Pause and Resume mutation. 
no further action needed. + _ = validate_assignment(&mut checked_assignment); for (actor_id, splits) in &checked_assignment { actor_splits.insert( diff --git a/src/meta/src/barrier/recovery.rs b/src/meta/src/barrier/recovery.rs index 63cd4c16d9aaf..9fe0033f7e91b 100644 --- a/src/meta/src/barrier/recovery.rs +++ b/src/meta/src/barrier/recovery.rs @@ -40,7 +40,6 @@ use crate::barrier::rpc::ControlStreamManager; use crate::barrier::schedule::ScheduledBarriers; use crate::barrier::state::BarrierManagerState; use crate::barrier::{BarrierKind, GlobalBarrierManager, GlobalBarrierManagerContext}; -use crate::controller::catalog::ReleaseContext; use crate::manager::{ActiveStreamingWorkerNodes, MetadataManager, WorkerId}; use crate::model::{MetadataModel, MigrationPlan, TableFragments, TableParallelism}; use crate::stream::{build_actor_connector_splits, RescheduleOptions, TableResizePolicy}; @@ -100,8 +99,7 @@ impl GlobalBarrierManagerContext { } MetadataManager::V2(mgr) => { mgr.catalog_controller.clean_dirty_subscription().await?; - let ReleaseContext { source_ids, .. } = - mgr.catalog_controller.clean_dirty_creating_jobs().await?; + let source_ids = mgr.catalog_controller.clean_dirty_creating_jobs().await?; // unregister cleaned sources. self.source_manager diff --git a/src/meta/src/barrier/rpc.rs b/src/meta/src/barrier/rpc.rs index 1e7d9b5dfa759..97b3636e8dba3 100644 --- a/src/meta/src/barrier/rpc.rs +++ b/src/meta/src/barrier/rpc.rs @@ -14,14 +14,13 @@ use std::collections::{HashMap, HashSet}; use std::error::Error; -use std::future::Future; use std::time::Duration; use anyhow::anyhow; use fail::fail_point; use futures::future::try_join_all; use futures::stream::{BoxStream, FuturesUnordered}; -use futures::{FutureExt, StreamExt}; +use futures::StreamExt; use itertools::Itertools; use risingwave_common::catalog::TableId; use risingwave_common::hash::ActorId; @@ -58,33 +57,47 @@ struct ControlStreamNode { sender: UnboundedSender, } -fn into_future( - worker_id: WorkerId, - stream: BoxStream< - 'static, - risingwave_rpc_client::error::Result, - >, -) -> ResponseStreamFuture { - stream.into_future().map(move |(opt, stream)| { - ( - worker_id, - stream, - opt.ok_or_else(|| anyhow!("end of stream").into()) - .and_then(|result| result.map_err(|e| e.into())), - ) - }) +mod response_stream_future { + use std::future::Future; + + use anyhow::anyhow; + use futures::stream::BoxStream; + use futures::{FutureExt, StreamExt}; + use risingwave_pb::stream_service::StreamingControlStreamResponse; + + use crate::manager::WorkerId; + use crate::MetaResult; + + pub(super) fn into_future( + worker_id: WorkerId, + stream: BoxStream< + 'static, + risingwave_rpc_client::error::Result, + >, + ) -> ResponseStreamFuture { + stream.into_future().map(move |(opt, stream)| { + ( + worker_id, + stream, + opt.ok_or_else(|| anyhow!("end of stream").into()) + .and_then(|result| result.map_err(|e| e.into())), + ) + }) + } + + pub(super) type ResponseStreamFuture = impl Future< + Output = ( + WorkerId, + BoxStream< + 'static, + risingwave_rpc_client::error::Result, + >, + MetaResult, + ), + > + 'static; } -type ResponseStreamFuture = impl Future< - Output = ( - WorkerId, - BoxStream< - 'static, - risingwave_rpc_client::error::Result, - >, - MetaResult, - ), - > + 'static; +use response_stream_future::*; pub(super) struct ControlStreamManager { context: GlobalBarrierManagerContext, @@ -360,7 +373,7 @@ impl ControlStreamManager { self.nodes .iter_mut() - .map(|(node_id, node)| { + .try_for_each(|(node_id, node)| { let 
actor_ids_to_collect: Vec<_> = pre_applied_graph_info .actor_ids_to_collect(*node_id) .collect(); @@ -427,7 +440,6 @@ impl ControlStreamManager { Result::<_, MetaError>::Ok(()) } }) - .try_collect() .inspect_err(|e| { // Record failure in event log. use risingwave_pb::meta::event_log; diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs index ffab5160b5d9f..c9c3210dd6c67 100644 --- a/src/meta/src/controller/catalog.rs +++ b/src/meta/src/controller/catalog.rs @@ -40,7 +40,7 @@ use risingwave_pb::catalog::subscription::SubscriptionState; use risingwave_pb::catalog::table::PbTableType; use risingwave_pb::catalog::{ PbComment, PbConnection, PbDatabase, PbFunction, PbIndex, PbSchema, PbSecret, PbSink, PbSource, - PbSubscription, PbTable, PbView, + PbStreamJobStatus, PbSubscription, PbTable, PbView, }; use risingwave_pb::meta::cancel_creating_jobs_request::PbCreatingJobInfo; use risingwave_pb::meta::list_object_dependencies_response::PbObjectDependencies; @@ -728,11 +728,11 @@ impl CatalogController { } /// `clean_dirty_creating_jobs` cleans up creating jobs that are creating in Foreground mode or in Initial status. - pub async fn clean_dirty_creating_jobs(&self) -> MetaResult { + pub async fn clean_dirty_creating_jobs(&self) -> MetaResult> { let inner = self.inner.write().await; let txn = inner.db.begin().await?; - let mut dirty_objs: Vec = streaming_job::Entity::find() + let dirty_job_objs: Vec = streaming_job::Entity::find() .select_only() .column(streaming_job::Column::JobId) .columns([ @@ -755,36 +755,46 @@ impl CatalogController { let changed = Self::clean_dirty_sink_downstreams(&txn).await?; - if dirty_objs.is_empty() { + if dirty_job_objs.is_empty() { if changed { txn.commit().await?; } - return Ok(ReleaseContext::default()); + return Ok(vec![]); } - self.log_cleaned_dirty_jobs(&dirty_objs, &txn).await?; + self.log_cleaned_dirty_jobs(&dirty_job_objs, &txn).await?; - let dirty_job_ids = dirty_objs.iter().map(|obj| obj.oid).collect::>(); + let dirty_job_ids = dirty_job_objs.iter().map(|obj| obj.oid).collect::>(); // Filter out dummy objs for replacement. // todo: we'd better introduce a new dummy object type for replacement. - let all_dirty_table_ids = dirty_objs + let all_dirty_table_ids = dirty_job_objs .iter() .filter(|obj| obj.obj_type == ObjectType::Table) .map(|obj| obj.oid) .collect_vec(); - let dirty_table_ids: HashSet = Table::find() + let dirty_table_type_map: HashMap = Table::find() .select_only() .column(table::Column::TableId) + .column(table::Column::TableType) .filter(table::Column::TableId.is_in(all_dirty_table_ids)) - .into_tuple::() + .into_tuple::<(ObjectId, TableType)>() .all(&txn) .await? .into_iter() .collect(); - dirty_objs - .retain(|obj| obj.obj_type != ObjectType::Table || dirty_table_ids.contains(&obj.oid)); + + // Only notify delete for failed materialized views. 
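As the comment above says, the reworked `clean_dirty_creating_jobs` only builds a frontend delete notification for dirty materialized views (plus their internal tables) and otherwise just hands back the associated source ids to unregister. The selection step is a `matches!` over a job-id-to-table-type lookup; a minimal standalone version with made-up ids:

```rust
use std::collections::HashMap;

enum TableType {
    Table,
    MaterializedView,
    Internal,
}

fn main() {
    // oid -> table type, as queried from the `table` catalog in the hunk above.
    let dirty_table_type_map: HashMap<i32, TableType> = HashMap::from([
        (1, TableType::MaterializedView),
        (2, TableType::Table),
        (3, TableType::Internal),
    ]);
    let dirty_job_ids = vec![1, 2, 4]; // 4 has no table entry (e.g. a sink job)

    // Keep only dirty jobs whose catalog entry is a materialized view.
    let dirty_mview_ids: Vec<i32> = dirty_job_ids
        .into_iter()
        .filter(|oid| {
            matches!(
                dirty_table_type_map.get(oid),
                Some(TableType::MaterializedView)
            )
        })
        .collect();
    assert_eq!(dirty_mview_ids, vec![1]);
}
```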
+ let dirty_mview_objs = dirty_job_objs + .into_iter() + .filter(|obj| { + matches!( + dirty_table_type_map.get(&obj.oid), + Some(TableType::MaterializedView) + ) + }) + .collect_vec(); let associated_source_ids: Vec = Table::find() .select_only() @@ -797,15 +807,16 @@ impl CatalogController { .into_tuple() .all(&txn) .await?; - let dirty_source_objs: Vec = Object::find() - .filter(object::Column::Oid.is_in(associated_source_ids.clone())) - .into_partial_model() + + let dirty_state_table_ids: Vec = Table::find() + .select_only() + .column(table::Column::TableId) + .filter(table::Column::BelongsToJobId.is_in(dirty_job_ids.clone())) + .into_tuple() .all(&txn) .await?; - dirty_objs.extend(dirty_source_objs); - let mut dirty_state_table_ids = vec![]; - let to_drop_internal_table_objs: Vec = Object::find() + let dirty_mview_internal_table_objs = Object::find() .select_only() .columns([ object::Column::Oid, @@ -814,17 +825,15 @@ impl CatalogController { object::Column::DatabaseId, ]) .join(JoinType::InnerJoin, object::Relation::Table.def()) - .filter(table::Column::BelongsToJobId.is_in(dirty_job_ids.clone())) + .filter(table::Column::BelongsToJobId.is_in(dirty_mview_objs.iter().map(|obj| obj.oid))) .into_partial_model() .all(&txn) .await?; - dirty_state_table_ids.extend(to_drop_internal_table_objs.iter().map(|obj| obj.oid)); - dirty_objs.extend(to_drop_internal_table_objs); let to_delete_objs: HashSet = dirty_job_ids .clone() .into_iter() - .chain(dirty_state_table_ids.clone().into_iter()) + .chain(dirty_state_table_ids.into_iter()) .chain(associated_source_ids.clone().into_iter()) .collect(); @@ -836,17 +845,18 @@ impl CatalogController { txn.commit().await?; - let relation_group = build_relation_group(dirty_objs); + let relation_group = build_relation_group( + dirty_mview_objs + .into_iter() + .chain(dirty_mview_internal_table_objs.into_iter()) + .collect_vec(), + ); let _version = self .notify_frontend(NotificationOperation::Delete, relation_group) .await; - Ok(ReleaseContext { - state_table_ids: dirty_state_table_ids, - source_ids: associated_source_ids, - ..Default::default() - }) + Ok(associated_source_ids) } async fn log_cleaned_dirty_jobs( @@ -3137,12 +3147,16 @@ impl CatalogControllerInner { Ok(table_ids) } - /// `list_tables` return all `CREATED` tables and internal tables that belong to `CREATED` streaming jobs. + /// `list_tables` return all `CREATED` tables, `CREATING` materialized views and internal tables that belong to them. 
async fn list_tables(&self) -> MetaResult> { let table_objs = Table::find() .find_also_related(Object) .join(JoinType::LeftJoin, object::Relation::StreamingJob.def()) - .filter(streaming_job::Column::JobStatus.eq(JobStatus::Created)) + .filter( + streaming_job::Column::JobStatus + .eq(JobStatus::Created) + .or(table::Column::TableType.eq(TableType::MaterializedView)), + ) .all(&self.db) .await?; @@ -3154,12 +3168,18 @@ impl CatalogControllerInner { .all(&self.db) .await?; + let job_ids: HashSet = table_objs + .iter() + .map(|(t, _)| t.table_id) + .chain(created_streaming_job_ids.iter().cloned()) + .collect(); + let internal_table_objs = Table::find() .find_also_related(Object) .filter( table::Column::TableType .eq(TableType::Internal) - .and(table::Column::BelongsToJobId.is_in(created_streaming_job_ids)), + .and(table::Column::BelongsToJobId.is_in(job_ids)), ) .all(&self.db) .await?; @@ -3167,7 +3187,19 @@ impl CatalogControllerInner { Ok(table_objs .into_iter() .chain(internal_table_objs.into_iter()) - .map(|(table, obj)| ObjectModel(table, obj.unwrap()).into()) + .map(|(table, obj)| { + // Correctly set the stream job status for creating materialized views and internal tables. + let is_created = created_streaming_job_ids.contains(&table.table_id) + || (table.table_type == TableType::Internal + && created_streaming_job_ids.contains(&table.belongs_to_job_id.unwrap())); + let mut pb_table: PbTable = ObjectModel(table, obj.unwrap()).into(); + pb_table.stream_job_status = if is_created { + PbStreamJobStatus::Created.into() + } else { + PbStreamJobStatus::Creating.into() + }; + pb_table + }) .collect()) } diff --git a/src/meta/src/controller/mod.rs b/src/meta/src/controller/mod.rs index e22b0f20ee86e..3e903802b86ee 100644 --- a/src/meta/src/controller/mod.rs +++ b/src/meta/src/controller/mod.rs @@ -150,7 +150,7 @@ impl From> for PbTable { Epoch::from_unix_millis(value.1.created_at.and_utc().timestamp_millis() as _).0, ), cleaned_by_watermark: value.0.cleaned_by_watermark, - stream_job_status: PbStreamJobStatus::Created as _, // todo: deprecate it. + stream_job_status: PbStreamJobStatus::Created as _, create_type: PbCreateType::Foreground as _, version: value.0.version.map(|v| v.to_protobuf()), optional_associated_source_id: value @@ -236,7 +236,7 @@ impl From> for PbSink { ), db_name: value.0.db_name, sink_from_name: value.0.sink_from_name, - stream_job_status: PbStreamJobStatus::Created as _, // todo: deprecate it. + stream_job_status: PbStreamJobStatus::Created as _, format_desc: value.0.sink_format_desc.map(|desc| desc.to_protobuf()), target_table: value.0.target_table.map(|id| id as _), initialized_at_cluster_version: value.1.initialized_at_cluster_version, @@ -299,7 +299,7 @@ impl From> for PbIndex { created_at_epoch: Some( Epoch::from_unix_millis(value.1.created_at.and_utc().timestamp_millis() as _).0, ), - stream_job_status: PbStreamJobStatus::Created as _, // todo: deprecate it. 
+ stream_job_status: PbStreamJobStatus::Created as _, initialized_at_cluster_version: value.1.initialized_at_cluster_version, created_at_cluster_version: value.1.created_at_cluster_version, } diff --git a/src/meta/src/controller/streaming_job.rs b/src/meta/src/controller/streaming_job.rs index fd12630fd1649..c0035f0b2bc7d 100644 --- a/src/meta/src/controller/streaming_job.rs +++ b/src/meta/src/controller/streaming_job.rs @@ -26,6 +26,7 @@ use risingwave_meta_model_v2::prelude::{ Actor, ActorDispatcher, Fragment, Index, Object, ObjectDependency, Sink, Source, StreamingJob as StreamingJobModel, Table, }; +use risingwave_meta_model_v2::table::TableType; use risingwave_meta_model_v2::{ actor, actor_dispatcher, fragment, index, object, object_dependency, sink, source, streaming_job, table, ActorId, ActorUpstreamActors, ColumnCatalogArray, CreateType, DatabaseId, @@ -208,9 +209,6 @@ impl CatalogController { sink.id = job_id as _; let sink_model: sink::ActiveModel = sink.clone().into(); Sink::insert(sink_model).exec(&txn).await?; - relations.push(Relation { - relation_info: Some(RelationInfo::Sink(sink.to_owned())), - }); } StreamingJob::Table(src, table, _) => { let job_id = Self::create_streaming_job_obj( @@ -242,15 +240,9 @@ impl CatalogController { ); let source: source::ActiveModel = src.clone().into(); Source::insert(source).exec(&txn).await?; - relations.push(Relation { - relation_info: Some(RelationInfo::Source(src.to_owned())), - }); } let table_model: table::ActiveModel = table.clone().into(); Table::insert(table_model).exec(&txn).await?; - relations.push(Relation { - relation_info: Some(RelationInfo::Table(table.to_owned())), - }); } StreamingJob::Index(index, table) => { ensure_object_id(ObjectType::Table, index.primary_table_id as _, &txn).await?; @@ -282,12 +274,6 @@ impl CatalogController { Table::insert(table_model).exec(&txn).await?; let index_model: index::ActiveModel = index.clone().into(); Index::insert(index_model).exec(&txn).await?; - relations.push(Relation { - relation_info: Some(RelationInfo::Table(table.to_owned())), - }); - relations.push(Relation { - relation_info: Some(RelationInfo::Index(index.to_owned())), - }); } StreamingJob::Source(src) => { let job_id = Self::create_streaming_job_obj( @@ -304,9 +290,6 @@ impl CatalogController { src.id = job_id as _; let source_model: source::ActiveModel = src.clone().into(); Source::insert(source_model).exec(&txn).await?; - relations.push(Relation { - relation_info: Some(RelationInfo::Source(src.to_owned())), - }); } } @@ -331,21 +314,23 @@ impl CatalogController { txn.commit().await?; - let _version = self - .notify_frontend( + if !relations.is_empty() { + self.notify_frontend( Operation::Add, Info::RelationGroup(RelationGroup { relations }), ) .await; + } Ok(()) } pub async fn create_internal_table_catalog( &self, - job_id: ObjectId, + job: &StreamingJob, mut internal_tables: Vec, ) -> MetaResult> { + let job_id = job.id() as ObjectId; let inner = self.inner.write().await; let txn = inner.db.begin().await?; let mut table_id_map = HashMap::new(); @@ -363,13 +348,14 @@ impl CatalogController { table.id = table_id as _; let mut table_model: table::ActiveModel = table.clone().into(); table_model.table_id = Set(table_id as _); - table_model.belongs_to_job_id = Set(Some(job_id as _)); + table_model.belongs_to_job_id = Set(Some(job_id)); table_model.fragment_id = NotSet; Table::insert(table_model).exec(&txn).await?; } txn.commit().await?; - let _version = self - .notify_frontend( + + if job.is_materialized_view() { + 
self.notify_frontend( Operation::Add, Info::RelationGroup(RelationGroup { relations: internal_tables @@ -381,6 +367,8 @@ impl CatalogController { }), ) .await; + } + Ok(table_id_map) } @@ -497,64 +485,52 @@ impl CatalogController { .all(&txn) .await?; - let associated_source_id: Option = Table::find_by_id(job_id) - .select_only() - .column(table::Column::OptionalAssociatedSourceId) - .filter(table::Column::OptionalAssociatedSourceId.is_not_null()) - .into_tuple() - .one(&txn) - .await?; - - // Get notification info + // Get the notification info if the job is a materialized view. + let table_obj = Table::find_by_id(job_id).one(&txn).await?; let mut objs = vec![]; - let obj: Option = Object::find_by_id(job_id) - .select_only() - .columns([ - object::Column::Oid, - object::Column::ObjType, - object::Column::SchemaId, - object::Column::DatabaseId, - ]) - .into_partial_model() - .one(&txn) - .await?; - let obj = obj.ok_or_else(|| MetaError::catalog_id_not_found("streaming job", job_id))?; - objs.push(obj); - let internal_table_objs: Vec = Object::find() - .select_only() - .columns([ - object::Column::Oid, - object::Column::ObjType, - object::Column::SchemaId, - object::Column::DatabaseId, - ]) - .join(JoinType::InnerJoin, object::Relation::Table.def()) - .filter(table::Column::BelongsToJobId.eq(job_id)) - .into_partial_model() - .all(&txn) - .await?; - objs.extend(internal_table_objs); - if let Some(source_id) = associated_source_id { - let source_obj = Object::find_by_id(source_id) + if let Some(table) = &table_obj + && table.table_type == TableType::MaterializedView + { + let obj: Option = Object::find_by_id(job_id) .select_only() - .column(object::Column::ObjType) + .columns([ + object::Column::Oid, + object::Column::ObjType, + object::Column::SchemaId, + object::Column::DatabaseId, + ]) .into_partial_model() .one(&txn) - .await? - .ok_or_else(|| MetaError::catalog_id_not_found("source", source_id))?; - objs.push(source_obj); + .await?; + let obj = + obj.ok_or_else(|| MetaError::catalog_id_not_found("streaming job", job_id))?; + objs.push(obj); + let internal_table_objs: Vec = Object::find() + .select_only() + .columns([ + object::Column::Oid, + object::Column::ObjType, + object::Column::SchemaId, + object::Column::DatabaseId, + ]) + .join(JoinType::InnerJoin, object::Relation::Table.def()) + .filter(table::Column::BelongsToJobId.eq(job_id)) + .into_partial_model() + .all(&txn) + .await?; + objs.extend(internal_table_objs); } - let relation_group = build_relation_group(objs); - // Can delete objects after queried notification info Object::delete_by_id(job_id).exec(&txn).await?; if !internal_table_ids.is_empty() { Object::delete_many() - .filter(object::Column::Oid.is_in(internal_table_ids.iter().cloned())) + .filter(object::Column::Oid.is_in(internal_table_ids)) .exec(&txn) .await?; } - if let Some(source_id) = associated_source_id { + if let Some(t) = &table_obj + && let Some(source_id) = t.optional_associated_source_id + { Object::delete_by_id(source_id).exec(&txn).await?; } @@ -576,9 +552,10 @@ impl CatalogController { } txn.commit().await?; - let _version = self - .notify_frontend(Operation::Delete, relation_group) - .await; + if !objs.is_empty() { + self.notify_frontend(Operation::Delete, build_relation_group(objs)) + .await; + } Ok(true) } @@ -778,6 +755,7 @@ impl CatalogController { )), }) .collect_vec(); + let mut notification_op = NotificationOperation::Add; match job_type { ObjectType::Table => { @@ -786,6 +764,10 @@ impl CatalogController { .one(&txn) .await? 
.ok_or_else(|| MetaError::catalog_id_not_found("table", job_id))?; + if table.table_type == TableType::MaterializedView { + notification_op = NotificationOperation::Update; + } + if let Some(source_id) = table.optional_associated_source_id { let (src, obj) = Source::find_by_id(source_id) .find_also_related(Object) @@ -892,7 +874,7 @@ impl CatalogController { let mut version = self .notify_frontend( - NotificationOperation::Update, + notification_op, NotificationInfo::RelationGroup(PbRelationGroup { relations }), ) .await; diff --git a/src/meta/src/controller/utils.rs b/src/meta/src/controller/utils.rs index 2d517272a3d00..43fed3380d6bc 100644 --- a/src/meta/src/controller/utils.rs +++ b/src/meta/src/controller/utils.rs @@ -230,7 +230,7 @@ pub fn construct_sink_cycle_check_query( .to_owned() } -#[derive(Clone, DerivePartialModel, FromQueryResult)] +#[derive(Clone, DerivePartialModel, FromQueryResult, Debug)] #[sea_orm(entity = "Object")] pub struct PartialObject { pub oid: ObjectId, diff --git a/src/meta/src/hummock/compactor_manager.rs b/src/meta/src/hummock/compactor_manager.rs index 252f92c404015..f8d8ae7f23c4e 100644 --- a/src/meta/src/hummock/compactor_manager.rs +++ b/src/meta/src/hummock/compactor_manager.rs @@ -476,16 +476,18 @@ impl CompactorManager { #[cfg(test)] mod tests { + use std::sync::Arc; use std::time::Duration; use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; use risingwave_pb::hummock::CompactTaskProgress; + use risingwave_rpc_client::HummockMetaClient; use crate::hummock::compaction::selector::default_compaction_selector; use crate::hummock::test_utils::{ add_ssts, register_table_ids_to_compaction_group, setup_compute_env, }; - use crate::hummock::CompactorManager; + use crate::hummock::{CompactorManager, MockHummockMetaClient}; #[tokio::test] async fn test_compactor_manager() { @@ -493,6 +495,9 @@ mod tests { let (env, context_id) = { let (env, hummock_manager, _cluster_manager, worker_node) = setup_compute_env(80).await; let context_id = worker_node.id; + let hummock_meta_client: Arc = Arc::new( + MockHummockMetaClient::new(hummock_manager.clone(), worker_node.id), + ); let compactor_manager = hummock_manager.compactor_manager_ref_for_test(); register_table_ids_to_compaction_group( hummock_manager.as_ref(), @@ -500,7 +505,8 @@ mod tests { StaticCompactionGroupId::StateDefault.into(), ) .await; - let _sst_infos = add_ssts(1, hummock_manager.as_ref(), context_id).await; + let _sst_infos = + add_ssts(1, hummock_manager.as_ref(), hummock_meta_client.clone()).await; let _receiver = compactor_manager.add_compactor(context_id); hummock_manager .get_compact_task( diff --git a/src/meta/src/hummock/manager/checkpoint.rs b/src/meta/src/hummock/manager/checkpoint.rs index bc3701a6b9d82..f678014d440c8 100644 --- a/src/meta/src/hummock/manager/checkpoint.rs +++ b/src/meta/src/hummock/manager/checkpoint.rs @@ -156,8 +156,8 @@ impl HummockManager { .hummock_version_deltas .range((Excluded(old_checkpoint_id), Included(new_checkpoint_id))) { - for group_deltas in version_delta.group_deltas.values() { - let summary = summarize_group_deltas(group_deltas); + for (group_id, group_deltas) in &version_delta.group_deltas { + let summary = summarize_group_deltas(group_deltas, *group_id); object_sizes.extend( summary .insert_table_infos diff --git a/src/meta/src/hummock/manager/commit_epoch.rs b/src/meta/src/hummock/manager/commit_epoch.rs index e7bca768437f2..8c021509dcbb2 100644 --- a/src/meta/src/hummock/manager/commit_epoch.rs +++ 
b/src/meta/src/hummock/manager/commit_epoch.rs @@ -70,37 +70,6 @@ pub struct CommitEpochInfo { } impl HummockManager { - #[cfg(any(test, feature = "test"))] - pub async fn commit_epoch_for_test( - &self, - epoch: u64, - sstables: Vec>, - sst_to_context: HashMap, - ) -> Result<()> { - let tables = self - .versioning - .read() - .await - .current_version - .state_table_info - .info() - .keys() - .cloned() - .collect(); - let info = CommitEpochInfo { - sstables: sstables.into_iter().map(Into::into).collect(), - new_table_watermarks: HashMap::new(), - sst_to_context, - new_table_fragment_info: NewTableFragmentInfo::None, - change_log_delta: HashMap::new(), - committed_epoch: epoch, - tables_to_commit: tables, - is_visible_table_committed_epoch: true, - }; - self.commit_epoch(info).await?; - Ok(()) - } - /// Caller should ensure `epoch` > `max_committed_epoch` pub async fn commit_epoch( &self, @@ -232,7 +201,7 @@ impl HummockManager { is_visible_table_committed_epoch, new_compaction_group, commit_sstables, - new_table_ids, + &new_table_ids, new_table_watermarks, change_log_delta, ); @@ -289,6 +258,9 @@ impl HummockManager { .values() .map(|g| (g.group_id, g.parent_group_id)) .collect(); + let time_travel_tables_to_commit = table_compaction_group_mapping + .iter() + .filter(|(table_id, _)| tables_to_commit.contains(table_id)); let mut txn = sql_store.conn.begin().await?; let version_snapshot_sst_ids = self .write_time_travel_metadata( @@ -297,6 +269,8 @@ impl HummockManager { time_travel_delta, &group_parents, &versioning.last_time_travel_snapshot_sst_ids, + time_travel_tables_to_commit, + committed_epoch, ) .await?; commit_multi_var_with_provided_txn!( diff --git a/src/meta/src/hummock/manager/compaction_group_manager.rs b/src/meta/src/hummock/manager/compaction/compaction_group_manager.rs similarity index 96% rename from src/meta/src/hummock/manager/compaction_group_manager.rs rename to src/meta/src/hummock/manager/compaction/compaction_group_manager.rs index c68fc4222f283..807ba6f3fd35f 100644 --- a/src/meta/src/hummock/manager/compaction_group_manager.rs +++ b/src/meta/src/hummock/manager/compaction/compaction_group_manager.rs @@ -54,7 +54,7 @@ use crate::model::{ type CompactionGroupTransaction<'a> = BTreeMapTransaction<'a, CompactionGroupId, CompactionGroup>; impl CompactionGroupManager { - pub(super) async fn new(env: &MetaSrvEnv) -> Result { + pub(crate) async fn new(env: &MetaSrvEnv) -> Result { let default_config = match env.opts.compaction_config.as_ref() { None => CompactionConfigBuilder::new().build(), Some(opt) => CompactionConfigBuilder::with_opt(opt).build(), @@ -62,7 +62,7 @@ impl CompactionGroupManager { Self::new_with_config(env, default_config).await } - pub(super) async fn new_with_config( + pub(crate) async fn new_with_config( env: &MetaSrvEnv, default_config: CompactionConfig, ) -> Result { @@ -231,12 +231,9 @@ impl HummockManager { let mut is_group_init = false; group_id = *new_compaction_group_id .get_or_try_init(|| async { - next_compaction_group_id(&self.env) - .await - .map(|new_group_id| { - is_group_init = true; - new_group_id - }) + next_compaction_group_id(&self.env).await.inspect(|_| { + is_group_init = true; + }) }) .await?; if is_group_init { @@ -428,24 +425,6 @@ impl HummockManager { results } - /// Splits a compaction group into two. The new one will contain `table_ids`. - /// Returns the newly created compaction group id. 
- pub async fn split_compaction_group( - &self, - parent_group_id: CompactionGroupId, - table_ids: &[StateTableId], - ) -> Result { - let result = self - .move_state_table_to_compaction_group( - parent_group_id, - table_ids, - self.env.opts.partition_vnode_count, - ) - .await?; - - Ok(result) - } - /// move some table to another compaction-group. Create a new compaction group if it does not /// exist. pub async fn move_state_table_to_compaction_group( @@ -651,7 +630,7 @@ impl HummockManager { infos } - pub(super) async fn initial_compaction_group_config_after_load( + pub(crate) async fn initial_compaction_group_config_after_load( &self, versioning_guard: &Versioning, compaction_group_manager: &mut CompactionGroupManager, @@ -675,7 +654,7 @@ impl HummockManager { /// 1. initialize default static compaction group. /// 2. register new table to new compaction group. /// 3. move existent table to new compaction group. -pub(super) struct CompactionGroupManager { +pub(crate) struct CompactionGroupManager { compaction_groups: BTreeMap, default_config: Arc, /// Tables that write limit is trigger for. @@ -709,7 +688,7 @@ impl CompactionGroupManager { } /// Tries to get compaction group config for `compaction_group_id`. - pub(super) fn try_get_compaction_group_config( + pub(crate) fn try_get_compaction_group_config( &self, compaction_group_id: CompactionGroupId, ) -> Option { @@ -717,7 +696,7 @@ impl CompactionGroupManager { } /// Tries to get compaction group config for `compaction_group_id`. - pub(super) fn default_compaction_config(&self) -> Arc { + pub(crate) fn default_compaction_config(&self) -> Arc { self.default_config.clone() } } @@ -814,7 +793,7 @@ impl<'a> CompactionGroupTransaction<'a> { } /// Tries to get compaction group config for `compaction_group_id`. - pub(super) fn try_get_compaction_group_config( + pub(crate) fn try_get_compaction_group_config( &self, compaction_group_id: CompactionGroupId, ) -> Option<&CompactionGroup> { @@ -822,7 +801,7 @@ impl<'a> CompactionGroupTransaction<'a> { } /// Removes stale group configs. - fn purge(&mut self, existing_groups: HashSet) { + pub fn purge(&mut self, existing_groups: HashSet) { let stale_group = self .tree_ref() .keys() @@ -837,7 +816,7 @@ impl<'a> CompactionGroupTransaction<'a> { } } - pub(super) fn update_compaction_config( + pub(crate) fn update_compaction_config( &mut self, compaction_group_ids: &[CompactionGroupId], config_to_update: &[MutableConfig], diff --git a/src/meta/src/hummock/manager/compaction/compaction_group_schedule.rs b/src/meta/src/hummock/manager/compaction/compaction_group_schedule.rs new file mode 100644 index 0000000000000..93103ca87abf5 --- /dev/null +++ b/src/meta/src/hummock/manager/compaction/compaction_group_schedule.rs @@ -0,0 +1,359 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
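For reference, the split entry point removed from `compaction_group_manager.rs` just above reappears in this new module with `partition_vnode_count` as an explicit argument instead of a value read from `MetaOpts` inside the wrapper. A minimal call-site sketch under that assumption (the surrounding function is illustrative, not part of this patch):

```rust
// Sketch only: `manager` is a HummockManager handle obtained elsewhere.
async fn split_tables_out(
    manager: &HummockManager,
    parent_group_id: CompactionGroupId,
    table_ids: &[StateTableId],
) -> Result<CompactionGroupId> {
    // Callers now choose the vnode partition count themselves; the tests in
    // this patch pass 0 here.
    manager
        .split_compaction_group(parent_group_id, table_ids, 0)
        .await
}
```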
+ +use std::collections::{HashMap, HashSet, VecDeque}; +use std::ops::DerefMut; + +use itertools::Itertools; +use risingwave_common::catalog::TableId; +use risingwave_hummock_sdk::compact_task::ReportTask; +use risingwave_hummock_sdk::compaction_group::hummock_version_ext::TableGroupInfo; +use risingwave_hummock_sdk::compaction_group::{StateTableId, StaticCompactionGroupId}; +use risingwave_hummock_sdk::version::{GroupDelta, GroupDeltas}; +use risingwave_hummock_sdk::{can_concat, CompactionGroupId}; +use risingwave_pb::hummock::compact_task::TaskStatus; +use risingwave_pb::hummock::{PbGroupMerge, PbStateTableInfoDelta}; +use thiserror_ext::AsReport; + +use crate::hummock::error::{Error, Result}; +use crate::hummock::manager::transaction::HummockVersionTransaction; +use crate::hummock::manager::{commit_multi_var, HummockManager}; +use crate::hummock::metrics_utils::remove_compaction_group_in_sst_stat; + +impl HummockManager { + /// Splits a compaction group into two. The new one will contain `table_ids`. + /// Returns the newly created compaction group id. + pub async fn split_compaction_group( + &self, + parent_group_id: CompactionGroupId, + table_ids: &[StateTableId], + partition_vnode_count: u32, + ) -> Result { + let result = self + .move_state_table_to_compaction_group(parent_group_id, table_ids, partition_vnode_count) + .await?; + + Ok(result) + } + + pub async fn merge_compaction_group( + &self, + group_1: CompactionGroupId, + group_2: CompactionGroupId, + ) -> Result<()> { + let compaction_guard = self.compaction.write().await; + let mut versioning_guard = self.versioning.write().await; + let versioning = versioning_guard.deref_mut(); + // Validate parameters. + if !versioning.current_version.levels.contains_key(&group_1) { + return Err(Error::CompactionGroup(format!("invalid group {}", group_1))); + } + + if !versioning.current_version.levels.contains_key(&group_2) { + return Err(Error::CompactionGroup(format!("invalid group {}", group_2))); + } + + let state_table_info = versioning.current_version.state_table_info.clone(); + let mut member_table_ids_1 = state_table_info + .compaction_group_member_table_ids(group_1) + .iter() + .cloned() + .collect_vec(); + + let mut member_table_ids_2 = state_table_info + .compaction_group_member_table_ids(group_2) + .iter() + .cloned() + .collect_vec(); + + debug_assert!(!member_table_ids_1.is_empty()); + debug_assert!(!member_table_ids_2.is_empty()); + assert!(member_table_ids_1.is_sorted()); + assert!(member_table_ids_2.is_sorted()); + + // Make sure `member_table_ids_1` is smaller than `member_table_ids_2` + let (left_group_id, right_group_id) = + if member_table_ids_1.first().unwrap() < member_table_ids_2.first().unwrap() { + (group_1, group_2) + } else { + std::mem::swap(&mut member_table_ids_1, &mut member_table_ids_2); + (group_2, group_1) + }; + + // We can only merge two groups with non-overlapping member table ids + if member_table_ids_1.last().unwrap() >= member_table_ids_2.first().unwrap() { + return Err(Error::CompactionGroup(format!( + "invalid merge group_1 {} group_2 {}", + left_group_id, right_group_id + ))); + } + + let combined_member_table_ids = member_table_ids_1 + .iter() + .chain(member_table_ids_2.iter()) + .collect_vec(); + assert!(combined_member_table_ids.is_sorted()); + + // check duplicated sst_id + let mut sst_id_set = HashSet::new(); + for sst_id in versioning + .current_version + .get_sst_ids_by_group_id(left_group_id) + .chain( + versioning + .current_version + .get_sst_ids_by_group_id(right_group_id), + ) + { + 
if !sst_id_set.insert(sst_id) { + return Err(Error::CompactionGroup(format!( + "invalid merge group_1 {} group_2 {} duplicated sst_id {}", + left_group_id, right_group_id, sst_id + ))); + } + } + + // check branched sst on non-overlap level + { + let left_levels = versioning + .current_version + .get_compaction_group_levels(group_1); + + let right_levels = versioning + .current_version + .get_compaction_group_levels(group_2); + + // we can not check the l0 sub level, because the sub level id will be rewritten when merge + // This check will ensure that other non-overlapping level ssts can be concat and that the key_range is correct. + let max_level = std::cmp::max(left_levels.levels.len(), right_levels.levels.len()); + for level_idx in 1..=max_level { + let left_level = left_levels.get_level(level_idx); + let right_level = right_levels.get_level(level_idx); + if left_level.table_infos.is_empty() || right_level.table_infos.is_empty() { + continue; + } + + let left_last_sst = left_level.table_infos.last().unwrap().clone(); + let right_first_sst = right_level.table_infos.first().unwrap().clone(); + let left_sst_id = left_last_sst.sst_id; + let right_sst_id = right_first_sst.sst_id; + let left_obj_id = left_last_sst.object_id; + let right_obj_id = right_first_sst.object_id; + + // Since the sst key_range within a group is legal, we only need to check the ssts adjacent to the two groups. + if !can_concat(&[left_last_sst, right_first_sst]) { + return Err(Error::CompactionGroup(format!( + "invalid merge group_1 {} group_2 {} level_idx {} left_last_sst_id {} right_first_sst_id {} left_obj_id {} right_obj_id {}", + left_group_id, right_group_id, level_idx, left_sst_id, right_sst_id, left_obj_id, right_obj_id + ))); + } + } + } + + let mut version = HummockVersionTransaction::new( + &mut versioning.current_version, + &mut versioning.hummock_version_deltas, + self.env.notification_manager(), + &self.metrics, + ); + let mut new_version_delta = version.new_delta(); + + let target_compaction_group_id = { + // merge right_group_id to left_group_id and remove right_group_id + new_version_delta.group_deltas.insert( + left_group_id, + GroupDeltas { + group_deltas: vec![GroupDelta::GroupMerge(PbGroupMerge { + left_group_id, + right_group_id, + })], + }, + ); + left_group_id + }; + + // TODO: remove compaciton group_id from state_table_info + // rewrite compaction_group_id for all tables + new_version_delta.with_latest_version(|version, new_version_delta| { + for table_id in combined_member_table_ids { + let table_id = TableId::new(table_id.table_id()); + let info = version + .state_table_info + .info() + .get(&table_id) + .expect("have check exist previously"); + assert!(new_version_delta + .state_table_info_delta + .insert( + table_id, + PbStateTableInfoDelta { + committed_epoch: info.committed_epoch, + safe_epoch: info.safe_epoch, + compaction_group_id: target_compaction_group_id, + } + ) + .is_none()); + } + }); + + { + let mut compaction_group_manager = self.compaction_group_manager.write().await; + let mut compaction_groups_txn = compaction_group_manager.start_compaction_groups_txn(); + + // for metrics reclaim + { + let right_group_max_level = new_version_delta + .latest_version() + .get_compaction_group_levels(right_group_id) + .levels + .len(); + + remove_compaction_group_in_sst_stat( + &self.metrics, + right_group_id, + right_group_max_level, + ); + } + + new_version_delta.pre_apply(); + + // remove right_group_id + compaction_groups_txn.remove(right_group_id); + 
commit_multi_var!(self.meta_store_ref(), version, compaction_groups_txn)?; + } + + // Instead of handling DeltaType::GroupConstruct for time travel, simply enforce a version snapshot. + versioning.mark_next_time_travel_version_snapshot(); + + // cancel tasks + let mut canceled_tasks = vec![]; + // after merge, all tasks in right_group_id should be canceled + // otherwise, pending size calculation by level handler will make some mistake + for task_assignment in compaction_guard.compact_task_assignment.values() { + if let Some(task) = task_assignment.compact_task.as_ref() { + let need_cancel = task.compaction_group_id == right_group_id; + if need_cancel { + canceled_tasks.push(ReportTask { + task_id: task.task_id, + task_status: TaskStatus::ManualCanceled, + table_stats_change: HashMap::default(), + sorted_output_ssts: vec![], + }); + } + } + } + + drop(versioning_guard); + drop(compaction_guard); + self.report_compact_tasks(canceled_tasks).await?; + + Ok(()) + } + + pub async fn try_split_compaction_group( + &self, + table_write_throughput: &HashMap>, + checkpoint_secs: u64, + group: &TableGroupInfo, + created_tables: &HashSet, + ) { + // split high throughput table to dedicated compaction group + for (table_id, table_size) in &group.table_statistic { + self.try_move_table_to_dedicated_cg( + table_write_throughput, + table_id, + table_size, + !created_tables.contains(table_id), + checkpoint_secs, + group.group_id, + group.group_size, + ) + .await; + } + } + + pub async fn try_move_table_to_dedicated_cg( + &self, + table_write_throughput: &HashMap>, + table_id: &u32, + table_size: &u64, + is_creating_table: bool, + checkpoint_secs: u64, + parent_group_id: u64, + group_size: u64, + ) { + let default_group_id: CompactionGroupId = StaticCompactionGroupId::StateDefault.into(); + let mv_group_id: CompactionGroupId = StaticCompactionGroupId::MaterializedView.into(); + let partition_vnode_count = self.env.opts.partition_vnode_count; + let window_size = + self.env.opts.table_info_statistic_history_times / (checkpoint_secs as usize); + + let mut is_high_write_throughput = false; + let mut is_low_write_throughput = true; + if let Some(history) = table_write_throughput.get(table_id) { + if history.len() >= window_size { + is_high_write_throughput = history.iter().all(|throughput| { + *throughput / checkpoint_secs > self.env.opts.table_write_throughput_threshold + }); + is_low_write_throughput = history.iter().any(|throughput| { + *throughput / checkpoint_secs < self.env.opts.min_table_split_write_throughput + }); + } + } + + let state_table_size = *table_size; + + // 1. Avoid splitting a creating table + // 2. Avoid splitting a is_low_write_throughput creating table + // 3. Avoid splitting a non-high throughput medium-sized table + if is_creating_table + || (is_low_write_throughput) + || (state_table_size < self.env.opts.min_table_split_size && !is_high_write_throughput) + { + return; + } + + // do not split a large table and a small table because it would increase IOPS + // of small table. 
+ if parent_group_id != default_group_id && parent_group_id != mv_group_id { + let rest_group_size = group_size - state_table_size; + if rest_group_size < state_table_size + && rest_group_size < self.env.opts.min_table_split_size + { + return; + } + } + + let ret = self + .move_state_table_to_compaction_group( + parent_group_id, + &[*table_id], + partition_vnode_count, + ) + .await; + match ret { + Ok(new_group_id) => { + tracing::info!("move state table [{}] from group-{} to group-{} success table_vnode_partition_count {:?}", table_id, parent_group_id, new_group_id, partition_vnode_count); + } + Err(e) => { + tracing::info!( + error = %e.as_report(), + "failed to move state table [{}] from group-{}", + table_id, + parent_group_id, + ) + } + } + } +} diff --git a/src/meta/src/hummock/manager/compaction.rs b/src/meta/src/hummock/manager/compaction/mod.rs similarity index 95% rename from src/meta/src/hummock/manager/compaction.rs rename to src/meta/src/hummock/manager/compaction/mod.rs index 4696c07452018..8f2ecc33c60b0 100644 --- a/src/meta/src/hummock/manager/compaction.rs +++ b/src/meta/src/hummock/manager/compaction/mod.rs @@ -27,7 +27,7 @@ // limitations under the License. use std::cmp::min; -use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::sync::{Arc, LazyLock}; use std::time::{Instant, SystemTime}; @@ -43,7 +43,6 @@ use rand::thread_rng; use risingwave_common::util::epoch::Epoch; use risingwave_hummock_sdk::compact_task::{CompactTask, ReportTask}; use risingwave_hummock_sdk::compaction_group::hummock_version_ext::HummockLevelsExt; -use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; use risingwave_hummock_sdk::key_range::KeyRange; use risingwave_hummock_sdk::level::{InputLevel, Level, Levels}; use risingwave_hummock_sdk::sstable_info::SstableInfo; @@ -96,6 +95,9 @@ use crate::hummock::{commit_multi_var, start_measure_real_process_timer, Hummock use crate::manager::{MetadataManager, META_NODE_ID}; use crate::model::BTreeMapTransaction; +pub mod compaction_group_manager; +pub mod compaction_group_schedule; + const MAX_SKIP_TIMES: usize = 8; const MAX_REPORT_COUNT: usize = 16; @@ -1567,80 +1569,6 @@ impl HummockManager { .retain(|table_id, _| compact_task.existing_table_ids.contains(table_id)); } } - - pub async fn try_move_table_to_dedicated_cg( - &self, - table_write_throughput: &HashMap>, - table_id: &u32, - table_size: &u64, - is_creating_table: bool, - checkpoint_secs: u64, - parent_group_id: u64, - group_size: u64, - ) { - let default_group_id: CompactionGroupId = StaticCompactionGroupId::StateDefault.into(); - let mv_group_id: CompactionGroupId = StaticCompactionGroupId::MaterializedView.into(); - let partition_vnode_count = self.env.opts.partition_vnode_count; - let window_size = - self.env.opts.table_info_statistic_history_times / (checkpoint_secs as usize); - - let mut is_high_write_throughput = false; - let mut is_low_write_throughput = true; - if let Some(history) = table_write_throughput.get(table_id) { - if history.len() >= window_size { - is_high_write_throughput = history.iter().all(|throughput| { - *throughput / checkpoint_secs > self.env.opts.table_write_throughput_threshold - }); - is_low_write_throughput = history.iter().any(|throughput| { - *throughput / checkpoint_secs < self.env.opts.min_table_split_write_throughput - }); - } - } - - let state_table_size = *table_size; - - // 1. Avoid splitting a creating table - // 2. 
Avoid splitting a is_low_write_throughput creating table - // 3. Avoid splitting a non-high throughput medium-sized table - if is_creating_table - || (is_low_write_throughput) - || (state_table_size < self.env.opts.min_table_split_size && !is_high_write_throughput) - { - return; - } - - // do not split a large table and a small table because it would increase IOPS - // of small table. - if parent_group_id != default_group_id && parent_group_id != mv_group_id { - let rest_group_size = group_size - state_table_size; - if rest_group_size < state_table_size - && rest_group_size < self.env.opts.min_table_split_size - { - return; - } - } - - let ret = self - .move_state_table_to_compaction_group( - parent_group_id, - &[*table_id], - partition_vnode_count, - ) - .await; - match ret { - Ok(new_group_id) => { - tracing::info!("move state table [{}] from group-{} to group-{} success table_vnode_partition_count {:?}", table_id, parent_group_id, new_group_id, partition_vnode_count); - } - Err(e) => { - tracing::info!( - error = %e.as_report(), - "failed to move state table [{}] from group-{}", - table_id, - parent_group_id, - ) - } - } - } } #[cfg(any(test, feature = "test"))] diff --git a/src/meta/src/hummock/manager/gc.rs b/src/meta/src/hummock/manager/gc.rs index 97a99945bcf41..596c36857907f 100644 --- a/src/meta/src/hummock/manager/gc.rs +++ b/src/meta/src/hummock/manager/gc.rs @@ -331,16 +331,24 @@ mod tests { use std::time::Duration; use itertools::Itertools; + use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; use risingwave_hummock_sdk::HummockSstableObjectId; + use risingwave_rpc_client::HummockMetaClient; use super::ResponseEvent; use crate::hummock::test_utils::{add_test_tables, setup_compute_env}; + use crate::hummock::MockHummockMetaClient; use crate::MetaOpts; #[tokio::test] async fn test_full_gc() { let (mut env, hummock_manager, cluster_manager, worker_node) = setup_compute_env(80).await; let context_id = worker_node.id; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); let compactor_manager = hummock_manager.compactor_manager_ref_for_test(); // Use smaller spin interval to accelerate test. env.opts = Arc::new(MetaOpts { @@ -426,7 +434,12 @@ mod tests { ); // All committed SST ids should be excluded from GC. - let sst_infos = add_test_tables(hummock_manager.as_ref(), context_id).await; + let sst_infos = add_test_tables( + hummock_manager.as_ref(), + hummock_meta_client.clone(), + compaction_group_id, + ) + .await; let committed_object_ids = sst_infos .into_iter() .flatten() diff --git a/src/meta/src/hummock/manager/mod.rs b/src/meta/src/hummock/manager/mod.rs index ded8d507dbffc..d43b1cc6f5421 100644 --- a/src/meta/src/hummock/manager/mod.rs +++ b/src/meta/src/hummock/manager/mod.rs @@ -50,7 +50,6 @@ use crate::manager::{MetaSrvEnv, MetaStoreImpl, MetadataManager}; use crate::model::{ClusterId, MetadataModel, MetadataModelError}; use crate::rpc::metrics::MetaMetrics; -mod compaction_group_manager; mod context; mod gc; mod tests; diff --git a/src/meta/src/hummock/manager/tests.rs b/src/meta/src/hummock/manager/tests.rs index dca7311f4778f..09d43bf5fc72c 100644 --- a/src/meta/src/hummock/manager/tests.rs +++ b/src/meta/src/hummock/manager/tests.rs @@ -13,8 +13,6 @@ // limitations under the License. 
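The `try_move_table_to_dedicated_cg` heuristic relocated into `compaction_group_schedule.rs` above is unchanged in substance: a table is only moved to a dedicated group once a full window of per-checkpoint throughput samples is high, and it is skipped while the table is still being created, while any sample falls below the low-throughput floor, or while the table is small without being clearly high-throughput. A standalone sketch of that decision, with the `MetaOpts` thresholds turned into plain parameters (the helper is illustrative and omits the follow-up group-size guard applied outside the default groups):

```rust
use std::collections::VecDeque;

/// Illustrative mirror of the split decision above; returns true when the
/// table qualifies for its own compaction group.
fn should_split_table(
    history: &VecDeque<u64>, // per-checkpoint write throughput samples
    checkpoint_secs: u64,
    window_size: usize,
    high_threshold: u64, // opts.table_write_throughput_threshold
    low_threshold: u64,  // opts.min_table_split_write_throughput
    min_split_size: u64, // opts.min_table_split_size
    table_size: u64,
    is_creating_table: bool,
) -> bool {
    let mut is_high_write_throughput = false;
    let mut is_low_write_throughput = true;
    if history.len() >= window_size {
        is_high_write_throughput = history
            .iter()
            .all(|t| *t / checkpoint_secs > high_threshold);
        is_low_write_throughput = history
            .iter()
            .any(|t| *t / checkpoint_secs < low_threshold);
    }
    // Same ordering as the original checks: never split a creating table, a
    // consistently low-throughput table, or a small table that is not clearly
    // high-throughput.
    !(is_creating_table
        || is_low_write_throughput
        || (table_size < min_split_size && !is_high_write_throughput))
}
```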
#![cfg(test)] - -use std::borrow::Borrow; use std::cmp::Ordering; use std::collections::HashMap; use std::sync::Arc; @@ -22,30 +20,34 @@ use std::sync::Arc; use itertools::Itertools; use prometheus::Registry; use risingwave_common::catalog::TableId; +use risingwave_common::hash::VirtualNode; use risingwave_common::util::epoch::{test_epoch, EpochExt, INVALID_EPOCH}; use risingwave_hummock_sdk::compact::compact_task_to_string; use risingwave_hummock_sdk::compact_task::CompactTask; use risingwave_hummock_sdk::compaction_group::hummock_version_ext::get_compaction_group_ssts; use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; +use risingwave_hummock_sdk::key::{gen_key_from_str, FullKey}; use risingwave_hummock_sdk::key_range::KeyRange; use risingwave_hummock_sdk::sstable_info::SstableInfo; use risingwave_hummock_sdk::table_stats::{to_prost_table_stats_map, TableStats, TableStatsMap}; use risingwave_hummock_sdk::version::HummockVersion; use risingwave_hummock_sdk::{ CompactionGroupId, HummockContextId, HummockEpoch, HummockSstableObjectId, HummockVersionId, - LocalSstableInfo, FIRST_VERSION_ID, + LocalSstableInfo, SyncResult, FIRST_VERSION_ID, }; use risingwave_pb::common::{HostAddress, WorkerType}; use risingwave_pb::hummock::compact_task::TaskStatus; use risingwave_pb::hummock::{HummockPinnedSnapshot, HummockPinnedVersion, HummockSnapshot}; use risingwave_pb::meta::add_worker_node_request::Property; +use risingwave_rpc_client::HummockMetaClient; +use thiserror_ext::AsReport; use crate::hummock::compaction::compaction_config::CompactionConfigBuilder; use crate::hummock::compaction::selector::{default_compaction_selector, ManualCompactionOption}; use crate::hummock::error::Error; use crate::hummock::test_utils::*; -use crate::hummock::{HummockManager, HummockManagerRef}; -use crate::manager::{MetaSrvEnv, MetaStoreImpl, WorkerId}; +use crate::hummock::{HummockManagerRef, MockHummockMetaClient}; +use crate::manager::{MetaSrvEnv, MetaStoreImpl}; use crate::model::MetadataModel; use crate::rpc::metrics::MetaMetrics; @@ -60,12 +62,23 @@ fn pin_snapshots_epoch(pin_snapshots: &[HummockPinnedSnapshot]) -> Vec { .collect_vec() } -fn gen_sstable_info(sst_id: u64, idx: usize, table_ids: Vec) -> SstableInfo { +fn gen_sstable_info(sst_id: u64, table_ids: Vec, epoch: u64) -> SstableInfo { + let table_key_l = gen_key_from_str(VirtualNode::ZERO, "1"); + let table_key_r = gen_key_from_str(VirtualNode::MAX_FOR_TEST, "1"); + let full_key_l = FullKey::for_test( + TableId::new(*table_ids.first().unwrap()), + table_key_l, + epoch, + ) + .encode(); + let full_key_r = + FullKey::for_test(TableId::new(*table_ids.last().unwrap()), table_key_r, epoch).encode(); + SstableInfo { sst_id, key_range: KeyRange { - left: iterator_test_key_of_epoch(1, idx, 1).into(), - right: iterator_test_key_of_epoch(1, idx, 1).into(), + left: full_key_l.into(), + right: full_key_r.into(), right_exclusive: false, }, table_ids, @@ -78,9 +91,9 @@ fn gen_sstable_info(sst_id: u64, idx: usize, table_ids: Vec) -> SstableInfo } } -fn gen_local_sstable_info(sst_id: u64, idx: usize, table_ids: Vec) -> LocalSstableInfo { +fn gen_local_sstable_info(sst_id: u64, table_ids: Vec, epoch: u64) -> LocalSstableInfo { LocalSstableInfo { - sst_info: gen_sstable_info(sst_id, idx, table_ids), + sst_info: gen_sstable_info(sst_id, table_ids, epoch), table_stats: Default::default(), } } @@ -182,8 +195,12 @@ async fn test_unpin_snapshot_before() { #[tokio::test] async fn test_hummock_compaction_task() { - let (_, hummock_manager, _, _worker_node) = 
setup_compute_env(80).await; + let (_, hummock_manager, _, worker_node) = setup_compute_env(80).await; let sst_num = 2; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); // No compaction task available. assert!(hummock_manager @@ -197,9 +214,10 @@ async fn test_hummock_compaction_task() { // Add some sstables and commit. let epoch = test_epoch(1); + let table_id = 1; let original_tables = generate_test_sstables_with_table_id( epoch, - 1, + table_id, get_sst_ids(&hummock_manager, sst_num).await, ); register_sstable_infos_to_compaction_group( @@ -208,20 +226,23 @@ async fn test_hummock_compaction_task() { StaticCompactionGroupId::StateDefault.into(), ) .await; - commit_from_meta_node( - hummock_manager.borrow(), - epoch, - to_local_sstable_info(&original_tables), - ) - .await - .unwrap(); + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: to_local_sstable_info(&original_tables), + ..Default::default() + }, + false, + ) + .await + .unwrap(); // Get a compaction task. + let compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager.clone(), table_id).await; let compact_task = hummock_manager - .get_compact_task( - StaticCompactionGroupId::StateDefault.into(), - &mut default_compaction_selector(), - ) + .get_compact_task(compaction_group_id, &mut default_compaction_selector()) .await .unwrap() .unwrap(); @@ -236,10 +257,7 @@ async fn test_hummock_compaction_task() { // Get a compaction task. let compact_task = hummock_manager - .get_compact_task( - StaticCompactionGroupId::StateDefault.into(), - &mut default_compaction_selector(), - ) + .get_compact_task(compaction_group_id, &mut default_compaction_selector()) .await .unwrap() .unwrap(); @@ -254,7 +272,11 @@ async fn test_hummock_compaction_task() { #[tokio::test] async fn test_hummock_table() { - let (_env, hummock_manager, _cluster_manager, _worker_node) = setup_compute_env(80).await; + let (_env, hummock_manager, _cluster_manager, worker_node) = setup_compute_env(80).await; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); let epoch = test_epoch(1); let original_tables = generate_test_tables(epoch, get_sst_ids(&hummock_manager, 2).await); @@ -264,17 +286,21 @@ async fn test_hummock_table() { StaticCompactionGroupId::StateDefault.into(), ) .await; - commit_from_meta_node( - hummock_manager.borrow(), - epoch, - to_local_sstable_info(&original_tables), - ) - .await - .unwrap(); + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: to_local_sstable_info(&original_tables), + ..Default::default() + }, + false, + ) + .await + .unwrap(); + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); let pinned_version = hummock_manager.get_current_version().await; - let levels = - pinned_version.get_compaction_group_levels(StaticCompactionGroupId::StateDefault.into()); + let levels = pinned_version.get_compaction_group_levels(compaction_group_id); assert_eq!( Ordering::Equal, levels @@ -291,14 +317,18 @@ async fn test_hummock_table() { // Confirm tables got are equal to original tables assert_eq!( get_sorted_object_ids(&original_tables), - get_sorted_committed_object_ids(&pinned_version) + get_sorted_committed_object_ids(&pinned_version, compaction_group_id) ); } #[tokio::test] async fn test_hummock_transaction() { - let (_env, hummock_manager, _cluster_manager, _worker_node) = setup_compute_env(80).await; + let (_env, 
hummock_manager, _cluster_manager, worker_node) = setup_compute_env(80).await; let mut committed_tables = vec![]; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); // Add and commit tables in epoch1. // BEFORE: committed_epochs = [] @@ -319,24 +349,30 @@ async fn test_hummock_transaction() { current_version.visible_table_committed_epoch(), INVALID_EPOCH ); - assert!(get_sorted_committed_object_ids(¤t_version).is_empty()); + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); + assert!(get_sorted_committed_object_ids(¤t_version, compaction_group_id).is_empty()); // Commit epoch1 - commit_from_meta_node( - hummock_manager.borrow(), - epoch1, - to_local_sstable_info(&tables_in_epoch1), - ) - .await - .unwrap(); + hummock_meta_client + .commit_epoch( + epoch1, + SyncResult { + uncommitted_ssts: to_local_sstable_info(&tables_in_epoch1), + ..Default::default() + }, + false, + ) + .await + .unwrap(); committed_tables.extend(tables_in_epoch1.clone()); // Get tables after committing epoch1. All tables committed in epoch1 should be returned let current_version = hummock_manager.get_current_version().await; + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); assert_eq!(current_version.visible_table_committed_epoch(), epoch1); assert_eq!( get_sorted_object_ids(&committed_tables), - get_sorted_committed_object_ids(¤t_version) + get_sorted_committed_object_ids(¤t_version, compaction_group_id) ); } @@ -356,29 +392,35 @@ async fn test_hummock_transaction() { // Get tables before committing epoch2. tables_in_epoch1 should be returned and // tables_in_epoch2 should be invisible. let current_version = hummock_manager.get_current_version().await; + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); assert_eq!(current_version.visible_table_committed_epoch(), epoch1); assert_eq!( get_sorted_object_ids(&committed_tables), - get_sorted_committed_object_ids(¤t_version) + get_sorted_committed_object_ids(¤t_version, compaction_group_id) ); // Commit epoch2 - commit_from_meta_node( - hummock_manager.borrow(), - epoch2, - to_local_sstable_info(&tables_in_epoch2), - ) - .await - .unwrap(); + hummock_meta_client + .commit_epoch( + epoch2, + SyncResult { + uncommitted_ssts: to_local_sstable_info(&tables_in_epoch2), + ..Default::default() + }, + false, + ) + .await + .unwrap(); committed_tables.extend(tables_in_epoch2); // Get tables after committing epoch2. 
tables_in_epoch1 and tables_in_epoch2 should be // returned let current_version = hummock_manager.get_current_version().await; + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); assert_eq!(current_version.visible_table_committed_epoch(), epoch2); assert_eq!( get_sorted_object_ids(&committed_tables), - get_sorted_committed_object_ids(¤t_version) + get_sorted_committed_object_ids(¤t_version, compaction_group_id) ); } } @@ -470,6 +512,10 @@ async fn test_context_id_validation() { async fn test_hummock_manager_basic() { let (_env, hummock_manager, cluster_manager, worker_node) = setup_compute_env(1).await; let context_id_1 = worker_node.id; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); let fake_host_address_2 = HostAddress { host: "127.0.0.1".to_string(), @@ -502,7 +548,9 @@ async fn test_hummock_manager_basic() { let mut epoch = test_epoch(1); let mut register_log_count = 0; let mut commit_log_count = 0; - let commit_one = |epoch: HummockEpoch, hummock_manager: HummockManagerRef| async move { + let commit_one = |epoch: HummockEpoch, + hummock_manager: HummockManagerRef, + hummock_meta_client: Arc| async move { let original_tables = generate_test_tables(test_epoch(epoch), get_sst_ids(&hummock_manager, 2).await); register_sstable_infos_to_compaction_group( @@ -511,16 +559,21 @@ async fn test_hummock_manager_basic() { StaticCompactionGroupId::StateDefault.into(), ) .await; - commit_from_meta_node( - hummock_manager.borrow(), - epoch, - to_local_sstable_info(&original_tables), - ) - .await - .unwrap(); + + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: to_local_sstable_info(&original_tables), + ..Default::default() + }, + false, + ) + .await + .unwrap(); }; - commit_one(epoch, hummock_manager.clone()).await; + commit_one(epoch, hummock_manager.clone(), hummock_meta_client.clone()).await; register_log_count += 1; commit_log_count += 1; epoch.inc_epoch(); @@ -560,7 +613,7 @@ async fn test_hummock_manager_basic() { ); } - commit_one(epoch, hummock_manager.clone()).await; + commit_one(epoch, hummock_manager.clone(), hummock_meta_client.clone()).await; commit_log_count += 1; register_log_count += 1; @@ -619,6 +672,10 @@ async fn test_hummock_manager_basic() { #[tokio::test] async fn test_pin_snapshot_response_lost() { let (_env, hummock_manager, _cluster_manager, worker_node) = setup_compute_env(80).await; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); let context_id = worker_node.id; let mut epoch = test_epoch(1); @@ -630,13 +687,17 @@ async fn test_pin_snapshot_response_lost() { ) .await; // [ ] -> [ e0 ] - commit_from_meta_node( - hummock_manager.borrow(), - epoch, - to_local_sstable_info(&test_tables), - ) - .await - .unwrap(); + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: to_local_sstable_info(&test_tables), + ..Default::default() + }, + false, + ) + .await + .unwrap(); epoch.inc_epoch(); // Pin a snapshot with smallest last_pin @@ -653,13 +714,17 @@ async fn test_pin_snapshot_response_lost() { ) .await; // [ e0:pinned ] -> [ e0:pinned, e1 ] - commit_from_meta_node( - hummock_manager.borrow(), - epoch, - to_local_sstable_info(&test_tables), - ) - .await - .unwrap(); + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: to_local_sstable_info(&test_tables), + ..Default::default() + }, + false, + ) + .await + .unwrap(); 
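The commits in these tests now go through the mock meta client with a `SyncResult` that only carries `uncommitted_ssts`, replacing the removed `commit_epoch_for_test` / `commit_from_meta_node` helpers. A small helper capturing that repeated pattern (the helper is illustrative and not part of the patch; the trailing `false` simply mirrors the flag the surrounding call sites pass):

```rust
use risingwave_hummock_sdk::{LocalSstableInfo, SyncResult};
use risingwave_rpc_client::HummockMetaClient;

/// Illustrative helper: commit `ssts` at `epoch` through any
/// `HummockMetaClient` (the tests use `MockHummockMetaClient`).
async fn commit_ssts_for_test(
    client: &dyn HummockMetaClient,
    epoch: u64,
    ssts: Vec<LocalSstableInfo>,
) {
    client
        .commit_epoch(
            epoch,
            SyncResult {
                uncommitted_ssts: ssts,
                ..Default::default()
            },
            false, // same flag value as the surrounding call sites
        )
        .await
        .unwrap();
}
```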
epoch.inc_epoch(); // Assume the response of the previous rpc is lost. @@ -684,13 +749,17 @@ async fn test_pin_snapshot_response_lost() { ) .await; // [ e0, e1:pinned ] -> [ e0, e1:pinned, e2 ] - commit_from_meta_node( - hummock_manager.borrow(), - epoch, - to_local_sstable_info(&test_tables), - ) - .await - .unwrap(); + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: to_local_sstable_info(&test_tables), + ..Default::default() + }, + false, + ) + .await + .unwrap(); epoch.inc_epoch(); // Use correct snapshot id. @@ -709,13 +778,17 @@ async fn test_pin_snapshot_response_lost() { ) .await; // [ e0, e1:pinned, e2:pinned ] -> [ e0, e1:pinned, e2:pinned, e3 ] - commit_from_meta_node( - hummock_manager.borrow(), - epoch, - to_local_sstable_info(&test_tables), - ) - .await - .unwrap(); + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: to_local_sstable_info(&test_tables), + ..Default::default() + }, + false, + ) + .await + .unwrap(); epoch.inc_epoch(); // Use u64::MAX as epoch to pin greatest snapshot @@ -729,31 +802,37 @@ async fn test_pin_snapshot_response_lost() { #[tokio::test] async fn test_print_compact_task() { - let (_, hummock_manager, _cluster_manager, _) = setup_compute_env(80).await; + let (_, hummock_manager, _cluster_manager, worker_node) = setup_compute_env(80).await; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); // Add some sstables and commit. let epoch = test_epoch(1); let original_tables = generate_test_sstables_with_table_id(epoch, 1, get_sst_ids(&hummock_manager, 2).await); + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); register_sstable_infos_to_compaction_group( &hummock_manager, &original_tables, - StaticCompactionGroupId::StateDefault.into(), + compaction_group_id, ) .await; - commit_from_meta_node( - hummock_manager.borrow(), - epoch, - to_local_sstable_info(&original_tables), - ) - .await - .unwrap(); + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: to_local_sstable_info(&original_tables), + ..Default::default() + }, + false, + ) + .await + .unwrap(); // Get a compaction task. let compact_task = hummock_manager - .get_compact_task( - StaticCompactionGroupId::StateDefault.into(), - &mut default_compaction_selector(), - ) + .get_compact_task(compaction_group_id, &mut default_compaction_selector()) .await .unwrap() .unwrap(); @@ -766,33 +845,45 @@ async fn test_print_compact_task() { #[tokio::test] async fn test_invalid_sst_id() { let (_, hummock_manager, _cluster_manager, worker_node) = setup_compute_env(80).await; - let context_id = worker_node.id; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); let epoch = test_epoch(1); let ssts = generate_test_tables(epoch, vec![1]); - register_sstable_infos_to_compaction_group( - &hummock_manager, - &ssts, - StaticCompactionGroupId::StateDefault.into(), - ) - .await; + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); + register_sstable_infos_to_compaction_group(&hummock_manager, &ssts, compaction_group_id).await; let ssts = to_local_sstable_info(&ssts); // reject due to invalid context id - let sst_to_worker = ssts - .iter() - .map(|LocalSstableInfo { sst_info, .. 
}| (sst_info.object_id, WorkerId::MAX)) - .collect(); - let error = hummock_manager - .commit_epoch_for_test(epoch, ssts.clone(), sst_to_worker) - .await - .unwrap_err(); - assert!(matches!(error, Error::InvalidSst(1))); + { + let hummock_meta_client: Arc = + Arc::new(MockHummockMetaClient::new(hummock_manager.clone(), 23333)); + let error = hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: ssts.clone(), + ..Default::default() + }, + false, + ) + .await + .unwrap_err(); + assert_eq!( + error.as_report().to_string(), + "mock error: SST 1 is invalid" + ); + } - let sst_to_worker = ssts - .iter() - .map(|LocalSstableInfo { sst_info, .. }| (sst_info.object_id, context_id)) - .collect(); - hummock_manager - .commit_epoch_for_test(epoch, ssts, sst_to_worker) + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: ssts.clone(), + ..Default::default() + }, + false, + ) .await .unwrap(); } @@ -800,6 +891,10 @@ async fn test_invalid_sst_id() { #[tokio::test] async fn test_trigger_manual_compaction() { let (_, hummock_manager, _, worker_node) = setup_compute_env(80).await; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); let context_id = worker_node.id; { @@ -832,7 +927,13 @@ async fn test_trigger_manual_compaction() { } // Generate data for compaction task - let _ = add_test_tables(&hummock_manager, context_id).await; + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); + let _ = add_test_tables( + &hummock_manager, + hummock_meta_client.clone(), + compaction_group_id, + ) + .await; { // to check compactor send task fail drop(receiver); @@ -880,6 +981,10 @@ async fn test_hummock_compaction_task_heartbeat() { use crate::hummock::HummockManager; let (_env, hummock_manager, _cluster_manager, worker_node) = setup_compute_env(80).await; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); let context_id = worker_node.id; let sst_num = 2; @@ -911,13 +1016,17 @@ async fn test_hummock_compaction_task_heartbeat() { StaticCompactionGroupId::StateDefault.into(), ) .await; - commit_from_meta_node( - hummock_manager.borrow(), - epoch, - to_local_sstable_info(&original_tables), - ) - .await - .unwrap(); + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: to_local_sstable_info(&original_tables), + ..Default::default() + }, + false, + ) + .await + .unwrap(); // Get a compaction task. let compact_task = hummock_manager @@ -993,6 +1102,10 @@ async fn test_hummock_compaction_task_heartbeat_removal_on_node_removal() { use crate::hummock::HummockManager; let (_env, hummock_manager, cluster_manager, worker_node) = setup_compute_env(80).await; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); let context_id = worker_node.id; let sst_num = 2; @@ -1024,13 +1137,17 @@ async fn test_hummock_compaction_task_heartbeat_removal_on_node_removal() { StaticCompactionGroupId::StateDefault.into(), ) .await; - commit_from_meta_node( - hummock_manager.borrow(), - epoch, - to_local_sstable_info(&original_tables), - ) - .await - .unwrap(); + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: to_local_sstable_info(&original_tables), + ..Default::default() + }, + false, + ) + .await + .unwrap(); // Get a compaction task. 
let compact_task = hummock_manager @@ -1072,8 +1189,18 @@ async fn test_hummock_compaction_task_heartbeat_removal_on_node_removal() { async fn test_extend_objects_to_delete() { let (_env, hummock_manager, _cluster_manager, worker_node) = setup_compute_env(80).await; let context_id = worker_node.id; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); let _pinned_version1 = hummock_manager.pin_version(context_id).await.unwrap(); - let sst_infos = add_test_tables(hummock_manager.as_ref(), context_id).await; + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); + let sst_infos = add_test_tables( + hummock_manager.as_ref(), + hummock_meta_client.clone(), + compaction_group_id, + ) + .await; let max_committed_object_id = sst_infos .iter() .map(|ssts| { @@ -1151,11 +1278,14 @@ async fn test_extend_objects_to_delete() { let objects_to_delete = hummock_manager.get_objects_to_delete(); assert_eq!(objects_to_delete.len(), orphan_sst_num as usize); let new_epoch = pinned_version2.visible_table_committed_epoch().next_epoch(); - hummock_manager - .commit_epoch_for_test( + hummock_meta_client + .commit_epoch( new_epoch, - Vec::::new(), - Default::default(), + SyncResult { + uncommitted_ssts: vec![], + ..Default::default() + }, + false, ) .await .unwrap(); @@ -1180,6 +1310,11 @@ async fn test_extend_objects_to_delete() { #[tokio::test] async fn test_version_stats() { let (_env, hummock_manager, _cluster_manager, worker_node) = setup_compute_env(80).await; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); + let init_stats = hummock_manager.get_version_stats().await; assert!(init_stats.table_stats.is_empty()); @@ -1223,12 +1358,15 @@ async fn test_version_stats() { .collect(), }) .collect_vec(); - let sst_to_worker = ssts - .iter() - .map(|LocalSstableInfo { sst_info, .. 
}| (sst_info.object_id, worker_node.id)) - .collect(); - hummock_manager - .commit_epoch_for_test(epoch, ssts, sst_to_worker) + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: ssts, + ..Default::default() + }, + false, + ) .await .unwrap(); @@ -1247,11 +1385,6 @@ async fn test_version_stats() { assert_eq!(table3_stats.total_value_size, 100); assert_eq!(table3_stats.total_key_size, 1000); - // Report compaction - hummock_manager - .compactor_manager_ref_for_test() - .add_compactor(worker_node.id); - let compact_task = hummock_manager .get_compact_task( StaticCompactionGroupId::StateDefault.into(), @@ -1306,13 +1439,12 @@ async fn test_version_stats() { #[tokio::test] async fn test_split_compaction_group_on_commit() { let (_env, hummock_manager, _, worker_node) = setup_compute_env(80).await; - let context_id = worker_node.id; - hummock_manager - .register_table_ids_for_test(&[(100, 2)]) - .await - .unwrap(); + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); hummock_manager - .register_table_ids_for_test(&[(101, 3)]) + .register_table_ids_for_test(&[(100, 2), (101, 3)]) .await .unwrap(); let sst_1 = LocalSstableInfo { @@ -1344,8 +1476,15 @@ async fn test_split_compaction_group_on_commit() { ), ]), }; - hummock_manager - .commit_epoch_for_test(30, vec![sst_1], HashMap::from([(10, context_id)])) + hummock_meta_client + .commit_epoch( + test_epoch(30), + SyncResult { + uncommitted_ssts: vec![sst_1], + ..Default::default() + }, + false, + ) .await .unwrap(); let current_version = hummock_manager.get_current_version().await; @@ -1381,7 +1520,10 @@ async fn test_split_compaction_group_on_commit() { #[tokio::test] async fn test_split_compaction_group_on_demand_basic() { let (_env, hummock_manager, _, worker_node) = setup_compute_env(80).await; - let context_id = worker_node.id; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); let original_groups = hummock_manager .get_current_version() .await @@ -1393,13 +1535,13 @@ async fn test_split_compaction_group_on_demand_basic() { assert_eq!(original_groups, vec![2, 3]); let err = hummock_manager - .split_compaction_group(100, &[0]) + .split_compaction_group(100, &[0], 0) .await .unwrap_err(); assert_eq!("compaction group error: invalid group 100", err.to_string()); let err = hummock_manager - .split_compaction_group(2, &[100]) + .split_compaction_group(2, &[100], 0) .await .unwrap_err(); assert_eq!( @@ -1408,11 +1550,7 @@ async fn test_split_compaction_group_on_demand_basic() { ); hummock_manager - .register_table_ids_for_test(&[(100, 2)]) - .await - .unwrap(); - hummock_manager - .register_table_ids_for_test(&[(101, 2)]) + .register_table_ids_for_test(&[(100, 2), (101, 2)]) .await .unwrap(); let sst_1 = LocalSstableInfo { @@ -1451,17 +1589,21 @@ async fn test_split_compaction_group_on_demand_basic() { }, table_stats: Default::default(), }; - hummock_manager - .commit_epoch_for_test( - 30, - vec![sst_1, sst_2], - HashMap::from([(10, context_id), (11, context_id)]), + hummock_meta_client + .commit_epoch( + test_epoch(30), + SyncResult { + uncommitted_ssts: vec![sst_1, sst_2], + ..Default::default() + }, + false, ) .await .unwrap(); + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); let err = hummock_manager - .split_compaction_group(2, &[100, 101]) + .split_compaction_group(compaction_group_id, &[100, 101], 0) .await .unwrap_err(); assert_eq!( @@ -1477,25 
+1619,29 @@ async fn test_split_compaction_group_on_demand_basic() { .unwrap(); hummock_manager - .split_compaction_group(2, &[100, 101]) + .split_compaction_group(compaction_group_id, &[100, 101], 0) .await .unwrap(); let current_version = hummock_manager.get_current_version().await; assert_eq!(current_version.levels.len(), 3); - let new_group_id = current_version.levels.keys().max().cloned().unwrap(); - assert!(new_group_id > StaticCompactionGroupId::End as u64); + let new_compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager.clone(), 100).await; + assert!(new_compaction_group_id > StaticCompactionGroupId::End as u64); + + let old_compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager.clone(), 102).await; assert_eq!( - get_compaction_group_object_ids(¤t_version, 2), + get_compaction_group_object_ids(¤t_version, old_compaction_group_id), Vec::::new() ); assert_eq!( - get_compaction_group_object_ids(¤t_version, new_group_id), + get_compaction_group_object_ids(¤t_version, new_compaction_group_id), vec![10, 11] ); assert_eq!( current_version .state_table_info - .compaction_group_member_table_ids(2) + .compaction_group_member_table_ids(old_compaction_group_id) .iter() .map(|table_id| table_id.table_id) .collect_vec(), @@ -1504,7 +1650,7 @@ async fn test_split_compaction_group_on_demand_basic() { assert_eq!( current_version .state_table_info - .compaction_group_member_table_ids(new_group_id) + .compaction_group_member_table_ids(new_compaction_group_id) .iter() .map(|table_id| table_id.table_id) .sorted() @@ -1516,7 +1662,10 @@ async fn test_split_compaction_group_on_demand_basic() { #[tokio::test] async fn test_split_compaction_group_on_demand_non_trivial() { let (_env, hummock_manager, _, worker_node) = setup_compute_env(80).await; - let context_id = worker_node.id; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); let sst_1 = LocalSstableInfo { sst_info: SstableInfo { object_id: 10, @@ -1532,39 +1681,46 @@ async fn test_split_compaction_group_on_demand_non_trivial() { table_stats: Default::default(), }; hummock_manager - .register_table_ids_for_test(&[(100, 2)]) + .register_table_ids_for_test(&[(100, 2), (101, 2)]) .await .unwrap(); - hummock_manager - .register_table_ids_for_test(&[(101, 2)]) - .await - .unwrap(); - hummock_manager - .commit_epoch_for_test(30, vec![sst_1], HashMap::from([(10, context_id)])) + hummock_meta_client + .commit_epoch( + 30, + SyncResult { + uncommitted_ssts: vec![sst_1], + ..Default::default() + }, + false, + ) .await .unwrap(); + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); hummock_manager - .split_compaction_group(2, &[100]) + .split_compaction_group(compaction_group_id, &[100], 0) .await .unwrap(); let current_version = hummock_manager.get_current_version().await; assert_eq!(current_version.levels.len(), 3); - let new_group_id = current_version.levels.keys().max().cloned().unwrap(); - assert!(new_group_id > StaticCompactionGroupId::End as u64); + let new_compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager.clone(), 100).await; + assert!(new_compaction_group_id > StaticCompactionGroupId::End as u64); + let old_compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager.clone(), 101).await; assert_eq!( - get_compaction_group_object_ids(¤t_version, 2), + get_compaction_group_object_ids(¤t_version, old_compaction_group_id), vec![10] ); assert_eq!( - 
get_compaction_group_object_ids(¤t_version, new_group_id), + get_compaction_group_object_ids(¤t_version, new_compaction_group_id), vec![10] ); assert_eq!( current_version .state_table_info - .compaction_group_member_table_ids(2) + .compaction_group_member_table_ids(old_compaction_group_id) .iter() .map(|table_id| table_id.table_id) .collect_vec(), @@ -1573,7 +1729,7 @@ async fn test_split_compaction_group_on_demand_non_trivial() { assert_eq!( current_version .state_table_info - .compaction_group_member_table_ids(new_group_id) + .compaction_group_member_table_ids(new_compaction_group_id) .iter() .map(|table_id| table_id.table_id) .collect_vec(), @@ -1584,7 +1740,10 @@ async fn test_split_compaction_group_on_demand_non_trivial() { #[tokio::test] async fn test_split_compaction_group_trivial_expired() { let (_env, hummock_manager, _, worker_node) = setup_compute_env(80).await; - let context_id = worker_node.id; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); let original_groups = hummock_manager .get_current_version() .await @@ -1594,14 +1753,9 @@ async fn test_split_compaction_group_trivial_expired() { .sorted() .collect_vec(); assert_eq!(original_groups, vec![2, 3]); - hummock_manager.compactor_manager.add_compactor(context_id); hummock_manager - .register_table_ids_for_test(&[(100, 2)]) - .await - .unwrap(); - hummock_manager - .register_table_ids_for_test(&[(101, 2)]) + .register_table_ids_for_test(&[(100, 2), (101, 2)]) .await .unwrap(); let sst_1 = LocalSstableInfo { @@ -1646,16 +1800,14 @@ async fn test_split_compaction_group_trivial_expired() { sst_3.sst_info.object_id = 8; sst_4.sst_info.sst_id = 9; sst_4.sst_info.object_id = 9; - hummock_manager - .commit_epoch_for_test( + hummock_meta_client + .commit_epoch( 30, - vec![sst_1, sst_2, sst_3, sst_4], - HashMap::from([ - (10, context_id), - (11, context_id), - (9, context_id), - (8, context_id), - ]), + SyncResult { + uncommitted_ssts: vec![sst_1, sst_2, sst_3, sst_4], + ..Default::default() + }, + false, ) .await .unwrap(); @@ -1672,19 +1824,23 @@ async fn test_split_compaction_group_trivial_expired() { .unwrap() .unwrap(); + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); hummock_manager - .split_compaction_group(2, &[100]) + .split_compaction_group(compaction_group_id, &[100], 0) .await .unwrap(); let current_version = hummock_manager.get_current_version().await; - let new_group_id = current_version.levels.keys().max().cloned().unwrap(); + let new_compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager.clone(), 100).await; + let old_compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager.clone(), 101).await; assert_eq!(current_version.levels.len(), 3); - assert!(new_group_id > StaticCompactionGroupId::End as u64); + assert!(new_compaction_group_id > StaticCompactionGroupId::End as u64); assert_eq!( current_version .state_table_info - .compaction_group_member_table_ids(2) + .compaction_group_member_table_ids(old_compaction_group_id) .iter() .map(|table_id| table_id.table_id) .sorted() @@ -1694,7 +1850,7 @@ async fn test_split_compaction_group_trivial_expired() { assert_eq!( current_version .state_table_info - .compaction_group_member_table_ids(new_group_id) + .compaction_group_member_table_ids(new_compaction_group_id) .iter() .map(|table_id| table_id.table_id) .collect_vec(), @@ -1702,7 +1858,7 @@ async fn test_split_compaction_group_trivial_expired() { ); let task2 = hummock_manager - 
.get_compact_task(new_group_id, &mut default_compaction_selector()) + .get_compact_task(new_compaction_group_id, &mut default_compaction_selector()) .await .unwrap() .unwrap(); @@ -1736,18 +1892,17 @@ async fn test_split_compaction_group_trivial_expired() { } async fn get_manual_compact_task( - hummock_manager: &HummockManager, - context_id: HummockContextId, + hummock_manager_ref: HummockManagerRef, + compaction_group_id: u64, + level: usize, ) -> CompactTask { - hummock_manager.compactor_manager.add_compactor(context_id); - hummock_manager - .manual_get_compact_task( - 2, - ManualCompactionOption { - level: 0, - ..Default::default() - }, - ) + let manual_compcation_option = ManualCompactionOption { + level, + ..Default::default() + }; + + hummock_manager_ref + .manual_get_compact_task(compaction_group_id, manual_compcation_option) .await .unwrap() .unwrap() @@ -1756,14 +1911,13 @@ async fn get_manual_compact_task( #[tokio::test] async fn test_split_compaction_group_on_demand_bottom_levels() { let (_env, hummock_manager, _, worker_node) = setup_compute_env(80).await; - let context_id = worker_node.id; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); hummock_manager - .register_table_ids_for_test(&[(100, 2)]) - .await - .unwrap(); - hummock_manager - .register_table_ids_for_test(&[(101, 2)]) + .register_table_ids_for_test(&[(100, 2), (101, 2)]) .await .unwrap(); @@ -1785,12 +1939,22 @@ async fn test_split_compaction_group_on_demand_bottom_levels() { }, table_stats: Default::default(), }; - hummock_manager - .commit_epoch_for_test(30, vec![sst_1.clone()], HashMap::from([(10, context_id)])) + hummock_meta_client + .commit_epoch( + 30, + SyncResult { + uncommitted_ssts: vec![sst_1.clone()], + ..Default::default() + }, + false, + ) .await .unwrap(); + // Construct data via manual compaction - let compaction_task = get_manual_compact_task(&hummock_manager, context_id).await; + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); + let compaction_task = + get_manual_compact_task(hummock_manager.clone(), compaction_group_id, 0).await; let base_level: usize = 6; assert_eq!(compaction_task.input_ssts[0].table_infos.len(), 1); assert_eq!(compaction_task.target_level, base_level as u32); @@ -1833,43 +1997,56 @@ async fn test_split_compaction_group_on_demand_bottom_levels() { .unwrap()); let current_version = hummock_manager.get_current_version().await; assert!(current_version - .get_compaction_group_levels(2) + .get_compaction_group_levels(compaction_group_id) .l0 .sub_levels .is_empty()); assert_eq!( - current_version.get_compaction_group_levels(2).levels[base_level - 1] + current_version + .get_compaction_group_levels(compaction_group_id) + .levels[base_level - 1] .table_infos .len(), 2 ); hummock_manager - .split_compaction_group(2, &[100]) + .split_compaction_group(compaction_group_id, &[100], 0) .await .unwrap(); let current_version = hummock_manager.get_current_version().await; - let new_group_id = current_version.levels.keys().max().cloned().unwrap(); + let new_compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager.clone(), 100).await; + let old_compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager.clone(), 101).await; assert_eq!( - current_version.get_compaction_group_levels(2).levels[base_level - 1] + current_version + .get_compaction_group_levels(old_compaction_group_id) + .levels[base_level - 1] .table_infos .len(), 1 ); assert_eq!( - 
current_version.get_compaction_group_levels(2).levels[base_level - 1].table_infos[0] + current_version + .get_compaction_group_levels(old_compaction_group_id) + .levels[base_level - 1] + .table_infos[0] .object_id, sst_1.sst_info.object_id + 1, ); assert_eq!( - current_version.get_compaction_group_levels(2).levels[base_level - 1].table_infos[0] + current_version + .get_compaction_group_levels(old_compaction_group_id) + .levels[base_level - 1] + .table_infos[0] .table_ids, vec![101] ); assert_eq!( current_version - .get_compaction_group_levels(new_group_id) + .get_compaction_group_levels(new_compaction_group_id) .levels[base_level - 1] .table_infos .len(), @@ -1877,7 +2054,7 @@ async fn test_split_compaction_group_on_demand_bottom_levels() { ); assert_eq!( current_version - .get_compaction_group_levels(new_group_id) + .get_compaction_group_levels(new_compaction_group_id) .levels[base_level - 1] .table_infos[0] .table_ids, @@ -1885,7 +2062,7 @@ async fn test_split_compaction_group_on_demand_bottom_levels() { ); assert_eq!( current_version - .get_compaction_group_levels(new_group_id) + .get_compaction_group_levels(new_compaction_group_id) .levels[base_level - 1] .table_infos[1] .table_ids, @@ -1896,14 +2073,14 @@ async fn test_split_compaction_group_on_demand_bottom_levels() { #[tokio::test] async fn test_compaction_task_expiration_due_to_split_group() { let (_env, hummock_manager, _, worker_node) = setup_compute_env(80).await; - let context_id = worker_node.id; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); hummock_manager - .register_table_ids_for_test(&[(100, 2)]) - .await - .unwrap(); - hummock_manager - .register_table_ids_for_test(&[(101, 2)]) + .register_table_ids_for_test(&[(100, compaction_group_id), (101, compaction_group_id)]) .await .unwrap(); let sst_1 = LocalSstableInfo { @@ -1942,24 +2119,29 @@ async fn test_compaction_task_expiration_due_to_split_group() { }, table_stats: Default::default(), }; - hummock_manager - .commit_epoch_for_test( + + hummock_meta_client + .commit_epoch( 30, - vec![sst_1, sst_2], - HashMap::from([(10, context_id), (11, context_id)]), + SyncResult { + uncommitted_ssts: vec![sst_1, sst_2], + ..Default::default() + }, + false, ) .await .unwrap(); - let compaction_task = get_manual_compact_task(&hummock_manager, context_id).await; + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); + let compaction_task = + get_manual_compact_task(hummock_manager.clone(), compaction_group_id, 0).await; assert_eq!(compaction_task.input_ssts[0].table_infos.len(), 2); hummock_manager - .split_compaction_group(2, &[100]) + .split_compaction_group(compaction_group_id, &[100], 0) .await .unwrap(); let version_1 = hummock_manager.get_current_version().await; - // compaction_task.task_status = TaskStatus::Success.into(); assert!(!hummock_manager .report_compact_task(compaction_task.task_id, TaskStatus::Success, vec![], None) .await @@ -1970,7 +2152,8 @@ async fn test_compaction_task_expiration_due_to_split_group() { "version should not change because compaction task has been cancelled" ); - let compaction_task = get_manual_compact_task(&hummock_manager, context_id).await; + let compaction_task = + get_manual_compact_task(hummock_manager.clone(), compaction_group_id, 0).await; assert_eq!(compaction_task.input_ssts[0].table_infos.len(), 2); hummock_manager .report_compact_task(compaction_task.task_id, 
TaskStatus::Success, vec![], None) @@ -1987,75 +2170,73 @@ async fn test_compaction_task_expiration_due_to_split_group() { #[tokio::test] async fn test_move_tables_between_compaction_group() { let (_env, hummock_manager, _, worker_node) = setup_compute_env(80).await; - let context_id = worker_node.id; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); hummock_manager - .register_table_ids_for_test(&[(100, 2)]) - .await - .unwrap(); - hummock_manager - .register_table_ids_for_test(&[(101, 2)]) + .register_table_ids_for_test(&[(100, 2), (101, 2), (102, 2)]) .await .unwrap(); - hummock_manager - .register_table_ids_for_test(&[(102, 2)]) - .await - .unwrap(); - let sst_1 = gen_local_sstable_info(10, 1, vec![100, 101, 102]); - hummock_manager - .commit_epoch_for_test(30, vec![sst_1.clone()], HashMap::from([(10, context_id)])) + let sst_1 = gen_local_sstable_info(10, vec![100, 101, 102], test_epoch(1)); + + hummock_meta_client + .commit_epoch( + 30, + SyncResult { + uncommitted_ssts: vec![sst_1.clone()], + ..Default::default() + }, + false, + ) .await .unwrap(); - // Construct data via manual compaction - let compaction_task = get_manual_compact_task(&hummock_manager, context_id).await; - let base_level: usize = 6; - assert_eq!(compaction_task.input_ssts[0].table_infos.len(), 1); - assert_eq!(compaction_task.target_level, base_level as u32); - assert!(hummock_manager - .report_compact_task( - compaction_task.task_id, - TaskStatus::Success, - vec![ - gen_sstable_info(11, 1, vec![100]), - gen_sstable_info(12, 2, vec![100, 101]), - gen_sstable_info(13, 3, vec![101, 102]), - ], - None, + + let sst_2 = gen_local_sstable_info(14, vec![101, 102], test_epoch(2)); + + hummock_meta_client + .commit_epoch( + 31, + SyncResult { + uncommitted_ssts: vec![sst_2.clone()], + ..Default::default() + }, + false, ) .await - .unwrap()); - let sst_2 = gen_local_sstable_info(14, 1, vec![101, 102]); - hummock_manager - .commit_epoch_for_test(31, vec![sst_2.clone()], HashMap::from([(14, context_id)])) - .await .unwrap(); + let current_version = hummock_manager.get_current_version().await; - assert_eq!( - current_version.get_compaction_group_levels(2).levels[base_level - 1] - .table_infos - .len(), - 3 - ); + let compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager.clone(), 100).await; + let sst_ids = current_version + .get_sst_ids_by_group_id(compaction_group_id) + .collect_vec(); + assert_eq!(2, sst_ids.len()); + assert!(sst_ids.contains(&10)); + assert!(sst_ids.contains(&14)); hummock_manager - .split_compaction_group(2, &[100]) + .split_compaction_group(2, &[100], 0) .await .unwrap(); + let compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager.clone(), 101).await; let current_version = hummock_manager.get_current_version().await; - let new_group_id = current_version.levels.keys().max().cloned().unwrap(); - assert_eq!( - current_version.get_compaction_group_levels(2).levels[base_level - 1] - .table_infos - .len(), - 2 - ); + let sst_ids = current_version + .get_sst_ids_by_group_id(compaction_group_id) + .collect_vec(); + assert_eq!(2, sst_ids.len()); + assert!(!sst_ids.contains(&10)); - let level = ¤t_version - .get_compaction_group_levels(new_group_id) - .levels[base_level - 1]; - assert_eq!(level.table_infos[0].table_ids, vec![100]); - assert_eq!(level.table_infos[1].table_ids, vec![100]); - assert_eq!(level.table_infos.len(), 2); + let compaction_group_id = + 
get_compaction_group_id_by_table_id(hummock_manager.clone(), 100).await; + let sst_ids = current_version + .get_sst_ids_by_group_id(compaction_group_id) + .collect_vec(); + assert_eq!(1, sst_ids.len()); + assert!(!sst_ids.contains(&10)); } #[tokio::test] @@ -2069,6 +2250,10 @@ async fn test_gc_stats() { let registry = Registry::new(); let (_env, hummock_manager, _, worker_node) = setup_compute_env_with_metric(80, config, Some(MetaMetrics::for_test(®istry))).await; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); let context_id = worker_node.id; let assert_eq_gc_stats = |stale_object_size, stale_object_count, @@ -2107,8 +2292,14 @@ async fn test_gc_stats() { 0 ); + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); hummock_manager.pin_version(context_id).await.unwrap(); - let _ = add_test_tables(&hummock_manager, context_id).await; + let _ = add_test_tables( + &hummock_manager, + hummock_meta_client.clone(), + compaction_group_id, + ) + .await; assert_eq_gc_stats(0, 0, 0, 0, 0, 0); assert_ne!( hummock_manager.create_version_checkpoint(0).await.unwrap(), @@ -2126,7 +2317,6 @@ async fn test_gc_stats() { hummock_manager.create_version_checkpoint(0).await.unwrap(), 0 ); - assert_eq_gc_stats(6, 3, 0, 0, 2, 4); } #[tokio::test] @@ -2137,84 +2327,66 @@ async fn test_partition_level() { .level0_overlapping_sub_level_compact_level_count(3) .build(); let registry = Registry::new(); - let (_env, hummock_manager, _, worker_node) = + let (env, hummock_manager, _, worker_node) = setup_compute_env_with_metric(80, config.clone(), Some(MetaMetrics::for_test(®istry))) .await; - let config = Arc::new(config); - - let context_id = worker_node.id; - - hummock_manager - .register_table_ids_for_test(&[(100, 2)]) - .await - .unwrap(); - hummock_manager - .register_table_ids_for_test(&[(101, 2)]) - .await - .unwrap(); - let sst_1 = gen_local_sstable_info(10, 1, vec![100, 101]); + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); hummock_manager - .commit_epoch_for_test(30, vec![sst_1.clone()], HashMap::from([(10, context_id)])) + .register_table_ids_for_test(&[(100, 2), (101, 2)]) .await .unwrap(); - // Construct data via manual compaction - let compaction_task = get_manual_compact_task(&hummock_manager, context_id).await; - let base_level: usize = 6; - assert_eq!(compaction_task.input_ssts[0].table_infos.len(), 1); - assert_eq!(compaction_task.target_level, base_level as u32); - assert!(hummock_manager - .report_compact_task( - compaction_task.task_id, - TaskStatus::Success, - vec![ - gen_sstable_info(11, 1, vec![100]), - gen_sstable_info(12, 2, vec![101]), - ], - None, + let sst_1 = gen_local_sstable_info(10, vec![100, 101], test_epoch(1)); + + hummock_meta_client + .commit_epoch( + 30, + SyncResult { + uncommitted_ssts: vec![sst_1], + ..Default::default() + }, + false, ) .await - .unwrap()); + .unwrap(); + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); hummock_manager - .split_compaction_group(2, &[100]) + .split_compaction_group(compaction_group_id, &[100], env.opts.partition_vnode_count) .await .unwrap(); - let current_version = hummock_manager.get_current_version().await; - - let new_group_id = current_version.levels.keys().max().cloned().unwrap(); - assert_eq!( - current_version - .get_compaction_group_levels(new_group_id) - .levels[base_level - 1] - .table_infos - .len(), - 1 - ); - + let new_compaction_group_id = + 
get_compaction_group_id_by_table_id(hummock_manager.clone(), 100).await; let mut global_sst_id = 13; const MB: u64 = 1024 * 1024; let mut selector = default_compaction_selector(); for epoch in 31..100 { - let mut sst = gen_local_sstable_info(global_sst_id, 10, vec![100]); + let mut sst = gen_local_sstable_info(global_sst_id, vec![100], test_epoch(epoch)); sst.sst_info.file_size = 10 * MB; sst.sst_info.sst_size = 10 * MB; sst.sst_info.uncompressed_file_size = 10 * MB; - hummock_manager - .commit_epoch_for_test( + hummock_meta_client + .commit_epoch( epoch, - vec![sst], - HashMap::from([(global_sst_id, context_id)]), + SyncResult { + uncommitted_ssts: vec![sst], + ..Default::default() + }, + false, ) .await .unwrap(); + global_sst_id += 1; if let Some(task) = hummock_manager - .get_compact_task(new_group_id, &mut selector) + .get_compact_task(new_compaction_group_id, &mut selector) .await .unwrap() { - let mut sst = gen_sstable_info(global_sst_id, 10, vec![100]); + let mut sst = gen_sstable_info(global_sst_id, vec![100], test_epoch(epoch)); sst.file_size = task .input_ssts .iter() @@ -2236,7 +2408,7 @@ async fn test_partition_level() { } } let current_version = hummock_manager.get_current_version().await; - let group = current_version.get_compaction_group_levels(new_group_id); + let group = current_version.get_compaction_group_levels(new_compaction_group_id); for sub_level in &group.l0.sub_levels { if sub_level.total_file_size > config.sub_level_max_compaction_bytes { assert!(sub_level.vnode_partition_count > 0); @@ -2247,7 +2419,10 @@ async fn test_partition_level() { #[tokio::test] async fn test_unregister_moved_table() { let (_env, hummock_manager, _, worker_node) = setup_compute_env(80).await; - let context_id = worker_node.id; + let hummock_meta_client = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); let original_groups = hummock_manager .get_current_version() .await @@ -2265,11 +2440,7 @@ async fn test_unregister_moved_table() { ); hummock_manager - .register_table_ids_for_test(&[(100, 2)]) - .await - .unwrap(); - hummock_manager - .register_table_ids_for_test(&[(101, 2)]) + .register_table_ids_for_test(&[(100, 2), (101, 2)]) .await .unwrap(); let sst_1 = LocalSstableInfo { @@ -2308,25 +2479,30 @@ async fn test_unregister_moved_table() { }, table_stats: Default::default(), }; - hummock_manager - .commit_epoch_for_test( + + hummock_meta_client + .commit_epoch( 30, - vec![sst_1, sst_2], - HashMap::from([(10, context_id), (11, context_id)]), + SyncResult { + uncommitted_ssts: vec![sst_1, sst_2], + ..Default::default() + }, + false, ) .await .unwrap(); - let new_group_id = hummock_manager - .split_compaction_group(2, &[100]) + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); + let new_compaction_group_id = hummock_manager + .split_compaction_group(compaction_group_id, &[100], 0) .await .unwrap(); - assert_ne!(new_group_id, 2); - assert!(new_group_id > StaticCompactionGroupId::End as u64); + assert_ne!(new_compaction_group_id, 2); + assert!(new_compaction_group_id > StaticCompactionGroupId::End as u64); let current_version = hummock_manager.get_current_version().await; assert_eq!( - new_group_id, + new_compaction_group_id, current_version.levels.keys().max().cloned().unwrap() ); assert_eq!(current_version.levels.len(), 3); @@ -2335,7 +2511,7 @@ async fn test_unregister_moved_table() { vec![11] ); assert_eq!( - get_compaction_group_object_ids(¤t_version, new_group_id), + get_compaction_group_object_ids(¤t_version, 
new_compaction_group_id), vec![10, 11] ); assert_eq!( @@ -2350,7 +2526,7 @@ async fn test_unregister_moved_table() { assert_eq!( current_version .state_table_info - .compaction_group_member_table_ids(new_group_id) + .compaction_group_member_table_ids(new_compaction_group_id) .iter() .map(|table_id| table_id.table_id) .collect_vec(), @@ -2363,7 +2539,9 @@ async fn test_unregister_moved_table() { .unwrap(); let current_version = hummock_manager.get_current_version().await; assert_eq!(current_version.levels.len(), 2); - assert!(!current_version.levels.contains_key(&new_group_id)); + assert!(!current_version + .levels + .contains_key(&new_compaction_group_id)); assert_eq!( get_compaction_group_object_ids(¤t_version, 2), vec![11] diff --git a/src/meta/src/hummock/manager/time_travel.rs b/src/meta/src/hummock/manager/time_travel.rs index 61c1e820fab0c..0b6ef73e52605 100644 --- a/src/meta/src/hummock/manager/time_travel.rs +++ b/src/meta/src/hummock/manager/time_travel.rs @@ -16,6 +16,7 @@ use std::collections::{HashMap, HashSet, VecDeque}; use anyhow::anyhow; use itertools::Itertools; +use risingwave_common::catalog::TableId; use risingwave_common::system_param::reader::SystemParamsRead; use risingwave_common::util::epoch::Epoch; use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; @@ -36,7 +37,7 @@ use risingwave_pb::hummock::{PbHummockVersion, PbHummockVersionDelta}; use sea_orm::sea_query::OnConflict; use sea_orm::ActiveValue::Set; use sea_orm::{ - ColumnTrait, DatabaseTransaction, EntityTrait, QueryFilter, QueryOrder, QuerySelect, + ColumnTrait, Condition, DatabaseTransaction, EntityTrait, QueryFilter, QueryOrder, QuerySelect, TransactionTrait, }; @@ -101,6 +102,7 @@ impl HummockManager { .lt(risingwave_meta_model_v2::Epoch::try_from(epoch_watermark).unwrap()), ) .order_by_desc(hummock_epoch_to_version::Column::Epoch) + .order_by_asc(hummock_epoch_to_version::Column::VersionId) .one(&txn) .await?; let Some(version_watermark) = version_watermark else { @@ -275,9 +277,19 @@ impl HummockManager { /// The version is retrieved from `hummock_epoch_to_version`, selecting the entry with the largest epoch that's lte `query_epoch`. /// /// The resulted version is complete, i.e. with correct `SstableInfo`. 
- pub async fn epoch_to_version(&self, query_epoch: HummockEpoch) -> Result { + pub async fn epoch_to_version( + &self, + query_epoch: HummockEpoch, + table_id: u32, + ) -> Result { let sql_store = self.sql_store().ok_or_else(require_sql_meta_store_err)?; let epoch_to_version = hummock_epoch_to_version::Entity::find() + .filter( + Condition::any() + .add(hummock_epoch_to_version::Column::TableId.eq(i64::from(table_id))) + // for backward compatibility + .add(hummock_epoch_to_version::Column::TableId.eq(0)), + ) .filter( hummock_epoch_to_version::Column::Epoch .lte(risingwave_meta_model_v2::Epoch::try_from(query_epoch).unwrap()), @@ -362,7 +374,19 @@ impl HummockManager { delta: HummockVersionDelta, group_parents: &HashMap, skip_sst_ids: &HashSet, + tables_to_commit: impl Iterator, + committed_epoch: u64, ) -> Result>> { + let select_groups = group_parents + .iter() + .filter_map(|(cg_id, _)| { + if should_ignore_group(find_root_group(*cg_id, group_parents)) { + None + } else { + Some(*cg_id) + } + }) + .collect::>(); async fn write_sstable_infos( sst_infos: impl Iterator, txn: &DatabaseTransaction, @@ -388,35 +412,23 @@ impl HummockManager { Ok(count) } - let epoch = delta.visible_table_committed_epoch(); - let version_id: u64 = delta.id.to_u64(); - let m = hummock_epoch_to_version::ActiveModel { - epoch: Set(epoch.try_into().unwrap()), - version_id: Set(version_id.try_into().unwrap()), - }; - hummock_epoch_to_version::Entity::insert(m) - .on_conflict( - OnConflict::column(hummock_epoch_to_version::Column::Epoch) - // The existing row must be inserted by the common committed epoch of created MVs. - // While any duplicate row must be inserted by MVs still in creation. - // So the row shouldn't be updated. - .do_nothing() - .to_owned(), - ) - .do_nothing() - .exec(txn) - .await?; + for (table_id, cg_id) in tables_to_commit { + if !select_groups.contains(cg_id) { + continue; + } + let version_id: u64 = delta.id.to_u64(); + let m = hummock_epoch_to_version::ActiveModel { + epoch: Set(committed_epoch.try_into().unwrap()), + table_id: Set(table_id.table_id.into()), + version_id: Set(version_id.try_into().unwrap()), + }; + // There should be no conflict rows. 
+ hummock_epoch_to_version::Entity::insert(m) + .exec(txn) + .await?; + } + let mut version_sst_ids = None; - let select_groups = group_parents - .iter() - .filter_map(|(cg_id, _)| { - if should_ignore_group(find_root_group(*cg_id, group_parents)) { - None - } else { - Some(*cg_id) - } - }) - .collect::>(); if let Some(version) = version { version_sst_ids = Some( version diff --git a/src/meta/src/hummock/manager/timer_task.rs b/src/meta/src/hummock/manager/timer_task.rs index ec0f77ac88a8a..94537e9c33e1f 100644 --- a/src/meta/src/hummock/manager/timer_task.rs +++ b/src/meta/src/hummock/manager/timer_task.rs @@ -43,7 +43,7 @@ impl HummockManager { const COMPACTION_HEARTBEAT_PERIOD_SEC: u64 = 1; pub enum HummockTimerEvent { - GroupSplit, + GroupSchedule, CheckDeadTask, Report, CompactionHeartBeatExpiredCheck, @@ -158,7 +158,7 @@ impl HummockManager { .set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); let split_group_trigger = IntervalStream::new(split_group_trigger_interval) - .map(|_| HummockTimerEvent::GroupSplit); + .map(|_| HummockTimerEvent::GroupSchedule); triggers.push(Box::pin(split_group_trigger)); } @@ -189,12 +189,12 @@ impl HummockManager { hummock_manager.check_dead_task().await; } - HummockTimerEvent::GroupSplit => { + HummockTimerEvent::GroupSchedule => { if hummock_manager.env.opts.compaction_deterministic_test { continue; } - hummock_manager.on_handle_check_split_multi_group().await; + hummock_manager.on_handle_schedule_group().await; } HummockTimerEvent::Report => { @@ -443,7 +443,7 @@ impl HummockManager { /// throughput keep larger than `table_write_throughput_threshold` for a long time. /// * For state-table whose throughput less than `min_table_split_write_throughput`, do not /// increase it size of base-level. - async fn on_handle_check_split_multi_group(&self) { + async fn on_handle_schedule_group(&self) { let params = self.env.system_params_reader().await; let barrier_interval_ms = params.barrier_interval_ms() as u64; let checkpoint_secs = std::cmp::max( @@ -469,18 +469,13 @@ impl HummockManager { continue; } - for (table_id, table_size) in &group.table_statistic { - self.try_move_table_to_dedicated_cg( - &table_write_throughput, - table_id, - table_size, - !created_tables.contains(table_id), - checkpoint_secs, - group.group_id, - group.group_size, - ) - .await; - } + self.try_split_compaction_group( + &table_write_throughput, + checkpoint_secs, + group, + &created_tables, + ) + .await; } } diff --git a/src/meta/src/hummock/manager/transaction.rs b/src/meta/src/hummock/manager/transaction.rs index aa0ead3cef2aa..9a795608f7e1a 100644 --- a/src/meta/src/hummock/manager/transaction.rs +++ b/src/meta/src/hummock/manager/transaction.rs @@ -122,7 +122,7 @@ impl<'a> HummockVersionTransaction<'a> { is_visible_table_committed_epoch: bool, new_compaction_group: Option<(CompactionGroupId, CompactionConfig)>, commit_sstables: BTreeMap>, - new_table_ids: HashMap, + new_table_ids: &HashMap, new_table_watermarks: HashMap, change_log_delta: HashMap, ) -> HummockVersionDelta { @@ -175,7 +175,7 @@ impl<'a> HummockVersionTransaction<'a> { // update state table info new_version_delta.with_latest_version(|version, delta| { - for (table_id, cg_id) in &new_table_ids { + for (table_id, cg_id) in new_table_ids { assert!( !version.state_table_info.info().contains_key(table_id), "newly added table exists previously: {:?}", diff --git a/src/meta/src/hummock/mock_hummock_meta_client.rs b/src/meta/src/hummock/mock_hummock_meta_client.rs index 1cdd8547c8247..c926e2145e886 100644 
--- a/src/meta/src/hummock/mock_hummock_meta_client.rs +++ b/src/meta/src/hummock/mock_hummock_meta_client.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::BTreeSet; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::Arc; use std::time::SystemTime; @@ -22,6 +23,7 @@ use fail::fail_point; use futures::stream::BoxStream; use futures::{Stream, StreamExt}; use itertools::Itertools; +use risingwave_common::catalog::TableId; use risingwave_hummock_sdk::change_log::build_table_change_log_delta; use risingwave_hummock_sdk::compact_task::CompactTask; use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; @@ -162,14 +164,63 @@ impl HummockMetaClient for MockHummockMetaClient { }) } - async fn commit_epoch(&self, epoch: HummockEpoch, sync_result: SyncResult) -> Result<()> { + async fn commit_epoch( + &self, + epoch: HummockEpoch, + sync_result: SyncResult, + is_log_store: bool, + ) -> Result<()> { let version: HummockVersion = self.hummock_manager.get_current_version().await; - let sst_to_worker = sync_result + let table_ids = version + .state_table_info + .info() + .keys() + .map(|table_id| table_id.table_id) + .collect::>(); + + let old_value_ssts_vec = if is_log_store { + sync_result.old_value_ssts.clone() + } else { + vec![] + }; + let commit_table_ids = sync_result + .uncommitted_ssts + .iter() + .flat_map(|sstable| sstable.sst_info.table_ids.clone()) + .chain({ + old_value_ssts_vec + .iter() + .flat_map(|sstable| sstable.sst_info.table_ids.clone()) + }) + .collect::>(); + + let new_table_fragment_info = if commit_table_ids + .iter() + .all(|table_id| table_ids.contains(table_id)) + { + NewTableFragmentInfo::None + } else { + NewTableFragmentInfo::Normal { + mv_table_id: None, + internal_table_ids: commit_table_ids + .iter() + .cloned() + .map(TableId::from) + .collect_vec(), + } + }; + + let sst_to_context = sync_result .uncommitted_ssts .iter() .map(|LocalSstableInfo { sst_info, .. 
}| (sst_info.object_id, self.context_id)) .collect(); let new_table_watermark = sync_result.table_watermarks; + let table_change_log_table_ids = if is_log_store { + commit_table_ids.clone() + } else { + BTreeSet::new() + }; let table_change_log = build_table_change_log_delta( sync_result .old_value_ssts @@ -177,22 +228,24 @@ impl HummockMetaClient for MockHummockMetaClient { .map(|sst| sst.sst_info), sync_result.uncommitted_ssts.iter().map(|sst| &sst.sst_info), &vec![epoch], - version - .state_table_info - .info() - .keys() - .map(|table_id| (table_id.table_id, 0)), + table_change_log_table_ids + .into_iter() + .map(|table_id| (table_id, 0)), ); self.hummock_manager .commit_epoch(CommitEpochInfo { sstables: sync_result.uncommitted_ssts, new_table_watermarks: new_table_watermark, - sst_to_context: sst_to_worker, - new_table_fragment_info: NewTableFragmentInfo::None, + sst_to_context, + new_table_fragment_info, change_log_delta: table_change_log, committed_epoch: epoch, - tables_to_commit: version.state_table_info.info().keys().cloned().collect(), + tables_to_commit: commit_table_ids + .iter() + .cloned() + .map(TableId::from) + .collect(), is_visible_table_committed_epoch: true, }) .await @@ -348,7 +401,11 @@ impl HummockMetaClient for MockHummockMetaClient { )) } - async fn get_version_by_epoch(&self, _epoch: HummockEpoch) -> Result { + async fn get_version_by_epoch( + &self, + _epoch: HummockEpoch, + _table_id: u32, + ) -> Result { unimplemented!() } } diff --git a/src/meta/src/hummock/test_utils.rs b/src/meta/src/hummock/test_utils.rs index 2188d9b539325..00cb52b34a0a4 100644 --- a/src/meta/src/hummock/test_utils.rs +++ b/src/meta/src/hummock/test_utils.rs @@ -22,7 +22,6 @@ use bytes::Bytes; use itertools::Itertools; use risingwave_common::catalog::{TableId, TableOption}; use risingwave_common::util::epoch::test_epoch; -use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; use risingwave_hummock_sdk::key::key_with_epoch; use risingwave_hummock_sdk::key_range::KeyRange; use risingwave_hummock_sdk::level::Levels; @@ -30,12 +29,13 @@ use risingwave_hummock_sdk::sstable_info::SstableInfo; use risingwave_hummock_sdk::table_watermark::TableWatermarks; use risingwave_hummock_sdk::version::{HummockVersion, HummockVersionStateTableInfo}; use risingwave_hummock_sdk::{ - CompactionGroupId, HummockContextId, HummockEpoch, HummockSstableObjectId, LocalSstableInfo, + CompactionGroupId, HummockEpoch, HummockSstableObjectId, LocalSstableInfo, SyncResult, }; use risingwave_pb::common::{HostAddress, WorkerNode, WorkerType}; use risingwave_pb::hummock::compact_task::TaskStatus; use risingwave_pb::hummock::CompactionConfig; use risingwave_pb::meta::add_worker_node_request::Property; +use risingwave_rpc_client::HummockMetaClient; use crate::hummock::compaction::compaction_config::CompactionConfigBuilder; use crate::hummock::compaction::selector::{default_compaction_selector, LocalSelectorStatistic}; @@ -44,9 +44,7 @@ use crate::hummock::level_handler::LevelHandler; pub use crate::hummock::manager::CommitEpochInfo; use crate::hummock::model::CompactionGroup; use crate::hummock::{CompactorManager, HummockManager, HummockManagerRef}; -use crate::manager::{ - ClusterManager, ClusterManagerRef, FragmentManager, MetaSrvEnv, META_NODE_ID, -}; +use crate::manager::{ClusterManager, ClusterManagerRef, FragmentManager, MetaSrvEnv}; use crate::rpc::metrics::MetaMetrics; pub fn to_local_sstable_info(ssts: &[SstableInfo]) -> Vec { @@ -55,9 +53,15 @@ pub fn to_local_sstable_info(ssts: &[SstableInfo]) -> 
Vec { .collect_vec() } +// This function has 3 phases: +// 1. add 3 ssts to +// 2. trigger a compaction and replace the input from phase 1 with the 1 new sst +// 3. add 1 new sst +// Please make sure the function do what you want before using it. pub async fn add_test_tables( hummock_manager: &HummockManager, - context_id: HummockContextId, + hummock_meta_client: Arc, + compaction_group_id: CompactionGroupId, ) -> Vec> { // Increase version by 2. @@ -66,43 +70,31 @@ pub async fn add_test_tables( let mut epoch = test_epoch(1); let sstable_ids = get_sst_ids(hummock_manager, 3).await; let test_tables = generate_test_sstables_with_table_id(epoch, 1, sstable_ids); - register_sstable_infos_to_compaction_group( - hummock_manager, - &test_tables, - StaticCompactionGroupId::StateDefault.into(), - ) - .await; - let ssts = to_local_sstable_info(&test_tables); - let sst_to_worker = ssts - .iter() - .map(|LocalSstableInfo { sst_info, .. }| (sst_info.object_id, context_id)) - .collect(); - hummock_manager - .commit_epoch_for_test(epoch, ssts, sst_to_worker) + register_sstable_infos_to_compaction_group(hummock_manager, &test_tables, compaction_group_id) + .await; + let test_local_tables = to_local_sstable_info(&test_tables); + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: test_local_tables, + ..Default::default() + }, + false, + ) .await .unwrap(); + // Simulate a compaction and increase version by 1. - let mut temp_compactor = false; - if hummock_manager - .compactor_manager_ref_for_test() - .compactor_num() - == 0 - { - hummock_manager - .compactor_manager_ref_for_test() - .add_compactor(context_id); - temp_compactor = true; - } let test_tables_2 = generate_test_tables(epoch, get_sst_ids(hummock_manager, 1).await); register_sstable_infos_to_compaction_group( hummock_manager, &test_tables_2, - StaticCompactionGroupId::StateDefault.into(), + compaction_group_id, ) .await; - let mut selector = default_compaction_selector(); let mut compact_task = hummock_manager - .get_compact_task(StaticCompactionGroupId::StateDefault.into(), &mut selector) + .get_compact_task(compaction_group_id, &mut default_compaction_selector()) .await .unwrap() .unwrap(); @@ -114,15 +106,8 @@ pub async fn add_test_tables( .sum::(), 3 ); - compact_task.target_level = 6; - if temp_compactor { - let compactor = hummock_manager - .compactor_manager_ref_for_test() - .next_compactor() - .unwrap(); - assert_eq!(compactor.context_id(), context_id); - } + compact_task.target_level = 6; hummock_manager .report_compact_task_for_test( compact_task.task_id, @@ -133,27 +118,25 @@ pub async fn add_test_tables( ) .await .unwrap(); - if temp_compactor { - hummock_manager - .compactor_manager_ref_for_test() - .remove_compactor(context_id); - } // Increase version by 1. epoch.inc_epoch(); let test_tables_3 = generate_test_tables(epoch, get_sst_ids(hummock_manager, 1).await); register_sstable_infos_to_compaction_group( hummock_manager, &test_tables_3, - StaticCompactionGroupId::StateDefault.into(), + compaction_group_id, ) .await; - let ssts = to_local_sstable_info(&test_tables_3); - let sst_to_worker = ssts - .iter() - .map(|LocalSstableInfo { sst_info, .. 
}| (sst_info.object_id, context_id)) - .collect(); - hummock_manager - .commit_epoch_for_test(epoch, ssts, sst_to_worker) + let test_local_tables_3 = to_local_sstable_info(&test_tables_3); + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: test_local_tables_3, + ..Default::default() + }, + false, + ) .await .unwrap(); vec![test_tables, test_tables_2, test_tables_3] @@ -290,11 +273,9 @@ pub fn get_sorted_object_ids(sstables: &[SstableInfo]) -> Vec Vec { - let levels = match hummock_version - .levels - .get(&StaticCompactionGroupId::StateDefault.into()) - { + let levels = match hummock_version.levels.get(&compaction_group_id) { Some(levels) => levels, None => return vec![], }; @@ -385,34 +366,23 @@ pub async fn get_sst_ids( (range.start_id..range.end_id).collect_vec() } -pub async fn commit_from_meta_node( - hummock_manager_ref: &HummockManager, - epoch: HummockEpoch, - ssts: Vec, -) -> crate::hummock::error::Result<()> { - let sst_to_worker = ssts - .iter() - .map(|LocalSstableInfo { sst_info, .. }| (sst_info.object_id, META_NODE_ID)) - .collect(); - hummock_manager_ref - .commit_epoch_for_test(epoch, ssts, sst_to_worker) - .await -} - pub async fn add_ssts( epoch: HummockEpoch, hummock_manager: &HummockManager, - context_id: HummockContextId, + hummock_meta_client: Arc, ) -> Vec { let table_ids = get_sst_ids(hummock_manager, 3).await; let test_tables = generate_test_sstables_with_table_id(test_epoch(epoch), 1, table_ids); let ssts = to_local_sstable_info(&test_tables); - let sst_to_worker = ssts - .iter() - .map(|LocalSstableInfo { sst_info, .. }| (sst_info.object_id, context_id)) - .collect(); - hummock_manager - .commit_epoch_for_test(epoch, ssts, sst_to_worker) + hummock_meta_client + .commit_epoch( + epoch, + SyncResult { + uncommitted_ssts: ssts, + ..Default::default() + }, + false, + ) .await .unwrap(); test_tables @@ -441,3 +411,12 @@ pub fn compaction_selector_context<'a>( state_table_info, } } + +pub async fn get_compaction_group_id_by_table_id( + hummock_manager_ref: HummockManagerRef, + table_id: u32, +) -> u64 { + let version = hummock_manager_ref.get_current_version().await; + let mapping = version.state_table_info.build_table_compaction_group_id(); + *mapping.get(&(table_id.into())).unwrap() +} diff --git a/src/meta/src/hummock/vacuum.rs b/src/meta/src/hummock/vacuum.rs index d747651b86d43..10e2c08abd6e5 100644 --- a/src/meta/src/hummock/vacuum.rs +++ b/src/meta/src/hummock/vacuum.rs @@ -228,17 +228,23 @@ mod tests { use std::sync::Arc; use itertools::Itertools; + use risingwave_hummock_sdk::compaction_group::StaticCompactionGroupId; use risingwave_hummock_sdk::HummockVersionId; use risingwave_pb::hummock::VacuumTask; + use risingwave_rpc_client::HummockMetaClient; use crate::backup_restore::BackupManager; use crate::hummock::test_utils::{add_test_tables, setup_compute_env}; - use crate::hummock::VacuumManager; + use crate::hummock::{MockHummockMetaClient, VacuumManager}; #[tokio::test] async fn test_vacuum() { let (env, hummock_manager, _cluster_manager, worker_node) = setup_compute_env(80).await; let context_id = worker_node.id; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager.clone(), + worker_node.id, + )); let compactor_manager = hummock_manager.compactor_manager_ref_for_test(); let backup_manager = Arc::new(BackupManager::for_test(env.clone(), hummock_manager.clone()).await); @@ -251,7 +257,13 @@ mod tests { assert_eq!(vacuum.vacuum_metadata().await.unwrap(), 0); 
assert_eq!(vacuum.vacuum_object().await.unwrap().len(), 0); hummock_manager.pin_version(context_id).await.unwrap(); - let sst_infos = add_test_tables(hummock_manager.as_ref(), context_id).await; + let compaction_group_id = StaticCompactionGroupId::StateDefault.into(); + let sst_infos = add_test_tables( + hummock_manager.as_ref(), + hummock_meta_client.clone(), + compaction_group_id, + ) + .await; assert_eq!(vacuum.vacuum_metadata().await.unwrap(), 0); hummock_manager.create_version_checkpoint(1).await.unwrap(); assert_eq!(vacuum.vacuum_metadata().await.unwrap(), 6); diff --git a/src/meta/src/lib.rs b/src/meta/src/lib.rs index 61e29b2fb1129..eab9dd1287ebf 100644 --- a/src/meta/src/lib.rs +++ b/src/meta/src/lib.rs @@ -15,7 +15,6 @@ #![allow(clippy::derive_partial_eq_without_eq)] #![feature(trait_alias)] #![feature(type_alias_impl_trait)] -#![feature(lint_reasons)] #![feature(map_try_insert)] #![feature(extract_if)] #![feature(hash_extract_if)] diff --git a/src/meta/src/manager/catalog/mod.rs b/src/meta/src/manager/catalog/mod.rs index 12c1596841f67..4db6711862810 100644 --- a/src/meta/src/manager/catalog/mod.rs +++ b/src/meta/src/manager/catalog/mod.rs @@ -1811,15 +1811,11 @@ impl CatalogManager { all_table_ids.extend(index_table_ids.iter().cloned()); for index_table_id in &index_table_ids { - let internal_table_ids = match fragment_manager + let internal_table_ids = fragment_manager .select_table_fragments_by_table_id(&(index_table_id.into())) .await .map(|fragments| fragments.internal_table_ids()) - { - Ok(v) => v, - // Handle backwards compat with no state persistence. - Err(_) => vec![], - }; + .unwrap_or_default(); // 1 should be used by table scan. if internal_table_ids.len() == 1 { @@ -1901,15 +1897,11 @@ impl CatalogManager { } all_table_ids.insert(index.index_table_id); - let internal_table_ids = match fragment_manager + let internal_table_ids = fragment_manager .select_table_fragments_by_table_id(&(index.index_table_id.into())) .await .map(|fragments| fragments.internal_table_ids()) - { - Ok(v) => v, - // Handle backwards compat with no state persistence. - Err(_) => vec![], - }; + .unwrap_or_default(); // 1 should be used by table scan. if internal_table_ids.len() == 1 { diff --git a/src/meta/src/manager/sink_coordination/coordinator_worker.rs b/src/meta/src/manager/sink_coordination/coordinator_worker.rs index 8409e714852c2..8ed063e5325c0 100644 --- a/src/meta/src/manager/sink_coordination/coordinator_worker.rs +++ b/src/meta/src/manager/sink_coordination/coordinator_worker.rs @@ -12,64 +12,191 @@ // See the License for the specific language governing permissions and // limitations under the License. 
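The rewrite of coordinator_worker.rs below replaces the old "collect one metadata per writer stream" loop: each writer is wrapped in a SinkWriterCoordinationHandle, and commit requests are grouped per epoch, with an epoch committed only once the union of the writers' vnode bitmaps covers every virtual node. A dependency-free sketch of that bookkeeping, using a plain bool array in place of risingwave_common::bitmap::Bitmap and assuming 256 vnodes; EpochCommitState and its methods are illustrative names, not part of the patch:

// Sketch only: the real EpochCommitRequests below also records the contributing
// handle ids so each writer can be acked once the commit succeeds.
const VNODE_COUNT: usize = 256; // assumption; the patch uses VirtualNode::COUNT

struct EpochCommitState {
    covered: [bool; VNODE_COUNT], // vnodes that have reported for this epoch
    metadatas: Vec<Vec<u8>>,      // opaque per-writer commit metadata
}

impl EpochCommitState {
    fn new() -> Self {
        Self {
            covered: [false; VNODE_COUNT],
            metadatas: Vec::new(),
        }
    }

    // Record one writer's request; overlapping vnode ownership is an error.
    fn add(&mut self, vnodes: &[usize], metadata: Vec<u8>) -> Result<(), String> {
        for &vnode in vnodes {
            if self.covered[vnode] {
                return Err(format!("vnode {vnode} reported twice for this epoch"));
            }
            self.covered[vnode] = true;
        }
        self.metadatas.push(metadata);
        Ok(())
    }

    // The epoch may be committed once every vnode has reported.
    fn can_commit(&self) -> bool {
        self.covered.iter().all(|covered| *covered)
    }
}

fn main() {
    let mut state = EpochCommitState::new();
    let all_vnodes: Vec<usize> = (0..VNODE_COUNT).collect();
    state.add(&all_vnodes, b"writer-0 metadata".to_vec()).unwrap();
    assert!(state.can_commit());
}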
-use std::collections::HashSet; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::future::{poll_fn, Future}; use std::pin::pin; +use std::task::Poll; +use std::time::{Duration, Instant}; use anyhow::anyhow; use futures::future::{select, Either}; -use futures::stream::FuturesUnordered; -use futures::{StreamExt, TryStreamExt}; +use futures::pin_mut; +use itertools::Itertools; use risingwave_common::bitmap::Bitmap; -use risingwave_common::hash::{VirtualNode, VnodeBitmapExt}; +use risingwave_common::hash::VirtualNode; use risingwave_connector::dispatch_sink; use risingwave_connector::sink::{build_sink, Sink, SinkCommitCoordinator, SinkParam}; -use risingwave_pb::connector_service::coordinate_request::CommitRequest; -use risingwave_pb::connector_service::coordinate_response::{ - CommitResponse, StartCoordinationResponse, -}; -use risingwave_pb::connector_service::{ - coordinate_request, coordinate_response, CoordinateRequest, CoordinateResponse, SinkMetadata, -}; +use risingwave_pb::connector_service::SinkMetadata; use thiserror_ext::AsReport; +use tokio::select; use tokio::sync::mpsc::UnboundedReceiver; +use tokio::time::sleep; use tonic::Status; use tracing::{error, warn}; -use crate::manager::sink_coordination::{ - NewSinkWriterRequest, SinkCoordinatorResponseSender, SinkWriterRequestStream, -}; +use crate::manager::sink_coordination::handle::SinkWriterCoordinationHandle; -macro_rules! send_await_with_err_check { - ($tx:expr, $msg:expr) => { - if $tx.send($msg).await.is_err() { - error!("unable to send msg"); +async fn run_future_with_periodic_fn( + future: F, + interval: Duration, + mut f: impl FnMut(), +) -> F::Output { + pin_mut!(future); + loop { + match select(&mut future, pin!(sleep(interval))).await { + Either::Left((output, _)) => { + break output; + } + Either::Right(_) => f(), } - }; + } } -pub struct CoordinatorWorker { +struct EpochCommitRequests { + epoch: u64, + metadatas: Vec, + handle_ids: HashSet, + bitmap: Bitmap, +} + +impl EpochCommitRequests { + fn new(epoch: u64) -> Self { + Self { + epoch, + metadatas: vec![], + handle_ids: Default::default(), + bitmap: Bitmap::zeros(VirtualNode::COUNT), + } + } + + fn add_new_request( + &mut self, + handle_id: usize, + metadata: SinkMetadata, + vnode_bitmap: Bitmap, + ) -> anyhow::Result<()> { + self.metadatas.push(metadata); + assert!(self.handle_ids.insert(handle_id)); + let check_bitmap = (&self.bitmap) & &vnode_bitmap; + if check_bitmap.count_ones() > 0 { + return Err(anyhow!( + "duplicate vnode {:?} on epoch {}. 
request vnode: {:?}, prev vnode: {:?}", + check_bitmap.iter_ones().collect_vec(), + self.epoch, + vnode_bitmap, + self.bitmap + )); + } + self.bitmap |= &vnode_bitmap; + Ok(()) + } + + fn can_commit(&self) -> bool { + self.bitmap.count_ones() == VirtualNode::COUNT + } +} + +struct CoordinationHandleManager { param: SinkParam, - request_streams: Vec, - response_senders: Vec, - request_rx: UnboundedReceiver, + writer_handles: HashMap, + next_handle_id: usize, + request_rx: UnboundedReceiver, +} + +impl CoordinationHandleManager { + fn ack_commit( + &mut self, + epoch: u64, + handle_ids: impl IntoIterator, + ) -> anyhow::Result<()> { + for handle_id in handle_ids { + let handle = self.writer_handles.get_mut(&handle_id).ok_or_else(|| { + anyhow!( + "fail to find handle for {} when ack commit on epoch {}", + handle_id, + epoch + ) + })?; + handle.ack_commit(epoch).map_err(|_| { + anyhow!( + "fail to ack commit on epoch {} for handle {}", + epoch, + handle_id + ) + })?; + } + Ok(()) + } + + async fn next_commit_request_inner( + writer_handles: &mut HashMap, + ) -> anyhow::Result<(usize, Bitmap, u64, SinkMetadata)> { + poll_fn(|cx| 'outer: loop { + for (handle_id, handle) in writer_handles.iter_mut() { + if let Poll::Ready(result) = handle.poll_next_commit_request(cx) { + match result { + Ok(Some((epoch, metadata))) => { + return Poll::Ready(Ok(( + *handle_id, + handle.vnode_bitmap().clone(), + epoch, + metadata, + ))); + } + Ok(None) => { + let handle_id = *handle_id; + writer_handles.remove(&handle_id); + continue 'outer; + } + Err(e) => { + return Poll::Ready(Err(e)); + } + } + } + } + return Poll::Pending; + }) + .await + } + + async fn next_commit_request(&mut self) -> anyhow::Result<(usize, Bitmap, u64, SinkMetadata)> { + loop { + select! { + handle = self.request_rx.recv() => { + let mut handle = handle.ok_or_else(|| anyhow!("end of writer request stream"))?; + if handle.param() != &self.param { + warn!(prev_param = ?self.param, new_param = ?handle.param(), "sink param mismatch"); + } + handle.start()?; + let handle_id = self.next_handle_id; + self.next_handle_id += 1; + self.writer_handles.insert(handle_id, handle); + } + result = Self::next_commit_request_inner(&mut self.writer_handles) => { + break result; + } + } + } + } +} + +pub struct CoordinatorWorker { + handle_manager: CoordinationHandleManager, + pending_epochs: BTreeMap, } impl CoordinatorWorker { pub async fn run( - first_writer_request: NewSinkWriterRequest, - request_rx: UnboundedReceiver, + param: SinkParam, + request_rx: UnboundedReceiver, ) { - let sink = match build_sink(first_writer_request.param.clone()) { + let sink = match build_sink(param.clone()) { Ok(sink) => sink, Err(e) => { error!( error = %e.as_report(), "unable to build sink with param {:?}", - first_writer_request.param - ); - send_await_with_err_check!( - first_writer_request.response_tx, - Err(Status::invalid_argument("failed to build sink")) + param ); return; } @@ -81,247 +208,77 @@ impl CoordinatorWorker { error!( error = %e.as_report(), "unable to build coordinator with param {:?}", - first_writer_request.param - ); - send_await_with_err_check!( - first_writer_request.response_tx, - Err(Status::invalid_argument("failed to build coordinator")) + param ); return; } }; - Self::execute_coordinator(first_writer_request, request_rx, coordinator).await + Self::execute_coordinator(param, request_rx, coordinator).await }); } pub async fn execute_coordinator( - first_writer_request: NewSinkWriterRequest, - request_rx: UnboundedReceiver, + param: SinkParam, + 
request_rx: UnboundedReceiver, coordinator: impl SinkCommitCoordinator, ) { let mut worker = CoordinatorWorker { - param: first_writer_request.param, - request_streams: vec![first_writer_request.request_stream], - response_senders: vec![first_writer_request.response_tx], - request_rx, + handle_manager: CoordinationHandleManager { + param, + writer_handles: HashMap::new(), + next_handle_id: 0, + request_rx, + }, + pending_epochs: Default::default(), }; - if let Err(e) = worker - .wait_for_writers(first_writer_request.vnode_bitmap) - .await - { - error!(error = %e.as_report(), "failed to wait for all writers"); - worker - .send_to_all_sink_writers(|| { - Err(Status::cancelled("failed to wait for all writers")) - }) - .await; - } - - worker.start_coordination(coordinator).await; - } - - async fn send_to_all_sink_writers( - &mut self, - new_msg: impl Fn() -> Result, - ) { - for sender in &self.response_senders { - send_await_with_err_check!(sender, new_msg()); - } - } - - async fn next_new_writer(&mut self) -> anyhow::Result { - // TODO: add timeout log - match select( - pin!(self.request_rx.recv()), - pin!(FuturesUnordered::from_iter( - self.request_streams - .iter_mut() - .map(|stream| stream.try_next()), - ) - .next()), - ) - .await - { - Either::Left((Some(req), _)) => Ok(req), - Either::Left((None, _)) => Err(anyhow!("manager request stream reaches the end")), - Either::Right((Some(Ok(Some(request))), _)) => Err(anyhow!( - "get new request from sink writer before initialize: {:?}", - request - )), - Either::Right((Some(Ok(None)), _)) => Err(anyhow!( - "one sink writer stream reaches the end before initialize" - )), - Either::Right((Some(Err(e)), _)) => { - Err(anyhow!(e).context("unable to poll one sink writer stream")) + if let Err(e) = worker.run_coordination(coordinator).await { + for handle in worker.handle_manager.writer_handles.into_values() { + handle.abort(Status::internal(format!( + "failed to run coordination: {:?}", + e.as_report() + ))) } - Either::Right((None, _)) => unreachable!("request_streams must not be empty"), } } - async fn wait_for_writers(&mut self, first_vnode_bitmap: Bitmap) -> anyhow::Result<()> { - let mut remaining_count = VirtualNode::COUNT; - let mut registered_vnode = HashSet::with_capacity(VirtualNode::COUNT); - - for vnode in first_vnode_bitmap.iter_vnodes() { - remaining_count -= 1; - registered_vnode.insert(vnode); - } - - while remaining_count > 0 { - let new_writer_request = self.next_new_writer().await?; - if self.param != new_writer_request.param { - // TODO: may return error. 
- warn!( - "get different param {:?} while current param {:?}", - new_writer_request.param, self.param - ); - } - self.request_streams.push(new_writer_request.request_stream); - self.response_senders.push(new_writer_request.response_tx); - - for vnode in new_writer_request.vnode_bitmap.iter_vnodes() { - if registered_vnode.contains(&vnode) { - return Err(anyhow!( - "get overlapped vnode: {}, current vnode {:?}", - vnode, - registered_vnode - )); - } - registered_vnode.insert(vnode); - remaining_count -= 1; - } - } - - self.send_to_all_sink_writers(|| { - Ok(CoordinateResponse { - msg: Some(coordinate_response::Msg::StartResponse( - StartCoordinationResponse {}, - )), - }) - }) - .await; - Ok(()) - } - - async fn collect_all_metadata(&mut self) -> anyhow::Result<(u64, Vec)> { - let mut epoch = None; - let mut metadata_list = Vec::with_capacity(self.request_streams.len()); - let mut uncollected_futures = FuturesUnordered::from_iter( - self.request_streams - .iter_mut() - .map(|stream| stream.try_next()), - ); - + async fn run_coordination( + &mut self, + mut coordinator: impl SinkCommitCoordinator, + ) -> anyhow::Result<()> { + coordinator.init().await?; loop { - match select( - pin!(self.request_rx.recv()), - pin!(uncollected_futures.next()), - ) - .await + let (handle_id, vnode_bitmap, epoch, metadata) = + self.handle_manager.next_commit_request().await?; + self.pending_epochs + .entry(epoch) + .or_insert_with(|| EpochCommitRequests::new(epoch)) + .add_new_request(handle_id, metadata, vnode_bitmap)?; + if self + .pending_epochs + .first_key_value() + .expect("non-empty") + .1 + .can_commit() { - Either::Left((Some(new_request), _)) => { - warn!("get new writer request while collecting metadata"); - send_await_with_err_check!( - new_request.response_tx, - Err(Status::already_exists( - "coordinator already running, should not get new request" - )) - ); - continue; - } - Either::Left((None, _)) => { - return Err(anyhow!( - "coordinator get notified to stop while collecting metadata" - )); - } - Either::Right((Some(next_result), _)) => match next_result { - Ok(Some(CoordinateRequest { - msg: - Some(coordinate_request::Msg::CommitRequest(CommitRequest { - epoch: request_epoch, - metadata: Some(metadata), - })), - })) => { - match &epoch { - Some(epoch) => { - if *epoch != request_epoch { - warn!( - "current epoch is {} but get request from {}", - epoch, request_epoch - ); - } - } - None => { - epoch = Some(request_epoch); - } - } - metadata_list.push(metadata); - } - Ok(Some(req)) => { - return Err(anyhow!("expect commit request but get {:?}", req)); - } - Ok(None) => { - return Err(anyhow!( - "sink writer input reaches the end while collecting metadata" - )); - } - Err(e) => { - return Err( - anyhow!(e).context("failed to poll one of the writer request streams") - ); - } - }, - Either::Right((None, _)) => { - break; - } - } - } - Ok(( - epoch.expect("should not be empty when have at least one writer"), - metadata_list, - )) - } - - async fn start_coordination(&mut self, mut coordinator: impl SinkCommitCoordinator) { - let result: Result<(), String> = try { - coordinator.init().await.map_err(|e| { - error!(error = %e.as_report(), "failed to initialize coordinator"); - format!("failed to initialize coordinator: {:?}", e.as_report()) - })?; - loop { - let (epoch, metadata_list) = self.collect_all_metadata().await.map_err(|e| { - error!(error = %e.as_report(), "failed to collect all metadata"); - format!("failed to collect all metadata: {:?}", e.as_report()) - })?; + let (epoch, requests) = 
self.pending_epochs.pop_first().expect("non-empty"); // TODO: measure commit time - coordinator - .commit(epoch, metadata_list) - .await - .map_err(|e| { - error!(epoch, error = %e.as_report(), "failed to commit metadata of epoch"); - format!("failed to commit: {:?}", e.as_report()) - })?; - - self.send_to_all_sink_writers(|| { - Ok(CoordinateResponse { - msg: Some(coordinate_response::Msg::CommitResponse(CommitResponse { - epoch, - })), - }) - }) - .await; + let start_time = Instant::now(); + run_future_with_periodic_fn( + coordinator.commit(epoch, requests.metadatas), + Duration::from_secs(5), + || { + warn!( + elapsed = ?start_time.elapsed(), + sink_id = self.handle_manager.param.sink_id.sink_id, + "committing" + ); + }, + ) + .await + .map_err(|e| anyhow!(e))?; + self.handle_manager.ack_commit(epoch, requests.handle_ids)?; } - }; - - if let Err(err_str) = result { - self.send_to_all_sink_writers(|| { - Err(Status::aborted(format!( - "failed to run coordination: {}", - err_str - ))) - }) - .await; } } } diff --git a/src/meta/src/manager/sink_coordination/handle.rs b/src/meta/src/manager/sink_coordination/handle.rs new file mode 100644 index 0000000000000..60b49cfd623ab --- /dev/null +++ b/src/meta/src/manager/sink_coordination/handle.rs @@ -0,0 +1,139 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
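The run_coordination loop above wraps coordinator.commit(..) in run_future_with_periodic_fn so that a "committing" warning is logged every five seconds while a commit is still in flight. A self-contained usage sketch of the same select-against-a-timer pattern, assuming tokio (rt, macros, time features) and futures as dependencies; run_with_progress is a stand-in name, not part of the patch:

use std::pin::pin;
use std::time::Duration;

use futures::future::{select, Either};
use futures::pin_mut;
use tokio::time::sleep;

// Drive `future` to completion, invoking `on_tick` every `interval` while it
// is still pending. The tick never cancels the wrapped future.
async fn run_with_progress<F: std::future::Future>(
    future: F,
    interval: Duration,
    mut on_tick: impl FnMut(),
) -> F::Output {
    pin_mut!(future);
    loop {
        match select(&mut future, pin!(sleep(interval))).await {
            Either::Left((output, _)) => break output,
            Either::Right(((), _)) => on_tick(),
        }
    }
}

#[tokio::main]
async fn main() {
    let value = run_with_progress(
        async {
            sleep(Duration::from_millis(250)).await; // stands in for a slow commit
            42
        },
        Duration::from_millis(100),
        || println!("still committing..."),
    )
    .await;
    assert_eq!(value, 42);
}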
+ +use std::pin::pin; +use std::task::{Context, Poll}; + +use anyhow::anyhow; +use futures::{Future, TryStreamExt}; +use risingwave_common::bitmap::Bitmap; +use risingwave_connector::sink::SinkParam; +use risingwave_pb::connector_service::coordinate_response::{ + CommitResponse, StartCoordinationResponse, +}; +use risingwave_pb::connector_service::{ + coordinate_request, coordinate_response, CoordinateResponse, SinkMetadata, +}; +use tonic::Status; + +use crate::manager::sink_coordination::{SinkCoordinatorResponseSender, SinkWriterRequestStream}; + +pub(super) struct SinkWriterCoordinationHandle { + request_stream: SinkWriterRequestStream, + response_tx: SinkCoordinatorResponseSender, + param: SinkParam, + vnode_bitmap: Bitmap, + prev_epoch: Option, +} + +impl SinkWriterCoordinationHandle { + pub(super) fn new( + request_stream: SinkWriterRequestStream, + response_tx: SinkCoordinatorResponseSender, + param: SinkParam, + vnode_bitmap: Bitmap, + ) -> Self { + Self { + request_stream, + response_tx, + param, + vnode_bitmap, + prev_epoch: None, + } + } + + pub(super) fn param(&self) -> &SinkParam { + &self.param + } + + pub(super) fn vnode_bitmap(&self) -> &Bitmap { + &self.vnode_bitmap + } + + pub(super) fn start(&mut self) -> anyhow::Result<()> { + self.response_tx + .send(Ok(CoordinateResponse { + msg: Some(coordinate_response::Msg::StartResponse( + StartCoordinationResponse {}, + )), + })) + .map_err(|_| anyhow!("fail to send start response")) + } + + pub(super) fn abort(self, status: Status) { + let _ = self.response_tx.send(Err(status)); + } + + pub(super) fn ack_commit(&mut self, epoch: u64) -> anyhow::Result<()> { + self.response_tx + .send(Ok(CoordinateResponse { + msg: Some(coordinate_response::Msg::CommitResponse(CommitResponse { + epoch, + })), + })) + .map_err(|_| anyhow!("fail to send commit response of epoch {}", epoch)) + } + + pub(super) fn poll_next_commit_request( + &mut self, + cx: &mut Context<'_>, + ) -> Poll>> { + let future = self.next_commit_request(); + let future = pin!(future); + future.poll(cx) + } + + async fn next_commit_request(&mut self) -> anyhow::Result> { + loop { + let request = self + .request_stream + .try_next() + .await? + .ok_or_else(|| anyhow!("end of request stream"))?; + match request.msg.ok_or_else(|| anyhow!("None msg in request"))? { + coordinate_request::Msg::StartRequest(_) => { + return Err(anyhow!("should have started")); + } + coordinate_request::Msg::CommitRequest(request) => { + if let Some(prev_epoch) = self.prev_epoch { + if request.epoch < prev_epoch { + return Err(anyhow!( + "invalid commit epoch {}, prev_epoch {}", + request.epoch, + prev_epoch + )); + } + } + let Some(metadata) = request.metadata else { + return Err(anyhow!("empty commit metadata")); + }; + self.prev_epoch = Some(request.epoch); + return Ok(Some((request.epoch, metadata))); + } + coordinate_request::Msg::UpdateVnodeRequest(request) => { + let bitmap = Bitmap::from( + &request + .vnode_bitmap + .ok_or_else(|| anyhow!("empty vnode bitmap"))?, + ); + self.vnode_bitmap = bitmap; + continue; + } + coordinate_request::Msg::Stop(_) => { + return Ok(None); + } + } + } + } +} diff --git a/src/meta/src/manager/sink_coordination/manager.rs b/src/meta/src/manager/sink_coordination/manager.rs index fd2b986be28e7..2fe2e8bfb3b8c 100644 --- a/src/meta/src/manager/sink_coordination/manager.rs +++ b/src/meta/src/manager/sink_coordination/manager.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
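In the manager.rs changes below, handle_new_sink_writer switches to entry(..).or_insert_with(..): the first writer seen for a sink id spawns its coordinator worker and registers an unbounded sender, and every writer, including that first one, is then forwarded to the worker over the channel. A minimal sketch of the spawn-on-first-writer pattern, assuming tokio as a dependency; Registry and Writer are illustrative names, not part of the patch:

use std::collections::HashMap;
use std::time::Duration;

use tokio::sync::mpsc::{unbounded_channel, UnboundedSender};

struct Writer {
    id: u32,
}

#[derive(Default)]
struct Registry {
    // One background worker (represented by its sender) per sink id.
    workers: HashMap<u32, UnboundedSender<Writer>>,
}

impl Registry {
    fn add_writer(&mut self, sink_id: u32, writer: Writer) {
        let tx = self.workers.entry(sink_id).or_insert_with(|| {
            // First writer for this sink id: spawn a dedicated worker task
            // that keeps receiving later writers from the channel.
            let (tx, mut rx) = unbounded_channel::<Writer>();
            tokio::spawn(async move {
                while let Some(writer) = rx.recv().await {
                    println!("sink {sink_id}: registered writer {}", writer.id);
                }
            });
            tx
        });
        // Every writer, including the first, goes through the channel.
        let _ = tx.send(writer);
    }
}

#[tokio::main]
async fn main() {
    let mut registry = Registry::default();
    registry.add_writer(1, Writer { id: 10 });
    registry.add_writer(1, Writer { id: 11 }); // reuses the worker spawned above
    // Give the spawned worker a moment to drain the channel before exiting.
    tokio::time::sleep(Duration::from_millis(50)).await;
}

The real handle_new_sink_writer additionally keeps the worker join handles and finish notifiers so that a StopCoordinator request can wait for the worker to exit.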
-use std::collections::hash_map::Entry; use std::collections::HashMap; use std::pin::pin; @@ -30,12 +29,13 @@ use tokio::sync::mpsc; use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender}; use tokio::sync::oneshot::{channel, Receiver, Sender}; use tokio::task::{JoinError, JoinHandle}; -use tokio_stream::wrappers::ReceiverStream; +use tokio_stream::wrappers::UnboundedReceiverStream; use tonic::Status; use tracing::{debug, error, info, warn}; use crate::manager::sink_coordination::coordinator_worker::CoordinatorWorker; -use crate::manager::sink_coordination::{NewSinkWriterRequest, SinkWriterRequestStream}; +use crate::manager::sink_coordination::handle::SinkWriterCoordinationHandle; +use crate::manager::sink_coordination::SinkWriterRequestStream; macro_rules! send_with_err_check { ($tx:expr, $msg:expr) => { @@ -56,7 +56,7 @@ macro_rules! send_await_with_err_check { const BOUNDED_CHANNEL_SIZE: usize = 16; enum ManagerRequest { - NewSinkWriter(NewSinkWriterRequest), + NewSinkWriter(SinkWriterCoordinationHandle), StopCoordinator { finish_notifier: Sender<()>, /// sink id to stop. When `None`, stop all sink coordinator @@ -71,11 +71,8 @@ pub struct SinkCoordinatorManager { impl SinkCoordinatorManager { pub fn start_worker() -> (Self, (JoinHandle<()>, Sender<()>)) { - Self::start_worker_with_spawn_worker(|writer_request, manager_request_stream| { - tokio::spawn(CoordinatorWorker::run( - writer_request, - manager_request_stream, - )) + Self::start_worker_with_spawn_worker(|param, manager_request_stream| { + tokio::spawn(CoordinatorWorker::run(param, manager_request_stream)) }) } @@ -111,14 +108,11 @@ impl SinkCoordinatorManager { ))); } }; - let (response_tx, response_rx) = mpsc::channel(BOUNDED_CHANNEL_SIZE); + let (response_tx, response_rx) = mpsc::unbounded_channel(); self.request_tx - .send(ManagerRequest::NewSinkWriter(NewSinkWriterRequest { - request_stream, - response_tx, - param, - vnode_bitmap, - })) + .send(ManagerRequest::NewSinkWriter( + SinkWriterCoordinationHandle::new(request_stream, response_tx, param, vnode_bitmap), + )) .await .map_err(|_| { Status::unavailable( @@ -126,7 +120,7 @@ impl SinkCoordinatorManager { ) })?; - Ok(ReceiverStream::new(response_rx)) + Ok(UnboundedReceiverStream::new(response_rx)) } async fn stop_coordinator(&self, sink_id: Option) { @@ -155,7 +149,7 @@ impl SinkCoordinatorManager { struct CoordinatorWorkerHandle { /// Sender to coordinator worker. 
Drop the sender as a stop signal - request_sender: Option>, + request_sender: Option>, /// Notify when the coordinator worker stops finish_notifiers: Vec>, } @@ -163,7 +157,7 @@ struct CoordinatorWorkerHandle { struct ManagerWorker { request_rx: mpsc::Receiver, // Make it option so that it can be polled with &mut SinkManagerWorker - shutdown_rx: Option>, + shutdown_rx: Receiver<()>, running_coordinator_worker_join_handles: FuturesUnordered)>>, @@ -178,7 +172,7 @@ enum ManagerEvent { }, } -trait SpawnCoordinatorFn = FnMut(NewSinkWriterRequest, UnboundedReceiver) -> JoinHandle<()> +trait SpawnCoordinatorFn = FnMut(SinkParam, UnboundedReceiver) -> JoinHandle<()> + Send + 'static; @@ -186,7 +180,7 @@ impl ManagerWorker { fn new(request_rx: mpsc::Receiver, shutdown_rx: Receiver<()>) -> Self { ManagerWorker { request_rx, - shutdown_rx: Some(shutdown_rx), + shutdown_rx, running_coordinator_worker_join_handles: Default::default(), running_coordinator_worker: Default::default(), } @@ -237,7 +231,6 @@ impl ManagerWorker { } async fn next_event(&mut self) -> Option { - let shutdown_rx = self.shutdown_rx.take().expect("should not be empty"); match select( select( pin!(self.request_rx.recv()), @@ -245,23 +238,20 @@ impl ManagerWorker { self.running_coordinator_worker_join_handles.next() )), ), - shutdown_rx, + &mut self.shutdown_rx, ) .await { - Either::Left((either, shutdown_rx)) => { - self.shutdown_rx = Some(shutdown_rx); - match either { - Either::Left((Some(request), _)) => Some(ManagerEvent::NewRequest(request)), - Either::Left((None, _)) => None, - Either::Right(((sink_id, join_result), _)) => { - Some(ManagerEvent::CoordinatorWorkerFinished { - sink_id, - join_result, - }) - } + Either::Left((either, _)) => match either { + Either::Left((Some(request), _)) => Some(ManagerEvent::NewRequest(request)), + Either::Left((None, _)) => None, + Either::Right(((sink_id, join_result), _)) => { + Some(ManagerEvent::CoordinatorWorkerFinished { + sink_id, + join_result, + }) } - } + }, Either::Right(_) => None, } } @@ -309,39 +299,39 @@ impl ManagerWorker { fn handle_new_sink_writer( &mut self, - request: NewSinkWriterRequest, + new_writer: SinkWriterCoordinationHandle, spawn_coordinator_worker: &mut impl SpawnCoordinatorFn, ) { - let param = &request.param; + let param = new_writer.param(); let sink_id = param.sink_id; - // Launch the coordinator worker task if it is the first - match self.running_coordinator_worker.entry(param.sink_id) { - Entry::Occupied(mut entry) => { - if let Some(sender) = entry.get_mut().request_sender.as_mut() { - send_with_err_check!(sender, request); - } else { - warn!( - "handle a new request while the sink coordinator is being stopped: {:?}", - param - ); - drop(request.response_tx); - } - } - Entry::Vacant(entry) => { + let handle = self + .running_coordinator_worker + .entry(param.sink_id) + .or_insert_with(|| { + // Launch the coordinator worker task if it is the first let (request_tx, request_rx) = unbounded_channel(); - let join_handle = spawn_coordinator_worker(request, request_rx); + let join_handle = spawn_coordinator_worker(param.clone(), request_rx); self.running_coordinator_worker_join_handles.push( join_handle .map(move |join_result| (sink_id, join_result)) .boxed(), ); - entry.insert(CoordinatorWorkerHandle { + CoordinatorWorkerHandle { request_sender: Some(request_tx), finish_notifiers: Vec::new(), - }); - } - }; + } + }); + + if let Some(sender) = handle.request_sender.as_mut() { + send_with_err_check!(sender, new_writer); + } else { + warn!( + "handle a new 
request while the sink coordinator is being stopped: {:?}", + param + ); + new_writer.abort(Status::internal("the sink is being stopped")); + } } } @@ -357,7 +347,7 @@ mod tests { use futures::{FutureExt, StreamExt}; use itertools::Itertools; use rand::seq::SliceRandom; - use risingwave_common::bitmap::{Bitmap, BitmapBuilder}; + use risingwave_common::bitmap::BitmapBuilder; use risingwave_common::hash::VirtualNode; use risingwave_connector::sink::catalog::{SinkId, SinkType}; use risingwave_connector::sink::{SinkCommitCoordinator, SinkError, SinkParam}; @@ -367,7 +357,7 @@ mod tests { use tokio_stream::wrappers::ReceiverStream; use crate::manager::sink_coordination::coordinator_worker::CoordinatorWorker; - use crate::manager::sink_coordination::{NewSinkWriterRequest, SinkCoordinatorManager}; + use crate::manager::sink_coordination::SinkCoordinatorManager; struct MockCoordinator, &mut C) -> Result<(), SinkError>> { context: C, @@ -434,16 +424,16 @@ mod tests { let (manager, (_join_handle, _stop_tx)) = SinkCoordinatorManager::start_worker_with_spawn_worker({ - let param = param.clone(); + let expected_param = param.clone(); let metadata = metadata.clone(); - move |first_request: NewSinkWriterRequest, new_writer_rx| { - let param = param.clone(); + move |param, new_writer_rx| { let metadata = metadata.clone(); + let expected_param = expected_param.clone(); tokio::spawn(async move { // validate the start request - assert_eq!(first_request.param, param); + assert_eq!(param, expected_param); CoordinatorWorker::execute_coordinator( - first_request, + param.clone(), new_writer_rx, MockCoordinator::new(0, |epoch, metadata_list, count: &mut usize| { *count += 1; @@ -497,14 +487,8 @@ mod tests { .unwrap() }; - let mut build_client_future1 = pin!(build_client(vnode1)); - assert!( - poll_fn(|cx| Poll::Ready(build_client_future1.as_mut().poll(cx))) - .await - .is_pending() - ); let (mut client1, mut client2) = - join(build_client_future1, pin!(build_client(vnode2))).await; + join(build_client(vnode1), pin!(build_client(vnode2))).await; { // commit epoch1 @@ -598,16 +582,16 @@ mod tests { let (manager, (_join_handle, _stop_tx)) = SinkCoordinatorManager::start_worker_with_spawn_worker({ - let param = param.clone(); + let expected_param = param.clone(); let metadata = metadata.clone(); - move |first_request: NewSinkWriterRequest, new_writer_rx| { - let param = param.clone(); + move |param, new_writer_rx| { let metadata = metadata.clone(); + let expected_param = expected_param.clone(); tokio::spawn(async move { // validate the start request - assert_eq!(first_request.param, param); + assert_eq!(param, expected_param); CoordinatorWorker::execute_coordinator( - first_request, + param.clone(), new_writer_rx, MockCoordinator::new(0, |epoch, metadata_list, count: &mut usize| { *count += 1; @@ -686,46 +670,6 @@ mod tests { .unwrap(); } - #[tokio::test] - async fn test_drop_sink_while_init() { - let sink_id = SinkId::from(1); - let param = SinkParam { - sink_id, - sink_name: "test".into(), - properties: Default::default(), - columns: vec![], - downstream_pk: vec![], - sink_type: SinkType::AppendOnly, - format_desc: None, - db_name: "test".into(), - sink_from_name: "test".into(), - }; - - let (manager, (_join_handle, _stop_tx)) = SinkCoordinatorManager::start_worker(); - - let mut build_client_future1 = pin!(CoordinatorStreamHandle::new_with_init_stream( - param.to_proto(), - Bitmap::zeros(VirtualNode::COUNT), - |rx| async { - Ok(tonic::Response::new( - manager - 
.handle_new_request(ReceiverStream::new(rx).map(Ok).boxed()) - .await - .unwrap() - .boxed(), - )) - }, - )); - assert!( - poll_fn(|cx| Poll::Ready(build_client_future1.as_mut().poll(cx))) - .await - .is_pending() - ); - manager.stop_sink_coordinator(sink_id).await; - - assert!(build_client_future1.await.is_err()); - } - #[tokio::test] async fn test_partial_commit() { let param = SinkParam { @@ -757,14 +701,14 @@ mod tests { let (manager, (_join_handle, _stop_tx)) = SinkCoordinatorManager::start_worker_with_spawn_worker({ - let param = param.clone(); - move |first_request: NewSinkWriterRequest, new_writer_rx| { - let param = param.clone(); + let expected_param = param.clone(); + move |param, new_writer_rx| { + let expected_param = expected_param.clone(); tokio::spawn(async move { // validate the start request - assert_eq!(first_request.param, param); + assert_eq!(param, expected_param); CoordinatorWorker::execute_coordinator( - first_request, + param, new_writer_rx, MockCoordinator::new((), |_, _, _| unreachable!()), ) @@ -836,14 +780,14 @@ mod tests { let (manager, (_join_handle, _stop_tx)) = SinkCoordinatorManager::start_worker_with_spawn_worker({ - let param = param.clone(); - move |first_request: NewSinkWriterRequest, new_writer_rx| { - let param = param.clone(); + let expected_param = param.clone(); + move |param, new_writer_rx| { + let expected_param = expected_param.clone(); tokio::spawn(async move { // validate the start request - assert_eq!(first_request.param, param); + assert_eq!(param, expected_param); CoordinatorWorker::execute_coordinator( - first_request, + param, new_writer_rx, MockCoordinator::new((), |_, _, _| { Err(SinkError::Coordinator(anyhow!("failed to commit"))) @@ -897,4 +841,269 @@ mod tests { assert!(result1.is_err()); assert!(result2.is_err()); } + + #[tokio::test] + async fn test_update_vnode_bitmap() { + let param = SinkParam { + sink_id: SinkId::from(1), + sink_name: "test".into(), + properties: Default::default(), + columns: vec![], + downstream_pk: vec![], + sink_type: SinkType::AppendOnly, + format_desc: None, + db_name: "test".into(), + sink_from_name: "test".into(), + }; + + let epoch1 = 233; + let epoch2 = 234; + let epoch3 = 235; + let epoch4 = 236; + + let mut all_vnode = (0..VirtualNode::COUNT).collect_vec(); + all_vnode.shuffle(&mut rand::thread_rng()); + let (first, second) = all_vnode.split_at(VirtualNode::COUNT / 2); + let build_bitmap = |indexes: &[usize]| { + let mut builder = BitmapBuilder::zeroed(VirtualNode::COUNT); + for i in indexes { + builder.set(*i, true); + } + builder.finish() + }; + let vnode1 = build_bitmap(first); + let vnode2 = build_bitmap(second); + + let metadata = [ + [vec![1u8, 2u8], vec![3u8, 4u8]], + [vec![5u8, 6u8], vec![7u8, 8u8]], + ]; + + let metadata_scale_out = [vec![9u8, 10u8], vec![11u8, 12u8], vec![13u8, 14u8]]; + let metadata_scale_in = [vec![13u8, 14u8], vec![15u8, 16u8]]; + + let (manager, (_join_handle, _stop_tx)) = + SinkCoordinatorManager::start_worker_with_spawn_worker({ + let expected_param = param.clone(); + let metadata = metadata.clone(); + let metadata_scale_out = metadata_scale_out.clone(); + let metadata_scale_in = metadata_scale_in.clone(); + move |param, new_writer_rx| { + let metadata = metadata.clone(); + let metadata_scale_out = metadata_scale_out.clone(); + let metadata_scale_in = metadata_scale_in.clone(); + let expected_param = expected_param.clone(); + tokio::spawn(async move { + // validate the start request + assert_eq!(param, expected_param); + CoordinatorWorker::execute_coordinator( + 
param.clone(), + new_writer_rx, + MockCoordinator::new(0, |epoch, metadata_list, count: &mut usize| { + *count += 1; + let mut metadata_list = metadata_list + .into_iter() + .map(|metadata| match metadata { + SinkMetadata { + metadata: + Some(Metadata::Serialized(SerializedMetadata { + metadata, + })), + } => metadata, + _ => unreachable!(), + }) + .collect_vec(); + metadata_list.sort(); + let (expected_epoch, expected_metadata_list) = match *count { + 1 => (epoch1, metadata[0].as_slice()), + 2 => (epoch2, metadata[1].as_slice()), + 3 => (epoch3, metadata_scale_out.as_slice()), + 4 => (epoch4, metadata_scale_in.as_slice()), + _ => unreachable!(), + }; + assert_eq!(expected_epoch, epoch); + assert_eq!(expected_metadata_list, &metadata_list); + Ok(()) + }), + ) + .await; + }) + } + }); + + let build_client = |vnode| async { + CoordinatorStreamHandle::new_with_init_stream(param.to_proto(), vnode, |rx| async { + Ok(tonic::Response::new( + manager + .handle_new_request(ReceiverStream::new(rx).map(Ok).boxed()) + .await + .unwrap() + .boxed(), + )) + }) + .await + .unwrap() + }; + + let (mut client1, mut client2) = + join(build_client(vnode1), pin!(build_client(vnode2))).await; + + { + // commit epoch1 + let mut commit_future = pin!(client2 + .commit( + epoch1, + SinkMetadata { + metadata: Some(Metadata::Serialized(SerializedMetadata { + metadata: metadata[0][1].clone(), + })), + }, + ) + .map(|result| result.unwrap())); + assert!(poll_fn(|cx| Poll::Ready(commit_future.as_mut().poll(cx))) + .await + .is_pending()); + join( + commit_future, + client1 + .commit( + epoch1, + SinkMetadata { + metadata: Some(Metadata::Serialized(SerializedMetadata { + metadata: metadata[0][0].clone(), + })), + }, + ) + .map(|result| result.unwrap()), + ) + .await; + } + + let (vnode1, vnode2, vnode3) = { + let (first, second) = all_vnode.split_at(VirtualNode::COUNT / 3); + let (second, third) = second.split_at(VirtualNode::COUNT / 3); + ( + build_bitmap(first), + build_bitmap(second), + build_bitmap(third), + ) + }; + + let mut client3 = build_client(vnode3).await; + { + let mut commit_future3 = pin!(client3 + .commit( + epoch3, + SinkMetadata { + metadata: Some(Metadata::Serialized(SerializedMetadata { + metadata: metadata_scale_out[2].clone(), + })), + }, + ) + .map(|result| result.unwrap())); + assert!(poll_fn(|cx| Poll::Ready(commit_future3.as_mut().poll(cx))) + .await + .is_pending()); + + { + // commit epoch2 + let mut commit_future = pin!(client1 + .commit( + epoch2, + SinkMetadata { + metadata: Some(Metadata::Serialized(SerializedMetadata { + metadata: metadata[1][0].clone(), + })), + }, + ) + .map(|result| result.unwrap())); + assert!(poll_fn(|cx| Poll::Ready(commit_future.as_mut().poll(cx))) + .await + .is_pending()); + join( + commit_future, + client2 + .commit( + epoch2, + SinkMetadata { + metadata: Some(Metadata::Serialized(SerializedMetadata { + metadata: metadata[1][1].clone(), + })), + }, + ) + .map(|result| result.unwrap()), + ) + .await; + } + + client1.update_vnode_bitmap(&vnode1).await.unwrap(); + client2.update_vnode_bitmap(&vnode2).await.unwrap(); + let mut commit_future1 = pin!(client1 + .commit( + epoch3, + SinkMetadata { + metadata: Some(Metadata::Serialized(SerializedMetadata { + metadata: metadata_scale_out[0].clone(), + })), + }, + ) + .map(|result| result.unwrap())); + assert!(poll_fn(|cx| Poll::Ready(commit_future1.as_mut().poll(cx))) + .await + .is_pending()); + assert!(poll_fn(|cx| Poll::Ready(commit_future3.as_mut().poll(cx))) + .await + .is_pending()); + client2 + .commit( + epoch3, + 
SinkMetadata { + metadata: Some(Metadata::Serialized(SerializedMetadata { + metadata: metadata_scale_out[1].clone(), + })), + }, + ) + .map(|result| result.unwrap()) + .await; + } + + let (vnode2, vnode3) = { + let (first, second) = all_vnode.split_at(VirtualNode::COUNT / 3); + (build_bitmap(first), build_bitmap(second)) + }; + + // client1.stop().await.unwrap(); + client2.update_vnode_bitmap(&vnode2).await.unwrap(); + client3.update_vnode_bitmap(&vnode3).await.unwrap(); + + { + let mut commit_future = pin!(client2 + .commit( + epoch4, + SinkMetadata { + metadata: Some(Metadata::Serialized(SerializedMetadata { + metadata: metadata_scale_in[0].clone(), + })), + }, + ) + .map(|result| result.unwrap())); + assert!(poll_fn(|cx| Poll::Ready(commit_future.as_mut().poll(cx))) + .await + .is_pending()); + join( + commit_future, + client3 + .commit( + epoch4, + SinkMetadata { + metadata: Some(Metadata::Serialized(SerializedMetadata { + metadata: metadata_scale_in[1].clone(), + })), + }, + ) + .map(|result| result.unwrap()), + ) + .await; + } + } } diff --git a/src/meta/src/manager/sink_coordination/mod.rs b/src/meta/src/manager/sink_coordination/mod.rs index ab44965891d5f..2f5f4d6ba62b1 100644 --- a/src/meta/src/manager/sink_coordination/mod.rs +++ b/src/meta/src/manager/sink_coordination/mod.rs @@ -13,22 +13,14 @@ // limitations under the License. mod coordinator_worker; +mod handle; mod manager; use futures::stream::BoxStream; pub use manager::SinkCoordinatorManager; -use risingwave_common::bitmap::Bitmap; -use risingwave_connector::sink::SinkParam; use risingwave_pb::connector_service::{CoordinateRequest, CoordinateResponse}; -use tokio::sync::mpsc::Sender; +use tokio::sync::mpsc::UnboundedSender; use tonic::Status; pub type SinkWriterRequestStream = BoxStream<'static, Result>; -pub type SinkCoordinatorResponseSender = Sender>; - -pub struct NewSinkWriterRequest { - pub request_stream: SinkWriterRequestStream, - pub response_tx: SinkCoordinatorResponseSender, - pub param: SinkParam, - pub vnode_bitmap: Bitmap, -} +pub type SinkCoordinatorResponseSender = UnboundedSender>; diff --git a/src/meta/src/rpc/ddl_controller_v2.rs b/src/meta/src/rpc/ddl_controller_v2.rs index c097fa5acb5c6..5e83e49b767a7 100644 --- a/src/meta/src/rpc/ddl_controller_v2.rs +++ b/src/meta/src/rpc/ddl_controller_v2.rs @@ -146,7 +146,7 @@ impl DdlController { let internal_tables = fragment_graph.internal_tables().into_values().collect_vec(); let table_id_map = mgr .catalog_controller - .create_internal_table_catalog(streaming_job.id() as _, internal_tables) + .create_internal_table_catalog(&streaming_job, internal_tables) .await?; fragment_graph.refill_internal_table_ids(table_id_map); diff --git a/src/meta/src/stream/source_manager.rs b/src/meta/src/stream/source_manager.rs index 751ee92beebc1..68d74902c1c77 100644 --- a/src/meta/src/stream/source_manager.rs +++ b/src/meta/src/stream/source_manager.rs @@ -188,10 +188,9 @@ impl ConnectorSourceWorker

{ let source_is_up = |res: i64| { self.source_is_up.set(res); }; - let splits = self.enumerator.list_splits().await.map_err(|e| { + let splits = self.enumerator.list_splits().await.inspect_err(|_| { source_is_up(0); self.fail_cnt += 1; - e })?; source_is_up(1); self.fail_cnt = 0; @@ -589,21 +588,24 @@ where ) } -pub fn validate_assignment(assignment: &mut HashMap>) { +pub fn validate_assignment(assignment: &mut HashMap>) -> bool { + let mut dup_assignment_found_flag = false; + // check if one split is assign to multiple actors let mut split_to_actor = HashMap::new(); for (actor_id, splits) in &mut *assignment { - let _ = splits.iter().map(|split| { + for split in splits { split_to_actor .entry(split.id()) .or_insert_with(Vec::new) - .push(*actor_id) - }); + .push(*actor_id); + } } for (split_id, actor_ids) in &mut split_to_actor { if actor_ids.len() > 1 { tracing::warn!(split_id = ?split_id, actor_ids = ?actor_ids, "split is assigned to multiple actors"); + dup_assignment_found_flag = true; } // keep the first actor and remove the rest from the assignment for actor_id in actor_ids.iter().skip(1) { @@ -613,6 +615,8 @@ pub fn validate_assignment(assignment: &mut HashMap>) { .retain(|split| split.id() != *split_id); } } + + dup_assignment_found_flag } fn align_backfill_splits( @@ -1137,15 +1141,28 @@ impl SourceManager { /// The command will first updates `SourceExecutor`'s splits, and finally calls `Self::apply_source_change` /// to update states in `SourceManager`. async fn tick(&self) -> MetaResult<()> { - let split_assignment = { + let mut split_assignment = { let core_guard = self.core.lock().await; core_guard.reassign_splits().await? }; + let dup_assignment_flag = split_assignment + .iter_mut() + .map(|(_, assignment)| validate_assignment(assignment)) + .reduce(|a, b| a || b) + .unwrap_or(false); + if !split_assignment.is_empty() { let command = Command::SourceSplitAssignment(split_assignment); tracing::info!(command = ?command, "pushing down split assignment command"); - self.barrier_scheduler.run_command(command).await?; + if dup_assignment_flag { + tracing::warn!("duplicate split assignment found, wrap with pause and resume"); + self.barrier_scheduler + .run_config_change_command_with_pause(command) + .await?; + } else { + self.barrier_scheduler.run_command(command).await?; + } } Ok(()) @@ -1354,10 +1371,12 @@ mod tests { 1 => test_assignment, }; - fragment_assignment.iter_mut().for_each(|(_, assignment)| { - validate_assignment(assignment); - }); - + let dup_assignment_flag = fragment_assignment + .iter_mut() + .map(|(_, assignment)| validate_assignment(assignment)) + .reduce(|a, b| a || b) + .unwrap_or(false); + assert!(dup_assignment_flag); { let mut split_to_actor = HashMap::new(); for actor_to_splits in fragment_assignment.values() { diff --git a/src/object_store/src/lib.rs b/src/object_store/src/lib.rs index d9e768b7f0290..c70d38eb90a90 100644 --- a/src/object_store/src/lib.rs +++ b/src/object_store/src/lib.rs @@ -14,7 +14,6 @@ #![feature(trait_alias)] #![feature(type_alias_impl_trait)] -#![feature(lint_reasons)] #![feature(error_generic_member_access)] #![feature(let_chains)] diff --git a/src/prost/build.rs b/src/prost/build.rs index 63c6313e35956..0afbaef2ea730 100644 --- a/src/prost/build.rs +++ b/src/prost/build.rs @@ -180,6 +180,7 @@ fn main() -> Result<(), Box> { .type_attribute("hummock.GroupDestroy", "#[derive(Eq)]") .type_attribute("hummock.GroupMetaChange", "#[derive(Eq)]") .type_attribute("hummock.GroupTableChange", "#[derive(Eq)]") + 
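The reworked `validate_assignment` above now returns a flag that reports whether any split was assigned to more than one actor, which is what lets `tick` switch to `run_config_change_command_with_pause` when duplicates are found. Here is a minimal sketch of the dedup rule itself, with plain integers standing in for actor ids and split ids (the real code keys splits by `split.id()`):

```rust
use std::collections::HashMap;

// `u32` actor ids and `u64` split ids are illustrative stand-ins for the real types.
fn dedup_assignment(assignment: &mut HashMap<u32, Vec<u64>>) -> bool {
    // Index: which actors own each split?
    let mut split_to_actors: HashMap<u64, Vec<u32>> = HashMap::new();
    for (actor_id, splits) in assignment.iter() {
        for split_id in splits {
            split_to_actors.entry(*split_id).or_default().push(*actor_id);
        }
    }

    let mut duplicate_found = false;
    for (split_id, actor_ids) in split_to_actors {
        if actor_ids.len() > 1 {
            duplicate_found = true;
            // Keep the first owner, drop the split from every other actor.
            for actor_id in actor_ids.iter().skip(1) {
                assignment
                    .get_mut(actor_id)
                    .expect("actor must exist")
                    .retain(|s| *s != split_id);
            }
        }
    }
    duplicate_found
}

fn main() {
    let mut assignment = HashMap::from([(1u32, vec![7u64, 8]), (2u32, vec![8u64])]);
    assert!(dedup_assignment(&mut assignment));
    // After deduplication, split 8 is owned by exactly one actor.
    assert_eq!(assignment.values().flatten().filter(|s| **s == 8).count(), 1);
}
```

Which actor keeps a duplicated split is arbitrary (it follows map iteration order, as in the patch); the invariant that matters is that each split ends up with exactly one owner, and the returned flag tells the caller to wrap the assignment command with a pause/resume.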
.type_attribute("hummock.GroupMerge", "#[derive(Eq)]") .type_attribute("hummock.GroupDelta", "#[derive(Eq)]") .type_attribute("hummock.LevelHandler.RunningCompactTask", "#[derive(Eq)]") .type_attribute("hummock.LevelHandler", "#[derive(Eq)]") diff --git a/src/prost/src/lib.rs b/src/prost/src/lib.rs index c8ad9de582edc..e965f76282da4 100644 --- a/src/prost/src/lib.rs +++ b/src/prost/src/lib.rs @@ -15,7 +15,6 @@ // for derived code of `Message` #![expect(clippy::all)] #![expect(clippy::doc_markdown)] -#![feature(lint_reasons)] use std::str::FromStr; diff --git a/src/risedevtool/src/lib.rs b/src/risedevtool/src/lib.rs index 57294e5a7eafa..e7b2fdf56f777 100644 --- a/src/risedevtool/src/lib.rs +++ b/src/risedevtool/src/lib.rs @@ -15,7 +15,6 @@ #![allow(clippy::derive_partial_eq_without_eq)] #![feature(exit_status_error)] #![feature(let_chains)] -#![feature(lint_reasons)] mod config; pub use config::*; diff --git a/src/risedevtool/src/task/task_kafka_ready_check.rs b/src/risedevtool/src/task/task_kafka_ready_check.rs index 79838bf8eca66..b749822a1ebe2 100644 --- a/src/risedevtool/src/task/task_kafka_ready_check.rs +++ b/src/risedevtool/src/task/task_kafka_ready_check.rs @@ -42,7 +42,7 @@ impl Task for KafkaReadyCheckTask { let mut config = ClientConfig::new(); config.set( "bootstrap.servers", - &format!("{}:{}", self.config.address, self.config.port), + format!("{}:{}", self.config.address, self.config.port), ); let rt = tokio::runtime::Builder::new_current_thread() diff --git a/src/rpc_client/Cargo.toml b/src/rpc_client/Cargo.toml index 49729c6d9e8ac..6a25be3c21738 100644 --- a/src/rpc_client/Cargo.toml +++ b/src/rpc_client/Cargo.toml @@ -23,7 +23,7 @@ http = "1" hyper = "1" itertools = { workspace = true } lru = { workspace = true } -moka = { version = "0.12", features = ["future"] } +moka = { version = "0.12.0", features = ["future"] } paste = "1" rand = { workspace = true } risingwave_common = { workspace = true } diff --git a/src/rpc_client/src/hummock_meta_client.rs b/src/rpc_client/src/hummock_meta_client.rs index df42a0da3ff35..db99036a34754 100644 --- a/src/rpc_client/src/hummock_meta_client.rs +++ b/src/rpc_client/src/hummock_meta_client.rs @@ -38,7 +38,12 @@ pub trait HummockMetaClient: Send + Sync + 'static { async fn get_snapshot(&self) -> Result; async fn get_new_sst_ids(&self, number: u32) -> Result; // We keep `commit_epoch` only for test/benchmark. 
- async fn commit_epoch(&self, epoch: HummockEpoch, sync_result: SyncResult) -> Result<()>; + async fn commit_epoch( + &self, + epoch: HummockEpoch, + sync_result: SyncResult, + is_log_store: bool, + ) -> Result<()>; async fn report_vacuum_task(&self, vacuum_task: VacuumTask) -> Result<()>; async fn trigger_manual_compaction( &self, @@ -66,5 +71,9 @@ pub trait HummockMetaClient: Send + Sync + 'static { BoxStream<'static, CompactionEventItem>, )>; - async fn get_version_by_epoch(&self, epoch: HummockEpoch) -> Result; + async fn get_version_by_epoch( + &self, + epoch: HummockEpoch, + table_id: u32, + ) -> Result; } diff --git a/src/rpc_client/src/meta_client.rs b/src/rpc_client/src/meta_client.rs index db66e60c91eeb..67ea55269b2bd 100644 --- a/src/rpc_client/src/meta_client.rs +++ b/src/rpc_client/src/meta_client.rs @@ -1246,10 +1246,12 @@ impl MetaClient { &self, group_id: CompactionGroupId, table_ids_to_new_group: &[StateTableId], + partition_vnode_count: u32, ) -> Result { let req = SplitCompactionGroupRequest { group_id, table_ids: table_ids_to_new_group.to_vec(), + partition_vnode_count, }; let resp = self.inner.split_compaction_group(req).await?; Ok(resp.new_group_id) @@ -1432,8 +1434,12 @@ impl MetaClient { Ok(resp.ret) } - pub async fn get_version_by_epoch(&self, epoch: HummockEpoch) -> Result { - let req = GetVersionByEpochRequest { epoch }; + pub async fn get_version_by_epoch( + &self, + epoch: HummockEpoch, + table_id: u32, + ) -> Result { + let req = GetVersionByEpochRequest { epoch, table_id }; let resp = self.inner.get_version_by_epoch(req).await?; Ok(resp.version.unwrap()) } @@ -1445,6 +1451,19 @@ impl MetaClient { let resp = self.inner.get_cluster_limits(req).await?; Ok(resp.active_limits.into_iter().map(|l| l.into()).collect()) } + + pub async fn merge_compaction_group( + &self, + left_group_id: CompactionGroupId, + right_group_id: CompactionGroupId, + ) -> Result<()> { + let req = MergeCompactionGroupRequest { + left_group_id, + right_group_id, + }; + self.inner.merge_compaction_group(req).await?; + Ok(()) + } } #[async_trait] @@ -1512,7 +1531,12 @@ impl HummockMetaClient for MetaClient { Ok(SstObjectIdRange::new(resp.start_id, resp.end_id)) } - async fn commit_epoch(&self, _epoch: HummockEpoch, _sync_result: SyncResult) -> Result<()> { + async fn commit_epoch( + &self, + _epoch: HummockEpoch, + _sync_result: SyncResult, + _is_log_store: bool, + ) -> Result<()> { panic!("Only meta service can commit_epoch in production.") } @@ -1607,8 +1631,12 @@ impl HummockMetaClient for MetaClient { Ok((request_sender, Box::pin(stream))) } - async fn get_version_by_epoch(&self, epoch: HummockEpoch) -> Result { - self.get_version_by_epoch(epoch).await + async fn get_version_by_epoch( + &self, + epoch: HummockEpoch, + table_id: u32, + ) -> Result { + self.get_version_by_epoch(epoch, table_id).await } } @@ -2117,6 +2145,7 @@ macro_rules! 
for_all_meta_rpc { ,{ hummock_client, cancel_compact_task, CancelCompactTaskRequest, CancelCompactTaskResponse} ,{ hummock_client, list_change_log_epochs, ListChangeLogEpochsRequest, ListChangeLogEpochsResponse } ,{ hummock_client, get_version_by_epoch, GetVersionByEpochRequest, GetVersionByEpochResponse } + ,{ hummock_client, merge_compaction_group, MergeCompactionGroupRequest, MergeCompactionGroupResponse } ,{ user_client, create_user, CreateUserRequest, CreateUserResponse } ,{ user_client, update_user, UpdateUserRequest, UpdateUserResponse } ,{ user_client, drop_user, DropUserRequest, DropUserResponse } diff --git a/src/rpc_client/src/sink_coordinate_client.rs b/src/rpc_client/src/sink_coordinate_client.rs index 06602ef4db3b7..8823dd440bc77 100644 --- a/src/rpc_client/src/sink_coordinate_client.rs +++ b/src/rpc_client/src/sink_coordinate_client.rs @@ -18,7 +18,7 @@ use anyhow::anyhow; use futures::{Stream, TryStreamExt}; use risingwave_common::bitmap::Bitmap; use risingwave_pb::connector_service::coordinate_request::{ - CommitRequest, StartCoordinationRequest, + CommitRequest, StartCoordinationRequest, UpdateVnodeBitmapRequest, }; use risingwave_pb::connector_service::{ coordinate_request, coordinate_response, CoordinateRequest, CoordinateResponse, PbSinkParam, @@ -99,4 +99,24 @@ impl CoordinatorStreamHandle { msg => Err(anyhow!("should get commit response but get {:?}", msg)), } } + + pub async fn update_vnode_bitmap(&mut self, vnode_bitmap: &Bitmap) -> anyhow::Result<()> { + self.send_request(CoordinateRequest { + msg: Some(coordinate_request::Msg::UpdateVnodeRequest( + UpdateVnodeBitmapRequest { + vnode_bitmap: Some(vnode_bitmap.to_protobuf()), + }, + )), + }) + .await?; + Ok(()) + } + + pub async fn stop(&mut self) -> anyhow::Result<()> { + self.send_request(CoordinateRequest { + msg: Some(coordinate_request::Msg::Stop(true)), + }) + .await?; + Ok(()) + } } diff --git a/src/sqlparser/src/lib.rs b/src/sqlparser/src/lib.rs index a102e5428edae..07967d4cf75a7 100644 --- a/src/sqlparser/src/lib.rs +++ b/src/sqlparser/src/lib.rs @@ -31,7 +31,6 @@ //! 
``` #![cfg_attr(not(feature = "std"), no_std)] -#![feature(lint_reasons)] #![feature(let_chains)] #![expect(clippy::doc_markdown)] #![expect(clippy::upper_case_acronyms)] diff --git a/src/storage/Cargo.toml b/src/storage/Cargo.toml index 2886c4e4e23f7..b321c43b99e43 100644 --- a/src/storage/Cargo.toml +++ b/src/storage/Cargo.toml @@ -36,7 +36,7 @@ libc = "0.2" lz4 = "1.25.0" memcomparable = "0.2" metrics-prometheus = "0.7" -moka = { version = "0.12", features = ["future", "sync"] } +moka = { version = "0.12.0", features = ["future", "sync"] } more-asserts = "0.3" num-integer = "0.1" parking_lot = { workspace = true } @@ -96,7 +96,7 @@ workspace-hack = { path = "../workspace-hack" } bincode = "1" criterion = { workspace = true, features = ["async_futures", "async_tokio"] } expect-test = "1" -risingwave_hummock_sdk = { workspace = true } +risingwave_hummock_sdk = { workspace = true, features = ["test"] } risingwave_test_runner = { workspace = true } uuid = { version = "1", features = ["v4"] } diff --git a/src/storage/backup/src/lib.rs b/src/storage/backup/src/lib.rs index 8dfba1b62a181..e543d139b44f0 100644 --- a/src/storage/backup/src/lib.rs +++ b/src/storage/backup/src/lib.rs @@ -17,7 +17,6 @@ #![feature(type_alias_impl_trait)] #![feature(extract_if)] #![feature(custom_test_frameworks)] -#![feature(lint_reasons)] #![feature(map_try_insert)] #![feature(hash_extract_if)] #![feature(btree_extract_if)] diff --git a/src/storage/benches/bench_table_watermarks.rs b/src/storage/benches/bench_table_watermarks.rs index 4a9e1c5edda0b..5153dd0f9fe38 100644 --- a/src/storage/benches/bench_table_watermarks.rs +++ b/src/storage/benches/bench_table_watermarks.rs @@ -166,7 +166,7 @@ fn bench_table_watermarks(c: &mut Criterion) { let mut pinned_version = PinnedVersion::new(versions.pop_front().unwrap(), unbounded_channel().0); while let Some(version) = versions.pop_front() { - pinned_version = pinned_version.new_pin_version(version); + pinned_version = pinned_version.new_pin_version(version).unwrap(); } }, BatchSize::SmallInput, diff --git a/src/storage/compactor/src/lib.rs b/src/storage/compactor/src/lib.rs index 22e70ac759aed..4c503f3d7a8d5 100644 --- a/src/storage/compactor/src/lib.rs +++ b/src/storage/compactor/src/lib.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#![feature(lint_reasons)] - mod compactor_observer; mod rpc; pub mod server; diff --git a/src/storage/hummock_sdk/src/change_log.rs b/src/storage/hummock_sdk/src/change_log.rs index 433309acab930..c231b0eb6b7b5 100644 --- a/src/storage/hummock_sdk/src/change_log.rs +++ b/src/storage/hummock_sdk/src/change_log.rs @@ -16,32 +16,42 @@ use std::collections::HashMap; use risingwave_common::catalog::TableId; use risingwave_pb::hummock::hummock_version_delta::PbChangeLogDelta; -use risingwave_pb::hummock::{PbEpochNewChangeLog, PbTableChangeLog}; +use risingwave_pb::hummock::{PbEpochNewChangeLog, PbSstableInfo, PbTableChangeLog}; use tracing::warn; use crate::sstable_info::SstableInfo; #[derive(Debug, Clone, PartialEq)] -pub struct TableChangeLog(pub Vec); +pub struct TableChangeLogCommon(pub Vec>); + +pub type TableChangeLog = TableChangeLogCommon; #[derive(Debug, Clone, PartialEq)] -pub struct EpochNewChangeLog { - pub new_value: Vec, - pub old_value: Vec, +pub struct EpochNewChangeLogCommon { + pub new_value: Vec, + pub old_value: Vec, pub epochs: Vec, } -impl From<&EpochNewChangeLog> for PbEpochNewChangeLog { - fn from(val: &EpochNewChangeLog) -> Self { +pub type EpochNewChangeLog = EpochNewChangeLogCommon; + +impl From<&EpochNewChangeLogCommon> for PbEpochNewChangeLog +where + PbSstableInfo: for<'a> From<&'a T>, +{ + fn from(val: &EpochNewChangeLogCommon) -> Self { Self { - new_value: val.new_value.iter().map(|a| a.clone().into()).collect(), - old_value: val.old_value.iter().map(|a| a.clone().into()).collect(), + new_value: val.new_value.iter().map(|a| a.into()).collect(), + old_value: val.old_value.iter().map(|a| a.into()).collect(), epochs: val.epochs.clone(), } } } -impl From<&PbEpochNewChangeLog> for EpochNewChangeLog { +impl From<&PbEpochNewChangeLog> for EpochNewChangeLogCommon +where + T: for<'a> From<&'a PbSstableInfo>, +{ fn from(value: &PbEpochNewChangeLog) -> Self { Self { new_value: value.new_value.iter().map(|a| a.into()).collect(), @@ -51,30 +61,28 @@ impl From<&PbEpochNewChangeLog> for EpochNewChangeLog { } } -impl From for PbEpochNewChangeLog { - fn from(val: EpochNewChangeLog) -> Self { +impl From> for PbEpochNewChangeLog +where + PbSstableInfo: From, +{ + fn from(val: EpochNewChangeLogCommon) -> Self { Self { - new_value: val - .new_value - .into_iter() - .map(|a| a.clone().into()) - .collect(), - old_value: val - .old_value - .into_iter() - .map(|a| a.clone().into()) - .collect(), - epochs: val.epochs.clone(), + new_value: val.new_value.into_iter().map(|a| a.into()).collect(), + old_value: val.old_value.into_iter().map(|a| a.into()).collect(), + epochs: val.epochs, } } } -impl From for EpochNewChangeLog { +impl From for EpochNewChangeLogCommon +where + T: From, +{ fn from(value: PbEpochNewChangeLog) -> Self { Self { new_value: value.new_value.into_iter().map(|a| a.into()).collect(), old_value: value.old_value.into_iter().map(|a| a.into()).collect(), - epochs: value.epochs.clone(), + epochs: value.epochs, } } } @@ -117,15 +125,23 @@ impl TableChangeLog { } } -impl TableChangeLog { +impl TableChangeLogCommon +where + PbSstableInfo: for<'a> From<&'a T>, +{ pub fn to_protobuf(&self) -> PbTableChangeLog { PbTableChangeLog { change_logs: self.0.iter().map(|a| a.into()).collect(), } } +} +impl TableChangeLogCommon +where + T: for<'a> From<&'a PbSstableInfo>, +{ pub fn from_protobuf(val: &PbTableChangeLog) -> Self { - Self(val.change_logs.clone().iter().map(|a| a.into()).collect()) + Self(val.change_logs.iter().map(|a| a.into()).collect()) } } @@ -173,13 +189,18 @@ pub fn 
build_table_change_log_delta<'a>( } #[derive(Debug, PartialEq, Clone)] -pub struct ChangeLogDelta { +pub struct ChangeLogDeltaCommon { pub truncate_epoch: u64, - pub new_log: Option, + pub new_log: Option>, } -impl From<&ChangeLogDelta> for PbChangeLogDelta { - fn from(val: &ChangeLogDelta) -> Self { +pub type ChangeLogDelta = ChangeLogDeltaCommon; + +impl From<&ChangeLogDeltaCommon> for PbChangeLogDelta +where + PbSstableInfo: for<'a> From<&'a T>, +{ + fn from(val: &ChangeLogDeltaCommon) -> Self { Self { truncate_epoch: val.truncate_epoch, new_log: val.new_log.as_ref().map(|a| a.into()), @@ -187,7 +208,10 @@ impl From<&ChangeLogDelta> for PbChangeLogDelta { } } -impl From<&PbChangeLogDelta> for ChangeLogDelta { +impl From<&PbChangeLogDelta> for ChangeLogDeltaCommon +where + T: for<'a> From<&'a PbSstableInfo>, +{ fn from(val: &PbChangeLogDelta) -> Self { Self { truncate_epoch: val.truncate_epoch, @@ -196,8 +220,11 @@ impl From<&PbChangeLogDelta> for ChangeLogDelta { } } -impl From for PbChangeLogDelta { - fn from(val: ChangeLogDelta) -> Self { +impl From> for PbChangeLogDelta +where + PbSstableInfo: From, +{ + fn from(val: ChangeLogDeltaCommon) -> Self { Self { truncate_epoch: val.truncate_epoch, new_log: val.new_log.map(|a| a.into()), @@ -205,7 +232,10 @@ impl From for PbChangeLogDelta { } } -impl From for ChangeLogDelta { +impl From for ChangeLogDeltaCommon +where + T: From, +{ fn from(val: PbChangeLogDelta) -> Self { Self { truncate_epoch: val.truncate_epoch, @@ -218,11 +248,12 @@ impl From for ChangeLogDelta { mod tests { use itertools::Itertools; - use crate::change_log::{EpochNewChangeLog, TableChangeLog}; + use crate::change_log::{EpochNewChangeLog, TableChangeLogCommon}; + use crate::sstable_info::SstableInfo; #[test] fn test_filter_epoch() { - let table_change_log = TableChangeLog(vec![ + let table_change_log = TableChangeLogCommon::(vec![ EpochNewChangeLog { new_value: vec![], old_value: vec![], @@ -262,7 +293,7 @@ mod tests { #[test] fn test_truncate() { - let mut table_change_log = TableChangeLog(vec![ + let mut table_change_log = TableChangeLogCommon::(vec![ EpochNewChangeLog { new_value: vec![], old_value: vec![], @@ -288,7 +319,7 @@ mod tests { table_change_log.truncate(1); assert_eq!( table_change_log, - TableChangeLog(vec![ + TableChangeLogCommon::(vec![ EpochNewChangeLog { new_value: vec![], old_value: vec![], @@ -310,7 +341,7 @@ mod tests { table_change_log.truncate(3); assert_eq!( table_change_log, - TableChangeLog(vec![ + TableChangeLogCommon::(vec![ EpochNewChangeLog { new_value: vec![], old_value: vec![], diff --git a/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs b/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs index c54dd05b25d28..376626e844242 100644 --- a/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs +++ b/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs @@ -22,13 +22,14 @@ use itertools::Itertools; use risingwave_common::catalog::TableId; use risingwave_common::hash::VnodeBitmapExt; use risingwave_pb::hummock::{ - CompactionConfig, CompatibilityVersion, GroupConstruct, GroupDestroy, GroupMetaChange, + CompactionConfig, CompatibilityVersion, GroupConstruct, GroupMerge, GroupMetaChange, GroupTableChange, PbLevelType, }; use tracing::warn; -use super::StateTableId; -use crate::change_log::TableChangeLog; +use super::group_split::get_sub_level_insert_hint; +use super::{group_split, StateTableId}; +use crate::change_log::TableChangeLogCommon; use 
crate::compaction_group::StaticCompactionGroupId; use crate::key_range::KeyRangeCommon; use crate::level::{Level, Levels, OverlappingLevel}; @@ -47,13 +48,17 @@ pub struct GroupDeltasSummary { pub insert_sub_level_id: u64, pub insert_table_infos: Vec, pub group_construct: Option, - pub group_destroy: Option, + pub group_destroy: Option, pub group_meta_changes: Vec, pub group_table_change: Option, pub new_vnode_partition_count: u32, + pub group_merge: Option, } -pub fn summarize_group_deltas(group_deltas: &GroupDeltas) -> GroupDeltasSummary { +pub fn summarize_group_deltas( + group_deltas: &GroupDeltas, + compaction_group_id: CompactionGroupId, +) -> GroupDeltasSummary { let mut delete_sst_levels = Vec::with_capacity(group_deltas.group_deltas.len()); let mut delete_sst_ids_set = HashSet::new(); let mut insert_sst_level_id = u32::MAX; @@ -64,6 +69,7 @@ pub fn summarize_group_deltas(group_deltas: &GroupDeltas) -> GroupDeltasSummary let mut group_meta_changes = vec![]; let mut group_table_change = None; let mut new_vnode_partition_count = 0; + let mut group_merge = None; for group_delta in &group_deltas.group_deltas { match group_delta { @@ -83,9 +89,9 @@ pub fn summarize_group_deltas(group_deltas: &GroupDeltas) -> GroupDeltasSummary assert!(group_construct.is_none()); group_construct = Some(construct_delta.clone()); } - GroupDelta::GroupDestroy(destroy_delta) => { + GroupDelta::GroupDestroy(_) => { assert!(group_destroy.is_none()); - group_destroy = Some(*destroy_delta); + group_destroy = Some(compaction_group_id); } GroupDelta::GroupMetaChange(meta_delta) => { group_meta_changes.push(meta_delta.clone()); @@ -93,6 +99,11 @@ pub fn summarize_group_deltas(group_deltas: &GroupDeltas) -> GroupDeltasSummary GroupDelta::GroupTableChange(meta_delta) => { group_table_change = Some(meta_delta.clone()); } + GroupDelta::GroupMerge(merge_delta) => { + assert!(group_merge.is_none()); + group_merge = Some(*merge_delta); + group_destroy = Some(merge_delta.right_group_id); + } } } @@ -110,6 +121,7 @@ pub fn summarize_group_deltas(group_deltas: &GroupDeltas) -> GroupDeltasSummary group_meta_changes, group_table_change, new_vnode_partition_count, + group_merge, } } @@ -173,6 +185,25 @@ impl HummockVersion { })) } + // only scan the sst infos from levels in the specified compaction group (without table change log) + pub fn get_sst_ids_by_group_id( + &self, + compaction_group_id: CompactionGroupId, + ) -> impl Iterator + '_ { + self.levels + .iter() + .filter_map(move |(cg_id, level)| { + if *cg_id == compaction_group_id { + Some(level) + } else { + None + } + }) + .flat_map(|level| level.l0.sub_levels.iter().rev().chain(level.levels.iter())) + .flat_map(|level| level.table_infos.iter()) + .map(|s| s.sst_id) + } + /// `get_sst_infos_from_groups` doesn't guarantee that all returned sst info belongs to `select_group`. /// i.e. `select_group` is just a hint. /// We separate `get_sst_infos_from_groups` and `get_sst_infos` because `get_sst_infos_from_groups` may be further customized in the future. @@ -386,23 +417,6 @@ impl HummockVersion { { for sub_level in &mut l0.sub_levels { let target_l0 = &mut cur_levels.l0; - // When `insert_hint` is `Ok(idx)`, it means that the sub level `idx` in `target_l0` - // will extend these SSTs. When `insert_hint` is `Err(idx)`, it - // means that we will add a new sub level `idx` into `target_l0`. 
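The comment removed here (it reappears with `group_split::get_sub_level_insert_hint` later in this patch) follows the same `Result<usize, usize>` convention as `slice::binary_search`: `Ok(idx)` means the incoming SSTs extend the existing sub level at `idx`, while `Err(idx)` means a new sub level is inserted at position `idx`. A reduced sketch of that convention over bare sub level ids:

```rust
use std::cmp::Ordering;

// Reduced sketch of the insert-hint convention, using bare sub level ids
// instead of the real `Level` structs.
fn insert_hint(sub_level_ids: &[u64], new_id: u64) -> Result<usize, usize> {
    for (idx, id) in sub_level_ids.iter().enumerate() {
        match id.cmp(&new_id) {
            Ordering::Less => {}
            // Same id: extend the existing sub level at `idx`.
            Ordering::Equal => return Ok(idx),
            // First larger id: insert a new sub level before it.
            Ordering::Greater => return Err(idx),
        }
    }
    // Larger than everything present: append at the end.
    Err(sub_level_ids.len())
}

fn main() {
    let ids = [101u64, 103, 105];
    assert_eq!(insert_hint(&ids, 103), Ok(1));
    assert_eq!(insert_hint(&ids, 104), Err(2));
    assert_eq!(insert_hint(&ids, 200), Err(3));
}
```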
- let mut insert_hint = Err(target_l0.sub_levels.len()); - for (idx, other) in target_l0.sub_levels.iter_mut().enumerate() { - match other.sub_level_id.cmp(&sub_level.sub_level_id) { - Ordering::Less => {} - Ordering::Equal => { - insert_hint = Ok(idx); - break; - } - Ordering::Greater => { - insert_hint = Err(idx); - break; - } - } - } // Remove SST from sub level may result in empty sub level. It will be purged // whenever another compaction task is finished. let insert_table_infos = @@ -419,7 +433,7 @@ impl HummockVersion { if insert_table_infos.is_empty() { continue; } - match insert_hint { + match get_sub_level_insert_hint(&target_l0.sub_levels, sub_level) { Ok(idx) => { add_ssts_to_sub_level(target_l0, idx, insert_table_infos); } @@ -570,7 +584,7 @@ impl HummockVersion { // apply to `levels`, which is different compaction groups for (compaction_group_id, group_deltas) in &version_delta.group_deltas { - let summary = summarize_group_deltas(group_deltas); + let summary = summarize_group_deltas(group_deltas, *compaction_group_id); if let Some(group_construct) = &summary.group_construct { let mut new_levels = build_initial_compaction_group_levels( *compaction_group_id, @@ -635,14 +649,19 @@ impl HummockVersion { .expect("compaction group should exist") .member_table_ids .append(&mut moving_tables); + } else if let Some(group_merge) = &summary.group_merge { + tracing::info!( + "group_merge left {:?} right {:?}", + group_merge.left_group_id, + group_merge.right_group_id + ); + self.merge_compaction_group(group_merge.left_group_id, group_merge.right_group_id) } - let has_destroy = summary.group_destroy.is_some(); let visible_table_committed_epoch = self.visible_table_committed_epoch(); - let levels = self - .levels - .get_mut(compaction_group_id) - .expect("compaction group should exist"); - + let group_destroy = summary.group_destroy; + let levels = self.levels.get_mut(compaction_group_id).unwrap_or_else(|| { + panic!("compaction group {} does not exist", compaction_group_id) + }); #[expect(deprecated)] // for backward-compatibility of previous hummock version delta for group_meta_delta in &summary.group_meta_changes { levels @@ -669,7 +688,8 @@ impl HummockVersion { } = summary; assert!( - delete_sst_levels.is_empty() && delete_sst_ids_set.is_empty() || has_destroy, + delete_sst_levels.is_empty() && delete_sst_ids_set.is_empty() + || group_destroy.is_some(), "no sst should be deleted when committing an epoch" ); for group_delta in &group_deltas.group_deltas { @@ -703,8 +723,8 @@ impl HummockVersion { .compaction_group_member_table_ids(*compaction_group_id), ); } - if has_destroy { - self.levels.remove(compaction_group_id); + if let Some(destroy_group_id) = &group_destroy { + self.levels.remove(destroy_group_id); } } self.id = version_delta.id; @@ -775,7 +795,7 @@ impl HummockVersion { change_log.0.push(new_change_log.clone()); } Entry::Vacant(entry) => { - entry.insert(TableChangeLog(vec![new_change_log.clone()])); + entry.insert(TableChangeLogCommon(vec![new_change_log.clone()])); } }; } @@ -835,6 +855,45 @@ impl HummockVersion { } ret } + + pub fn merge_compaction_group( + &mut self, + left_group_id: CompactionGroupId, + right_group_id: CompactionGroupId, + ) { + // Double check + let left_group_id_table_ids = self + .state_table_info + .compaction_group_member_table_ids(left_group_id) + .iter() + .map(|table_id| table_id.table_id); + let right_group_id_table_ids = self + .state_table_info + .compaction_group_member_table_ids(right_group_id) + .iter() + .map(|table_id| 
table_id.table_id); + + assert!(left_group_id_table_ids + .chain(right_group_id_table_ids) + .is_sorted()); + + let total_cg = self.levels.keys().cloned().collect::>(); + let right_levels = self.levels.remove(&right_group_id).unwrap_or_else(|| { + panic!( + "compaction group should exist right {} all {:?}", + right_group_id, total_cg + ) + }); + + let left_levels = self.levels.get_mut(&left_group_id).unwrap_or_else(|| { + panic!( + "compaction group should exist left {} all {:?}", + left_group_id, total_cg + ) + }); + + group_split::merge_levels(left_levels, right_levels); + } } #[easy_ext::ext(HummockLevelsExt)] @@ -1228,6 +1287,14 @@ pub fn object_size_map(version: &HummockVersion) -> HashMap, left: Bytes, right: Bytes) -> SstableInfo { + SstableInfo { + object_id, + sst_id: object_id, + key_range: KeyRange { + left, + right, + right_exclusive: false, + }, + table_ids, + file_size: 100, + sst_size: 100, + uncompressed_file_size: 100, + ..Default::default() + } + } + + #[test] + fn test_merge_levels() { + let mut left_levels = build_initial_compaction_group_levels( + 1, + &CompactionConfig { + max_level: 6, + ..Default::default() + }, + ); + + let mut right_levels = build_initial_compaction_group_levels( + 2, + &CompactionConfig { + max_level: 6, + ..Default::default() + }, + ); + + left_levels.levels[0] = Level { + level_idx: 1, + level_type: LevelType::Nonoverlapping, + table_infos: vec![ + gen_sst_info( + 1, + vec![3], + FullKey::for_test( + TableId::new(3), + gen_key_from_str(VirtualNode::from_index(1), "1"), + 0, + ) + .encode() + .into(), + FullKey::for_test( + TableId::new(3), + gen_key_from_str(VirtualNode::from_index(200), "1"), + 0, + ) + .encode() + .into(), + ), + gen_sst_info( + 10, + vec![3, 4], + FullKey::for_test( + TableId::new(3), + gen_key_from_str(VirtualNode::from_index(201), "1"), + 0, + ) + .encode() + .into(), + FullKey::for_test( + TableId::new(4), + gen_key_from_str(VirtualNode::from_index(10), "1"), + 0, + ) + .encode() + .into(), ), - (1, cg1), - ]); - version + gen_sst_info( + 11, + vec![4], + FullKey::for_test( + TableId::new(4), + gen_key_from_str(VirtualNode::from_index(11), "1"), + 0, + ) + .encode() + .into(), + FullKey::for_test( + TableId::new(4), + gen_key_from_str(VirtualNode::from_index(200), "1"), + 0, + ) + .encode() + .into(), + ), + ], + total_file_size: 300, + ..Default::default() + }; + + left_levels.l0.sub_levels.push(Level { + level_idx: 0, + table_infos: vec![gen_sst_info( + 3, + vec![3], + FullKey::for_test( + TableId::new(3), + gen_key_from_str(VirtualNode::from_index(1), "1"), + 0, + ) + .encode() + .into(), + FullKey::for_test( + TableId::new(3), + gen_key_from_str(VirtualNode::from_index(200), "1"), + 0, + ) + .encode() + .into(), + )], + sub_level_id: 101, + level_type: LevelType::Overlapping, + total_file_size: 100, + ..Default::default() + }); + + left_levels.l0.sub_levels.push(Level { + level_idx: 0, + table_infos: vec![gen_sst_info( + 3, + vec![3], + FullKey::for_test( + TableId::new(3), + gen_key_from_str(VirtualNode::from_index(1), "1"), + 0, + ) + .encode() + .into(), + FullKey::for_test( + TableId::new(3), + gen_key_from_str(VirtualNode::from_index(200), "1"), + 0, + ) + .encode() + .into(), + )], + sub_level_id: 103, + level_type: LevelType::Overlapping, + total_file_size: 100, + ..Default::default() + }); + + left_levels.l0.sub_levels.push(Level { + level_idx: 0, + table_infos: vec![gen_sst_info( + 3, + vec![3], + FullKey::for_test( + TableId::new(3), + gen_key_from_str(VirtualNode::from_index(1), "1"), + 0, + ) + .encode() + 
.into(), + FullKey::for_test( + TableId::new(3), + gen_key_from_str(VirtualNode::from_index(200), "1"), + 0, + ) + .encode() + .into(), + )], + sub_level_id: 105, + level_type: LevelType::Nonoverlapping, + total_file_size: 100, + ..Default::default() }); + + right_levels.levels[0] = Level { + level_idx: 1, + level_type: LevelType::Nonoverlapping, + table_infos: vec![ + gen_sst_info( + 1, + vec![5], + FullKey::for_test( + TableId::new(5), + gen_key_from_str(VirtualNode::from_index(1), "1"), + 0, + ) + .encode() + .into(), + FullKey::for_test( + TableId::new(5), + gen_key_from_str(VirtualNode::from_index(200), "1"), + 0, + ) + .encode() + .into(), + ), + gen_sst_info( + 10, + vec![5, 6], + FullKey::for_test( + TableId::new(5), + gen_key_from_str(VirtualNode::from_index(201), "1"), + 0, + ) + .encode() + .into(), + FullKey::for_test( + TableId::new(6), + gen_key_from_str(VirtualNode::from_index(10), "1"), + 0, + ) + .encode() + .into(), + ), + gen_sst_info( + 11, + vec![6], + FullKey::for_test( + TableId::new(6), + gen_key_from_str(VirtualNode::from_index(11), "1"), + 0, + ) + .encode() + .into(), + FullKey::for_test( + TableId::new(6), + gen_key_from_str(VirtualNode::from_index(200), "1"), + 0, + ) + .encode() + .into(), + ), + ], + total_file_size: 300, + ..Default::default() + }; + + right_levels.l0.sub_levels.push(Level { + level_idx: 0, + table_infos: vec![gen_sst_info( + 3, + vec![5], + FullKey::for_test( + TableId::new(5), + gen_key_from_str(VirtualNode::from_index(1), "1"), + 0, + ) + .encode() + .into(), + FullKey::for_test( + TableId::new(5), + gen_key_from_str(VirtualNode::from_index(200), "1"), + 0, + ) + .encode() + .into(), + )], + sub_level_id: 101, + level_type: LevelType::Overlapping, + total_file_size: 100, + ..Default::default() + }); + + right_levels.l0.sub_levels.push(Level { + level_idx: 0, + table_infos: vec![gen_sst_info( + 5, + vec![5], + FullKey::for_test( + TableId::new(5), + gen_key_from_str(VirtualNode::from_index(1), "1"), + 0, + ) + .encode() + .into(), + FullKey::for_test( + TableId::new(5), + gen_key_from_str(VirtualNode::from_index(200), "1"), + 0, + ) + .encode() + .into(), + )], + sub_level_id: 102, + level_type: LevelType::Overlapping, + total_file_size: 100, + ..Default::default() + }); + + right_levels.l0.sub_levels.push(Level { + level_idx: 0, + table_infos: vec![gen_sst_info( + 3, + vec![5], + FullKey::for_test( + TableId::new(5), + gen_key_from_str(VirtualNode::from_index(1), "1"), + 0, + ) + .encode() + .into(), + FullKey::for_test( + TableId::new(5), + gen_key_from_str(VirtualNode::from_index(200), "1"), + 0, + ) + .encode() + .into(), + )], + sub_level_id: 103, + level_type: LevelType::Nonoverlapping, + total_file_size: 100, + ..Default::default() + }); + + { + // test empty + let mut left_levels = Levels::default(); + let right_levels = Levels::default(); + + group_split::merge_levels(&mut left_levels, right_levels); + } + + { + // test empty left + let mut left_levels = build_initial_compaction_group_levels( + 1, + &CompactionConfig { + max_level: 6, + ..Default::default() + }, + ); + let right_levels = right_levels.clone(); + + group_split::merge_levels(&mut left_levels, right_levels); + + assert!(left_levels.l0.sub_levels.len() == 3); + assert!(left_levels.l0.sub_levels[0].sub_level_id == 101); + assert_eq!(100, left_levels.l0.sub_levels[0].total_file_size); + assert!(left_levels.l0.sub_levels[1].sub_level_id == 102); + assert_eq!(100, left_levels.l0.sub_levels[1].total_file_size); + assert!(left_levels.l0.sub_levels[2].sub_level_id == 103); + 
assert_eq!(100, left_levels.l0.sub_levels[2].total_file_size); + + assert!(left_levels.levels[0].level_idx == 1); + assert_eq!(300, left_levels.levels[0].total_file_size); + } + + { + // test empty right + let mut left_levels = left_levels.clone(); + let right_levels = build_initial_compaction_group_levels( + 2, + &CompactionConfig { + max_level: 6, + ..Default::default() + }, + ); + + group_split::merge_levels(&mut left_levels, right_levels); + + assert!(left_levels.l0.sub_levels.len() == 3); + assert!(left_levels.l0.sub_levels[0].sub_level_id == 101); + assert_eq!(100, left_levels.l0.sub_levels[0].total_file_size); + assert!(left_levels.l0.sub_levels[1].sub_level_id == 103); + assert_eq!(100, left_levels.l0.sub_levels[1].total_file_size); + assert!(left_levels.l0.sub_levels[2].sub_level_id == 105); + assert_eq!(100, left_levels.l0.sub_levels[2].total_file_size); + + assert!(left_levels.levels[0].level_idx == 1); + assert_eq!(300, left_levels.levels[0].total_file_size); + } + + { + let mut left_levels = left_levels.clone(); + let right_levels = right_levels.clone(); + + group_split::merge_levels(&mut left_levels, right_levels); + + assert!(left_levels.l0.sub_levels.len() == 6); + assert!(left_levels.l0.sub_levels[0].sub_level_id == 101); + assert_eq!(100, left_levels.l0.sub_levels[0].total_file_size); + assert!(left_levels.l0.sub_levels[1].sub_level_id == 103); + assert_eq!(100, left_levels.l0.sub_levels[1].total_file_size); + assert!(left_levels.l0.sub_levels[2].sub_level_id == 105); + assert_eq!(100, left_levels.l0.sub_levels[2].total_file_size); + assert!(left_levels.l0.sub_levels[3].sub_level_id == 106); + assert_eq!(100, left_levels.l0.sub_levels[3].total_file_size); + assert!(left_levels.l0.sub_levels[4].sub_level_id == 107); + assert_eq!(100, left_levels.l0.sub_levels[4].total_file_size); + assert!(left_levels.l0.sub_levels[5].sub_level_id == 108); + assert_eq!(100, left_levels.l0.sub_levels[5].total_file_size); + + assert!(left_levels.levels[0].level_idx == 1); + assert_eq!(600, left_levels.levels[0].total_file_size); + } } } diff --git a/src/storage/hummock_sdk/src/compaction_group/mod.rs b/src/storage/hummock_sdk/src/compaction_group/mod.rs index 973cc3e3c6140..94ef89b8046e2 100644 --- a/src/storage/hummock_sdk/src/compaction_group/mod.rs +++ b/src/storage/hummock_sdk/src/compaction_group/mod.rs @@ -43,3 +43,115 @@ impl From for CompactionGroupId { cg as CompactionGroupId } } + +pub mod group_split { + use std::cmp::Ordering; + + use super::hummock_version_ext::insert_new_sub_level; + use crate::can_concat; + use crate::level::{Level, Levels}; + + pub fn merge_levels(left_levels: &mut Levels, right_levels: Levels) { + let right_l0 = right_levels.l0; + + let mut max_left_sub_level_id = left_levels + .l0 + .sub_levels + .iter() + .map(|sub_level| sub_level.sub_level_id + 1) + .max() + .unwrap_or(0); // If there are no sub levels, the max sub level id is 0. + let need_rewrite_right_sub_level_id = max_left_sub_level_id != 0; + + for mut right_sub_level in right_l0.sub_levels { + // Rewrite the sub level id of the right sub level to avoid conflicts with left sub levels (a same-id sub level could have a different level type). + // e.g.
left sub levels: [0, 1, 2], right sub levels: [0, 1, 2], after rewrite, right sub levels: [3, 4, 5] + if need_rewrite_right_sub_level_id { + right_sub_level.sub_level_id = max_left_sub_level_id; + max_left_sub_level_id += 1; + } + + insert_new_sub_level( + &mut left_levels.l0, + right_sub_level.sub_level_id, + right_sub_level.level_type, + right_sub_level.table_infos, + None, + ); + } + + assert!( + left_levels + .l0 + .sub_levels + .is_sorted_by_key(|sub_level| sub_level.sub_level_id), + "{}", + format!("left_levels.l0.sub_levels: {:?}", left_levels.l0.sub_levels) + ); + + // Reinitialise `vnode_partition_count` to avoid misaligned hierarchies + // caused by the merge of different compaction groups.(picker might reject the different `vnode_partition_count` sub_level to compact) + left_levels + .l0 + .sub_levels + .iter_mut() + .for_each(|sub_level| sub_level.vnode_partition_count = 0); + + for (idx, level) in right_levels.levels.into_iter().enumerate() { + if level.table_infos.is_empty() { + continue; + } + + let insert_table_infos = level.table_infos; + left_levels.levels[idx].total_file_size += insert_table_infos + .iter() + .map(|sst| sst.sst_size) + .sum::(); + left_levels.levels[idx].uncompressed_file_size += insert_table_infos + .iter() + .map(|sst| sst.uncompressed_file_size) + .sum::(); + + left_levels.levels[idx] + .table_infos + .extend(insert_table_infos); + left_levels.levels[idx] + .table_infos + .sort_by(|sst1, sst2| sst1.key_range.cmp(&sst2.key_range)); + assert!( + can_concat(&left_levels.levels[idx].table_infos), + "{}", + format!( + "left-group {} right-group {} left_levels.levels[{}].table_infos: {:?} level_idx {:?}", + left_levels.group_id, + right_levels.group_id, + idx, + left_levels.levels[idx].table_infos, + left_levels.levels[idx].level_idx + ) + ); + } + } + + // When `insert_hint` is `Ok(idx)`, it means that the sub level `idx` in `target_l0` + // will extend these SSTs. When `insert_hint` is `Err(idx)`, it + // means that we will add a new sub level `idx` into `target_l0`. 
+ pub fn get_sub_level_insert_hint( + target_levels: &Vec, + sub_level: &Level, + ) -> Result { + for (idx, other) in target_levels.iter().enumerate() { + match other.sub_level_id.cmp(&sub_level.sub_level_id) { + Ordering::Less => {} + Ordering::Equal => { + return Ok(idx); + } + Ordering::Greater => { + return Err(idx); + } + } + } + + Err(target_levels.len()) + } +} diff --git a/src/storage/hummock_sdk/src/level.rs b/src/storage/hummock_sdk/src/level.rs index c7db09e69e76d..762b5abd25ac9 100644 --- a/src/storage/hummock_sdk/src/level.rs +++ b/src/storage/hummock_sdk/src/level.rs @@ -23,19 +23,24 @@ use risingwave_pb::hummock::{ use crate::sstable_info::SstableInfo; #[derive(Debug, Clone, PartialEq, Default)] -pub struct OverlappingLevel { - pub sub_levels: Vec, +pub struct OverlappingLevelCommon { + pub sub_levels: Vec>, pub total_file_size: u64, pub uncompressed_file_size: u64, } -impl From<&PbOverlappingLevel> for OverlappingLevel { +pub type OverlappingLevel = OverlappingLevelCommon; + +impl From<&PbOverlappingLevel> for OverlappingLevelCommon +where + for<'a> LevelCommon: From<&'a PbLevel>, +{ fn from(pb_overlapping_level: &PbOverlappingLevel) -> Self { Self { sub_levels: pb_overlapping_level .sub_levels .iter() - .map(Level::from) + .map(LevelCommon::from) .collect_vec(), total_file_size: pb_overlapping_level.total_file_size, uncompressed_file_size: pb_overlapping_level.uncompressed_file_size, @@ -43,13 +48,16 @@ impl From<&PbOverlappingLevel> for OverlappingLevel { } } -impl From<&OverlappingLevel> for PbOverlappingLevel { - fn from(overlapping_level: &OverlappingLevel) -> Self { +impl From<&OverlappingLevelCommon> for PbOverlappingLevel +where + for<'a> &'a LevelCommon: Into, +{ + fn from(overlapping_level: &OverlappingLevelCommon) -> Self { Self { sub_levels: overlapping_level .sub_levels .iter() - .map(|pb_level| pb_level.into()) + .map(|level| level.into()) .collect_vec(), total_file_size: overlapping_level.total_file_size, uncompressed_file_size: overlapping_level.uncompressed_file_size, @@ -57,8 +65,11 @@ impl From<&OverlappingLevel> for PbOverlappingLevel { } } -impl From for PbOverlappingLevel { - fn from(overlapping_level: OverlappingLevel) -> Self { +impl From> for PbOverlappingLevel +where + LevelCommon: Into, +{ + fn from(overlapping_level: OverlappingLevelCommon) -> Self { Self { sub_levels: overlapping_level .sub_levels @@ -71,13 +82,16 @@ impl From for PbOverlappingLevel { } } -impl From for OverlappingLevel { +impl From for OverlappingLevelCommon +where + LevelCommon: From, +{ fn from(pb_overlapping_level: PbOverlappingLevel) -> Self { Self { sub_levels: pb_overlapping_level .sub_levels .into_iter() - .map(Level::from) + .map(LevelCommon::from) .collect_vec(), total_file_size: pb_overlapping_level.total_file_size, uncompressed_file_size: pb_overlapping_level.uncompressed_file_size, @@ -97,26 +111,27 @@ impl OverlappingLevel { } #[derive(Debug, Clone, PartialEq, Default)] -pub struct Level { +pub struct LevelCommon { pub level_idx: u32, pub level_type: PbLevelType, - pub table_infos: Vec, + pub table_infos: Vec, pub total_file_size: u64, pub sub_level_id: u64, pub uncompressed_file_size: u64, pub vnode_partition_count: u32, } -impl From<&PbLevel> for Level { +pub type Level = LevelCommon; + +impl From<&PbLevel> for LevelCommon +where + T: for<'a> From<&'a PbSstableInfo>, +{ fn from(pb_level: &PbLevel) -> Self { Self { level_idx: pb_level.level_idx, level_type: PbLevelType::try_from(pb_level.level_type).unwrap(), - table_infos: pb_level - .table_infos - .iter() - 
.map(SstableInfo::from) - .collect_vec(), + table_infos: pb_level.table_infos.iter().map(Into::into).collect_vec(), total_file_size: pb_level.total_file_size, sub_level_id: pb_level.sub_level_id, uncompressed_file_size: pb_level.uncompressed_file_size, @@ -125,16 +140,15 @@ impl From<&PbLevel> for Level { } } -impl From<&Level> for PbLevel { - fn from(level: &Level) -> Self { +impl From<&LevelCommon> for PbLevel +where + PbSstableInfo: for<'a> From<&'a T>, +{ + fn from(level: &LevelCommon) -> Self { Self { level_idx: level.level_idx, level_type: level.level_type.into(), - table_infos: level - .table_infos - .iter() - .map(PbSstableInfo::from) - .collect_vec(), + table_infos: level.table_infos.iter().map(Into::into).collect_vec(), total_file_size: level.total_file_size, sub_level_id: level.sub_level_id, uncompressed_file_size: level.uncompressed_file_size, @@ -143,16 +157,15 @@ impl From<&Level> for PbLevel { } } -impl From for PbLevel { - fn from(level: Level) -> Self { +impl From> for PbLevel +where + PbSstableInfo: From, +{ + fn from(level: LevelCommon) -> Self { Self { level_idx: level.level_idx, level_type: level.level_type.into(), - table_infos: level - .table_infos - .into_iter() - .map(PbSstableInfo::from) - .collect_vec(), + table_infos: level.table_infos.into_iter().map(Into::into).collect_vec(), total_file_size: level.total_file_size, sub_level_id: level.sub_level_id, uncompressed_file_size: level.uncompressed_file_size, @@ -161,7 +174,10 @@ impl From for PbLevel { } } -impl From for Level { +impl From for LevelCommon +where + T: From, +{ fn from(pb_level: PbLevel) -> Self { Self { level_idx: pb_level.level_idx, @@ -169,7 +185,7 @@ impl From for Level { table_infos: pb_level .table_infos .into_iter() - .map(SstableInfo::from) + .map(Into::into) .collect_vec(), total_file_size: pb_level.total_file_size, sub_level_id: pb_level.sub_level_id, @@ -196,9 +212,9 @@ impl Level { } #[derive(Debug, Clone, PartialEq, Default)] -pub struct Levels { - pub levels: Vec, - pub l0: OverlappingLevel, +pub struct LevelsCommon { + pub levels: Vec>, + pub l0: OverlappingLevelCommon, pub group_id: u64, pub parent_group_id: u64, @@ -206,6 +222,8 @@ pub struct Levels { pub member_table_ids: Vec, } +pub type Levels = LevelsCommon; + impl Levels { pub fn level0(&self) -> &OverlappingLevel { &self.l0 @@ -236,15 +254,25 @@ impl Levels { } } -impl Levels { - pub fn from_protobuf(pb_levels: &PbLevels) -> Self { - Self::from(pb_levels) - } - +impl LevelsCommon +where + PbLevels: for<'a> From<&'a LevelsCommon>, +{ pub fn to_protobuf(&self) -> PbLevels { self.into() } +} + +impl LevelsCommon +where + LevelsCommon: for<'a> From<&'a PbLevels>, +{ + pub fn from_protobuf(pb_levels: &PbLevels) -> LevelsCommon { + LevelsCommon::::from(pb_levels) + } +} +impl Levels { pub fn estimated_encode_len(&self) -> usize { let mut basic = self .levels @@ -260,12 +288,15 @@ impl Levels { } } -impl From<&PbLevels> for Levels { +impl From<&PbLevels> for LevelsCommon +where + T: for<'a> From<&'a PbSstableInfo>, +{ #[expect(deprecated)] fn from(pb_levels: &PbLevels) -> Self { Self { - l0: OverlappingLevel::from(pb_levels.l0.as_ref().unwrap()), - levels: pb_levels.levels.iter().map(Level::from).collect_vec(), + l0: OverlappingLevelCommon::from(pb_levels.l0.as_ref().unwrap()), + levels: pb_levels.levels.iter().map(Into::into).collect_vec(), group_id: pb_levels.group_id, parent_group_id: pb_levels.parent_group_id, member_table_ids: pb_levels.member_table_ids.clone(), @@ -273,9 +304,12 @@ impl From<&PbLevels> for Levels { } } -impl 
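A condensed sketch of the `*Common<T>` pattern this refactor introduces, using hypothetical `PbSst` / `FullSst` / `IdOnlySst` types in place of the protobuf and SDK structs: the container is written once, generic over its SST payload, and the concrete names are just type aliases over different payloads.

```rust
#[derive(Debug, Clone, Default)]
struct PbSst {
    sst_id: u64,
    file_size: u64,
}

#[derive(Debug, Clone)]
struct FullSst {
    sst_id: u64,
    file_size: u64,
}

#[derive(Debug, Clone)]
struct IdOnlySst(u64);

impl From<PbSst> for FullSst {
    fn from(pb: PbSst) -> Self {
        FullSst { sst_id: pb.sst_id, file_size: pb.file_size }
    }
}

impl From<PbSst> for IdOnlySst {
    fn from(pb: PbSst) -> Self {
        IdOnlySst(pb.sst_id)
    }
}

/// Generic container, analogous to `LevelCommon<T>` in the diff.
#[derive(Debug, Clone)]
struct LevelLike<T> {
    sub_level_id: u64,
    table_infos: Vec<T>,
}

/// "Full" alias, analogous to the regular `Level` alias over `SstableInfo`.
type FullLevel = LevelLike<FullSst>;
/// Stripped alias, analogous to the id-only instantiation used by time travel.
type IdOnlyLevel = LevelLike<IdOnlySst>;

/// One generic conversion serves every payload that can be built from the protobuf SST.
fn level_from_pb<T: From<PbSst>>(sub_level_id: u64, pb_ssts: Vec<PbSst>) -> LevelLike<T> {
    LevelLike {
        sub_level_id,
        table_infos: pb_ssts.into_iter().map(T::from).collect(),
    }
}

fn main() {
    let pb = vec![PbSst { sst_id: 7, file_size: 1024 }];
    let full: FullLevel = level_from_pb(3, pb.clone());
    let stripped: IdOnlyLevel = level_from_pb(3, pb);
    println!("{:?}", (full, stripped));
}
```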
From<&Levels> for PbLevels { +impl From<&LevelsCommon> for PbLevels +where + PbSstableInfo: for<'a> From<&'a T>, +{ #[expect(deprecated)] - fn from(levels: &Levels) -> Self { + fn from(levels: &LevelsCommon) -> Self { Self { l0: Some((&levels.l0).into()), levels: levels.levels.iter().map(PbLevel::from).collect_vec(), @@ -286,28 +320,38 @@ impl From<&Levels> for PbLevels { } } -impl From for Levels { +impl From for LevelsCommon +where + T: From, +{ #[expect(deprecated)] fn from(pb_levels: PbLevels) -> Self { Self { - l0: OverlappingLevel::from(pb_levels.l0.as_ref().unwrap()), - levels: pb_levels.levels.into_iter().map(Level::from).collect_vec(), + l0: OverlappingLevelCommon::from(pb_levels.l0.unwrap()), + levels: pb_levels + .levels + .into_iter() + .map(LevelCommon::from) + .collect_vec(), group_id: pb_levels.group_id, parent_group_id: pb_levels.parent_group_id, - member_table_ids: pb_levels.member_table_ids.clone(), + member_table_ids: pb_levels.member_table_ids, } } } -impl From for PbLevels { - fn from(levels: Levels) -> Self { +impl From> for PbLevels +where + PbSstableInfo: From, +{ + fn from(levels: LevelsCommon) -> Self { #[expect(deprecated)] Self { l0: Some(levels.l0.into()), levels: levels.levels.into_iter().map(PbLevel::from).collect_vec(), group_id: levels.group_id, parent_group_id: levels.parent_group_id, - member_table_ids: levels.member_table_ids.clone(), + member_table_ids: levels.member_table_ids, } } } diff --git a/src/storage/hummock_sdk/src/lib.rs b/src/storage/hummock_sdk/src/lib.rs index 9e6962ab117aa..921ab18fcf7cd 100644 --- a/src/storage/hummock_sdk/src/lib.rs +++ b/src/storage/hummock_sdk/src/lib.rs @@ -15,7 +15,6 @@ #![feature(async_closure)] #![feature(extract_if)] #![feature(hash_extract_if)] -#![feature(lint_reasons)] #![feature(map_many_mut)] #![feature(type_alias_impl_trait)] #![feature(impl_trait_in_assoc_type)] @@ -130,6 +129,7 @@ pub const FIRST_VERSION_ID: HummockVersionId = HummockVersionId(1); pub const SPLIT_TABLE_COMPACTION_GROUP_ID_HEAD: u64 = 1u64 << 56; pub const SINGLE_TABLE_COMPACTION_GROUP_ID_HEAD: u64 = 2u64 << 56; pub const OBJECT_SUFFIX: &str = "data"; +pub const HUMMOCK_SSTABLE_OBJECT_ID_MAX_DECIMAL_LENGTH: usize = 20; #[macro_export] /// This is wrapper for `info` log. @@ -359,3 +359,14 @@ impl EpochWithGap { self.0 & EPOCH_SPILL_TIME_MASK } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_object_id_decimal_max_length() { + let len = HummockSstableObjectId::MAX.to_string().len(); + assert_eq!(len, HUMMOCK_SSTABLE_OBJECT_ID_MAX_DECIMAL_LENGTH) + } +} diff --git a/src/storage/hummock_sdk/src/time_travel.rs b/src/storage/hummock_sdk/src/time_travel.rs index 380d75340df27..e828c94a4d781 100644 --- a/src/storage/hummock_sdk/src/time_travel.rs +++ b/src/storage/hummock_sdk/src/time_travel.rs @@ -13,87 +13,20 @@ // limitations under the License. 
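The new `HUMMOCK_SSTABLE_OBJECT_ID_MAX_DECIMAL_LENGTH` constant and its unit test above pin the buffer size needed to render an object id in decimal. A quick standalone check, assuming (as the test does) that the object id is a `u64`:

```rust
fn main() {
    // u64::MAX = 18_446_744_073_709_551_615, i.e. 20 decimal digits, so 20 bytes are
    // always enough to hold the decimal form of such an object id.
    let rendered = u64::MAX.to_string();
    assert_eq!(rendered, "18446744073709551615");
    assert_eq!(rendered.len(), 20);
}
```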
use std::collections::{HashMap, HashSet}; -use std::sync::Arc; -use risingwave_common::catalog::TableId; -use risingwave_pb::hummock::hummock_version_delta::PbGroupDeltas; -use risingwave_pb::hummock::{PbHummockVersion, PbHummockVersionDelta, PbStateTableInfoDelta}; +use risingwave_pb::hummock::hummock_version::PbLevels; +use risingwave_pb::hummock::hummock_version_delta::{PbChangeLogDelta, PbGroupDeltas}; +use risingwave_pb::hummock::{PbEpochNewChangeLog, PbSstableInfo}; -use crate::change_log::{ChangeLogDelta, EpochNewChangeLog, TableChangeLog}; -use crate::level::{Level, Levels, OverlappingLevel}; +use crate::change_log::{TableChangeLog, TableChangeLogCommon}; +use crate::level::Level; use crate::sstable_info::SstableInfo; -use crate::table_watermark::TableWatermarks; use crate::version::{ - GroupDelta, GroupDeltas, HummockVersion, HummockVersionDelta, HummockVersionStateTableInfo, - IntraLevelDelta, + HummockVersion, HummockVersionCommon, HummockVersionDelta, HummockVersionDeltaCommon, }; -use crate::{CompactionGroupId, HummockSstableId, HummockVersionId}; +use crate::{CompactionGroupId, HummockSstableId}; -/// [`IncompleteHummockVersion`] is incomplete because `SSTableInfo` only has the `sst_id` set in the following fields: -/// - `PbLevels` -/// - `TableChangeLog` -#[derive(Debug, Clone, PartialEq)] -pub struct IncompleteHummockVersion { - pub id: HummockVersionId, - pub levels: HashMap, - max_committed_epoch: u64, - safe_epoch: u64, - pub table_watermarks: HashMap>, - pub table_change_log: HashMap, - pub state_table_info: HummockVersionStateTableInfo, -} - -/// Clone from an `SstableInfo`, but only set the `sst_id` for the target, leaving other fields as default. -/// The goal is to reduce the size of pb object generated afterward. -fn stripped_sstable_info(origin: &SstableInfo) -> SstableInfo { - SstableInfo { - object_id: Default::default(), - sst_id: origin.sst_id, - key_range: Default::default(), - file_size: Default::default(), - table_ids: Default::default(), - meta_offset: Default::default(), - stale_key_count: Default::default(), - total_key_count: Default::default(), - min_epoch: Default::default(), - max_epoch: Default::default(), - uncompressed_file_size: Default::default(), - range_tombstone_count: Default::default(), - bloom_filter_kind: Default::default(), - sst_size: Default::default(), - } -} - -fn stripped_epoch_new_change_log(origin: &EpochNewChangeLog) -> EpochNewChangeLog { - EpochNewChangeLog { - old_value: origin.old_value.iter().map(stripped_sstable_info).collect(), - new_value: origin.new_value.iter().map(stripped_sstable_info).collect(), - epochs: origin.epochs.clone(), - } -} - -fn stripped_change_log_delta(origin: &ChangeLogDelta) -> ChangeLogDelta { - ChangeLogDelta { - new_log: origin.new_log.as_ref().map(stripped_epoch_new_change_log), - truncate_epoch: origin.truncate_epoch, - } -} - -fn stripped_level(origin: &Level) -> Level { - Level { - level_idx: origin.level_idx, - level_type: origin.level_type, - table_infos: origin - .table_infos - .iter() - .map(stripped_sstable_info) - .collect(), - total_file_size: origin.total_file_size, - sub_level_id: origin.sub_level_id, - uncompressed_file_size: origin.uncompressed_file_size, - vnode_partition_count: origin.vnode_partition_count, - } -} +pub type IncompleteHummockVersion = HummockVersionCommon; pub fn refill_version( version: &mut HummockVersion, @@ -146,55 +79,6 @@ fn refill_sstable_info( .clone(); } -fn stripped_l0(origin: &OverlappingLevel) -> OverlappingLevel { - OverlappingLevel { - sub_levels: 
origin.sub_levels.iter().map(stripped_level).collect(), - total_file_size: origin.total_file_size, - uncompressed_file_size: origin.uncompressed_file_size, - } -} - -#[allow(deprecated)] -fn stripped_levels(origin: &Levels) -> Levels { - Levels { - levels: origin.levels.iter().map(stripped_level).collect(), - l0: stripped_l0(&origin.l0), - group_id: origin.group_id, - parent_group_id: origin.parent_group_id, - member_table_ids: Default::default(), - } -} - -fn stripped_intra_level_delta(origin: &IntraLevelDelta) -> IntraLevelDelta { - IntraLevelDelta { - level_idx: origin.level_idx, - l0_sub_level_id: origin.l0_sub_level_id, - removed_table_ids: origin.removed_table_ids.clone(), - inserted_table_infos: origin - .inserted_table_infos - .iter() - .map(stripped_sstable_info) - .collect(), - vnode_partition_count: origin.vnode_partition_count, - } -} - -fn stripped_group_delta(origin: &GroupDelta) -> GroupDelta { - match origin { - GroupDelta::IntraLevel(l) => GroupDelta::IntraLevel(stripped_intra_level_delta(l)), - _ => panic!("time travel expects DeltaType::IntraLevel only"), - } -} - -fn stripped_group_deltas(origin: &GroupDeltas) -> GroupDeltas { - let group_deltas = origin - .group_deltas - .iter() - .map(stripped_group_delta) - .collect(); - GroupDeltas { group_deltas } -} - /// `SStableInfo` will be stripped. impl From<(&HummockVersion, &HashSet)> for IncompleteHummockVersion { fn from(p: (&HummockVersion, &HashSet)) -> Self { @@ -206,7 +90,10 @@ impl From<(&HummockVersion, &HashSet)> for IncompleteHummockV .iter() .filter_map(|(group_id, levels)| { if select_group.contains(group_id) { - Some((*group_id as CompactionGroupId, stripped_levels(levels))) + Some(( + *group_id as CompactionGroupId, + PbLevels::from(levels).into(), + )) } else { None } @@ -215,7 +102,7 @@ impl From<(&HummockVersion, &HashSet)> for IncompleteHummockV max_committed_epoch: version.visible_table_committed_epoch(), safe_epoch: version.visible_table_safe_epoch(), table_watermarks: version.table_watermarks.clone(), - // TODO: optimization: strip table change log + // TODO: optimization: strip table change log based on select_group table_change_log: version .table_change_log .iter() @@ -223,9 +110,9 @@ impl From<(&HummockVersion, &HashSet)> for IncompleteHummockV let incomplete_table_change_log = change_log .0 .iter() - .map(stripped_epoch_new_change_log) + .map(|e| PbEpochNewChangeLog::from(e).into()) .collect(); - (*table_id, TableChangeLog(incomplete_table_change_log)) + (*table_id, TableChangeLogCommon(incomplete_table_change_log)) }) .collect(), state_table_info: version.state_table_info.clone(), @@ -233,49 +120,10 @@ impl From<(&HummockVersion, &HashSet)> for IncompleteHummockV } } -impl IncompleteHummockVersion { - /// Resulted `SStableInfo` is incompelte. 
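The `From` impl above now strips the selected groups by converting them through the protobuf type (`PbLevels::from(levels).into()`) into an id-only payload, replacing the hand-written `stripped_*` helpers. A minimal sketch of that round trip, with a hypothetical `PbSstLike` standing in for the generated `PbSstableInfo`:

```rust
#[derive(Debug, Clone, Default, PartialEq)]
struct PbSstLike {
    sst_id: u64,
    object_id: u64,
    file_size: u64,
}

#[derive(Debug, Clone, PartialEq)]
struct FullSst {
    sst_id: u64,
    object_id: u64,
    file_size: u64,
}

/// Id-only payload, analogous to the `SstableIdInVersion` introduced later in this file.
#[derive(Debug, Clone, PartialEq)]
struct SstIdOnly(u64);

impl From<&FullSst> for PbSstLike {
    fn from(full: &FullSst) -> Self {
        PbSstLike { sst_id: full.sst_id, object_id: full.object_id, file_size: full.file_size }
    }
}

impl From<PbSstLike> for SstIdOnly {
    fn from(pb: PbSstLike) -> Self {
        // Everything except the sst id is dropped, which keeps the persisted metadata small.
        SstIdOnly(pb.sst_id)
    }
}

fn main() {
    let full = FullSst { sst_id: 42, object_id: 7, file_size: 4096 };
    // Same shape as `PbLevels::from(levels).into()` in the diff, one element at a time.
    let stripped: SstIdOnly = PbSstLike::from(&full).into();
    assert_eq!(stripped, SstIdOnly(42));
}
```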
- pub fn to_protobuf(&self) -> PbHummockVersion { - PbHummockVersion { - id: self.id.0, - levels: self - .levels - .iter() - .map(|(group_id, levels)| (*group_id as _, levels.to_protobuf())) - .collect(), - max_committed_epoch: self.max_committed_epoch, - safe_epoch: self.safe_epoch, - table_watermarks: self - .table_watermarks - .iter() - .map(|(table_id, watermark)| (table_id.table_id, watermark.to_protobuf())) - .collect(), - table_change_logs: self - .table_change_log - .iter() - .map(|(table_id, change_log)| (table_id.table_id, change_log.to_protobuf())) - .collect(), - state_table_info: self.state_table_info.to_protobuf(), - } - } -} - /// [`IncompleteHummockVersionDelta`] is incomplete because `SSTableInfo` only has the `sst_id` set in the following fields: /// - `PbGroupDeltas` /// - `ChangeLogDelta` -#[derive(Debug, PartialEq, Clone)] -pub struct IncompleteHummockVersionDelta { - pub id: HummockVersionId, - pub prev_id: HummockVersionId, - pub group_deltas: HashMap, - pub max_committed_epoch: u64, - pub safe_epoch: u64, - pub trivial_move: bool, - pub new_table_watermarks: HashMap, - pub removed_table_ids: HashSet, - pub change_log_delta: HashMap, - pub state_table_info_delta: HashMap, -} +pub type IncompleteHummockVersionDelta = HummockVersionDeltaCommon; /// `SStableInfo` will be stripped. impl From<(&HummockVersionDelta, &HashSet)> for IncompleteHummockVersionDelta { @@ -289,7 +137,7 @@ impl From<(&HummockVersionDelta, &HashSet)> for IncompleteHum .iter() .filter_map(|(cg_id, deltas)| { if select_group.contains(cg_id) { - Some((*cg_id, stripped_group_deltas(deltas).to_protobuf())) + Some((*cg_id, PbGroupDeltas::from(deltas).into())) } else { None } @@ -300,47 +148,42 @@ impl From<(&HummockVersionDelta, &HashSet)> for IncompleteHum trivial_move: delta.trivial_move, new_table_watermarks: delta.new_table_watermarks.clone(), removed_table_ids: delta.removed_table_ids.clone(), - // TODO: optimization: strip table change log + // TODO: optimization: strip table change log based on select_group change_log_delta: delta .change_log_delta .iter() - .map(|(table_id, log_delta)| (*table_id, stripped_change_log_delta(log_delta))) + .map(|(table_id, log_delta)| (*table_id, PbChangeLogDelta::from(log_delta).into())) .collect(), state_table_info_delta: delta.state_table_info_delta.clone(), } } } -impl IncompleteHummockVersionDelta { - /// Resulted `SStableInfo` is incompelte. 
- pub fn to_protobuf(&self) -> PbHummockVersionDelta { - PbHummockVersionDelta { - id: self.id.0, - prev_id: self.prev_id.0, - group_deltas: self.group_deltas.clone(), - max_committed_epoch: self.max_committed_epoch, - safe_epoch: self.safe_epoch, - trivial_move: self.trivial_move, - new_table_watermarks: self - .new_table_watermarks - .iter() - .map(|(table_id, watermarks)| (table_id.table_id, watermarks.to_protobuf())) - .collect(), - removed_table_ids: self - .removed_table_ids - .iter() - .map(|table_id| table_id.table_id) - .collect(), - change_log_delta: self - .change_log_delta - .iter() - .map(|(table_id, log_delta)| (table_id.table_id, log_delta.into())) - .collect(), - state_table_info_delta: self - .state_table_info_delta - .iter() - .map(|(table_id, delta)| (table_id.table_id, *delta)) - .collect(), +pub struct SstableIdInVersion(HummockSstableId); + +impl From<&SstableIdInVersion> for PbSstableInfo { + fn from(sst_id: &SstableIdInVersion) -> Self { + Self { + sst_id: sst_id.0, + ..Default::default() } } } + +impl From for PbSstableInfo { + fn from(sst_id: SstableIdInVersion) -> Self { + (&sst_id).into() + } +} + +impl From<&PbSstableInfo> for SstableIdInVersion { + fn from(value: &PbSstableInfo) -> Self { + SstableIdInVersion(value.sst_id) + } +} + +impl From for SstableIdInVersion { + fn from(value: PbSstableInfo) -> Self { + (&value).into() + } +} diff --git a/src/storage/hummock_sdk/src/version.rs b/src/storage/hummock_sdk/src/version.rs index e418250f0b6bf..4aecfcde0cf48 100644 --- a/src/storage/hummock_sdk/src/version.rs +++ b/src/storage/hummock_sdk/src/version.rs @@ -24,16 +24,16 @@ use risingwave_common::util::epoch::INVALID_EPOCH; use risingwave_pb::hummock::group_delta::PbDeltaType; use risingwave_pb::hummock::hummock_version_delta::PbGroupDeltas; use risingwave_pb::hummock::{ - CompactionConfig, PbGroupConstruct, PbGroupDelta, PbGroupDestroy, PbGroupMetaChange, - PbGroupTableChange, PbHummockVersion, PbHummockVersionDelta, PbIntraLevelDelta, - PbStateTableInfo, StateTableInfo, StateTableInfoDelta, + CompactionConfig, PbGroupConstruct, PbGroupDelta, PbGroupDestroy, PbGroupMerge, + PbGroupMetaChange, PbGroupTableChange, PbHummockVersion, PbHummockVersionDelta, + PbIntraLevelDelta, PbSstableInfo, PbStateTableInfo, StateTableInfo, StateTableInfoDelta, }; use tracing::warn; -use crate::change_log::{ChangeLogDelta, TableChangeLog}; +use crate::change_log::{ChangeLogDeltaCommon, TableChangeLogCommon}; use crate::compaction_group::hummock_version_ext::build_initial_compaction_group_levels; use crate::compaction_group::StaticCompactionGroupId; -use crate::level::Levels; +use crate::level::LevelsCommon; use crate::sstable_info::SstableInfo; use crate::table_watermark::TableWatermarks; use crate::{CompactionGroupId, HummockSstableObjectId, HummockVersionId, FIRST_VERSION_ID}; @@ -209,33 +209,39 @@ impl HummockVersionStateTableInfo { } #[derive(Debug, Clone, PartialEq)] -pub struct HummockVersion { +pub struct HummockVersionCommon { pub id: HummockVersionId, - pub levels: HashMap, - max_committed_epoch: u64, - safe_epoch: u64, + pub levels: HashMap>, + pub(crate) max_committed_epoch: u64, + pub(crate) safe_epoch: u64, pub table_watermarks: HashMap>, - pub table_change_log: HashMap, + pub table_change_log: HashMap>, pub state_table_info: HummockVersionStateTableInfo, } +pub type HummockVersion = HummockVersionCommon; + impl Default for HummockVersion { fn default() -> Self { HummockVersion::from(&PbHummockVersion::default()) } } -impl HummockVersion { +impl HummockVersionCommon 
+where + T: for<'a> From<&'a PbSstableInfo>, + PbSstableInfo: for<'a> From<&'a T>, +{ /// Convert the `PbHummockVersion` received from rpc to `HummockVersion`. No need to /// maintain backward compatibility. pub fn from_rpc_protobuf(pb_version: &PbHummockVersion) -> Self { - HummockVersion::from(pb_version) + pb_version.into() } /// Convert the `PbHummockVersion` deserialized from persisted state to `HummockVersion`. /// We should maintain backward compatibility. pub fn from_persisted_protobuf(pb_version: &PbHummockVersion) -> Self { - HummockVersion::from(pb_version) + pb_version.into() } pub fn to_protobuf(&self) -> PbHummockVersion { @@ -260,14 +266,19 @@ impl HummockVersion { } } -impl From<&PbHummockVersion> for HummockVersion { +impl From<&PbHummockVersion> for HummockVersionCommon +where + T: for<'a> From<&'a PbSstableInfo>, +{ fn from(pb_version: &PbHummockVersion) -> Self { Self { id: HummockVersionId(pb_version.id), levels: pb_version .levels .iter() - .map(|(group_id, levels)| (*group_id as CompactionGroupId, Levels::from(levels))) + .map(|(group_id, levels)| { + (*group_id as CompactionGroupId, LevelsCommon::from(levels)) + }) .collect(), max_committed_epoch: pb_version.max_committed_epoch, safe_epoch: pb_version.safe_epoch, @@ -287,7 +298,7 @@ impl From<&PbHummockVersion> for HummockVersion { .map(|(table_id, change_log)| { ( TableId::new(*table_id), - TableChangeLog::from_protobuf(change_log), + TableChangeLogCommon::from_protobuf(change_log), ) }) .collect(), @@ -298,8 +309,11 @@ impl From<&PbHummockVersion> for HummockVersion { } } -impl From<&HummockVersion> for PbHummockVersion { - fn from(version: &HummockVersion) -> Self { +impl From<&HummockVersionCommon> for PbHummockVersion +where + PbSstableInfo: for<'a> From<&'a T>, +{ + fn from(version: &HummockVersionCommon) -> Self { Self { id: version.id.0, levels: version @@ -324,8 +338,12 @@ impl From<&HummockVersion> for PbHummockVersion { } } -impl From for PbHummockVersion { - fn from(version: HummockVersion) -> Self { +impl From> for PbHummockVersion +where + PbSstableInfo: From, + PbSstableInfo: for<'a> From<&'a T>, +{ + fn from(version: HummockVersionCommon) -> Self { Self { id: version.id.0, levels: version @@ -453,36 +471,42 @@ impl HummockVersion { } #[derive(Debug, PartialEq, Clone)] -pub struct HummockVersionDelta { +pub struct HummockVersionDeltaCommon { pub id: HummockVersionId, pub prev_id: HummockVersionId, - pub group_deltas: HashMap, - max_committed_epoch: u64, - safe_epoch: u64, + pub group_deltas: HashMap>, + pub(crate) max_committed_epoch: u64, + pub(crate) safe_epoch: u64, pub trivial_move: bool, pub new_table_watermarks: HashMap, pub removed_table_ids: HashSet, - pub change_log_delta: HashMap, + pub change_log_delta: HashMap>, pub state_table_info_delta: HashMap, } +pub type HummockVersionDelta = HummockVersionDeltaCommon; + impl Default for HummockVersionDelta { fn default() -> Self { HummockVersionDelta::from(&PbHummockVersionDelta::default()) } } -impl HummockVersionDelta { +impl HummockVersionDeltaCommon +where + T: for<'a> From<&'a PbSstableInfo>, + PbSstableInfo: for<'a> From<&'a T>, +{ /// Convert the `PbHummockVersionDelta` deserialized from persisted state to `HummockVersionDelta`. /// We should maintain backward compatibility. pub fn from_persisted_protobuf(delta: &PbHummockVersionDelta) -> Self { - Self::from(delta) + delta.into() } /// Convert the `PbHummockVersionDelta` received from rpc to `HummockVersionDelta`. No need to /// maintain backward compatibility. 
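The `where` clauses above use higher-ranked bounds such as `T: for<'a> From<&'a PbSstableInfo>`. A small sketch of why that form is needed: the conversion must hold for borrows of any lifetime, which is what lets generic code call `.map(Into::into)` over borrowed protobuf data (the types here are stand-ins, not the real structs).

```rust
#[derive(Debug, Default)]
struct Pb {
    id: u64,
}

struct Full {
    id: u64,
}

impl From<&Pb> for Full {
    fn from(pb: &Pb) -> Self {
        Full { id: pb.id }
    }
}

/// Works for any payload type buildable from a *borrowed* `Pb`, no matter how
/// short-lived the borrow is; that is what `for<'a>` buys.
fn convert_all<T>(pbs: &[Pb]) -> Vec<T>
where
    T: for<'a> From<&'a Pb>,
{
    pbs.iter().map(Into::into).collect()
}

fn main() {
    let pbs = vec![Pb { id: 1 }, Pb { id: 2 }];
    let full: Vec<Full> = convert_all(&pbs);
    assert_eq!(full.len(), 2);
    assert_eq!(full[0].id, 1);
}
```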
pub fn from_rpc_protobuf(delta: &PbHummockVersionDelta) -> Self { - Self::from(delta) + delta.into() } pub fn to_protobuf(&self) -> PbHummockVersionDelta { @@ -501,12 +525,10 @@ impl HummockVersionDelta { .flat_map(|group_deltas| { group_deltas.group_deltas.iter().flat_map(|group_delta| { static EMPTY_VEC: Vec = Vec::new(); - let sst_slice = match group_delta { - GroupDelta::IntraLevel(level_delta) => &level_delta.inserted_table_infos, - GroupDelta::GroupConstruct(_) - | GroupDelta::GroupDestroy(_) - | GroupDelta::GroupMetaChange(_) - | GroupDelta::GroupTableChange(_) => &EMPTY_VEC, + let sst_slice = if let GroupDelta::IntraLevel(level_delta) = &group_delta { + &level_delta.inserted_table_infos + } else { + &EMPTY_VEC }; sst_slice.iter().map(|sst| sst.object_id) }) @@ -526,12 +548,10 @@ impl HummockVersionDelta { let ssts_from_group_deltas = self.group_deltas.values().flat_map(|group_deltas| { group_deltas.group_deltas.iter().flat_map(|group_delta| { static EMPTY_VEC: Vec = Vec::new(); - let sst_slice = match group_delta { - GroupDelta::IntraLevel(level_delta) => &level_delta.inserted_table_infos, - GroupDelta::GroupConstruct(_) - | GroupDelta::GroupDestroy(_) - | GroupDelta::GroupMetaChange(_) - | GroupDelta::GroupTableChange(_) => &EMPTY_VEC, + let sst_slice = if let GroupDelta::IntraLevel(level_delta) = &group_delta { + &level_delta.inserted_table_infos + } else { + &EMPTY_VEC }; sst_slice.iter() }) @@ -564,12 +584,10 @@ impl HummockVersionDelta { .flat_map(|group_deltas| { group_deltas.group_deltas.iter().flat_map(|group_delta| { static EMPTY_VEC: Vec = Vec::new(); - let sst_slice = match group_delta { - GroupDelta::IntraLevel(level_delta) => &level_delta.inserted_table_infos, - GroupDelta::GroupConstruct(_) - | GroupDelta::GroupDestroy(_) - | GroupDelta::GroupMetaChange(_) - | GroupDelta::GroupTableChange(_) => &EMPTY_VEC, + let sst_slice = if let GroupDelta::IntraLevel(level_delta) = &group_delta { + &level_delta.inserted_table_infos + } else { + &EMPTY_VEC }; sst_slice.iter() }) @@ -598,7 +616,10 @@ impl HummockVersionDelta { } } -impl From<&PbHummockVersionDelta> for HummockVersionDelta { +impl From<&PbHummockVersionDelta> for HummockVersionDeltaCommon +where + T: for<'a> From<&'a PbSstableInfo>, +{ fn from(pb_version_delta: &PbHummockVersionDelta) -> Self { Self { id: HummockVersionId(pb_version_delta.id), @@ -607,7 +628,10 @@ impl From<&PbHummockVersionDelta> for HummockVersionDelta { .group_deltas .iter() .map(|(group_id, deltas)| { - (*group_id as CompactionGroupId, GroupDeltas::from(deltas)) + ( + *group_id as CompactionGroupId, + GroupDeltasCommon::from(deltas), + ) }) .collect(), max_committed_epoch: pb_version_delta.max_committed_epoch, @@ -631,8 +655,8 @@ impl From<&PbHummockVersionDelta> for HummockVersionDelta { .map(|(table_id, log_delta)| { ( TableId::new(*table_id), - ChangeLogDelta { - new_log: log_delta.new_log.clone().map(Into::into), + ChangeLogDeltaCommon { + new_log: log_delta.new_log.as_ref().map(Into::into), truncate_epoch: log_delta.truncate_epoch, }, ) @@ -648,8 +672,11 @@ impl From<&PbHummockVersionDelta> for HummockVersionDelta { } } -impl From<&HummockVersionDelta> for PbHummockVersionDelta { - fn from(version_delta: &HummockVersionDelta) -> Self { +impl From<&HummockVersionDeltaCommon> for PbHummockVersionDelta +where + PbSstableInfo: for<'a> From<&'a T>, +{ + fn from(version_delta: &HummockVersionDeltaCommon) -> Self { Self { id: version_delta.id.0, prev_id: version_delta.prev_id.0, @@ -685,8 +712,11 @@ impl From<&HummockVersionDelta> for 
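The object-id collection above is rewritten from an exhaustive `match` into `if let ... else`, so only the `IntraLevel` variant contributes SSTs and every other variant, including the new `GroupMerge`, falls back to an empty slice. A toy version of the same pattern:

```rust
#[derive(Debug)]
enum DeltaLike {
    IntraLevel(Vec<u64>), // inserted sst object ids
    GroupConstruct,
    GroupDestroy,
    GroupMerge,
}

fn inserted_ids(deltas: &[DeltaLike]) -> Vec<u64> {
    static EMPTY: Vec<u64> = Vec::new();
    deltas
        .iter()
        .flat_map(|delta| {
            // Only the IntraLevel variant carries SSTs; everything else yields nothing.
            let slice = if let DeltaLike::IntraLevel(ids) = delta {
                ids
            } else {
                &EMPTY
            };
            slice.iter().copied()
        })
        .collect()
}

fn main() {
    let deltas = [
        DeltaLike::IntraLevel(vec![1, 2]),
        DeltaLike::GroupMerge,
        DeltaLike::IntraLevel(vec![3]),
        DeltaLike::GroupConstruct,
        DeltaLike::GroupDestroy,
    ];
    assert_eq!(inserted_ids(&deltas), vec![1, 2, 3]);
}
```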
PbHummockVersionDelta { } } -impl From for PbHummockVersionDelta { - fn from(version_delta: HummockVersionDelta) -> Self { +impl From> for PbHummockVersionDelta +where + PbSstableInfo: From, +{ + fn from(version_delta: HummockVersionDeltaCommon) -> Self { Self { id: version_delta.id.0, prev_id: version_delta.prev_id.0, @@ -722,7 +752,10 @@ impl From for PbHummockVersionDelta { } } -impl From for HummockVersionDelta { +impl From for HummockVersionDeltaCommon +where + T: From, +{ fn from(pb_version_delta: PbHummockVersionDelta) -> Self { Self { id: HummockVersionId(pb_version_delta.id), @@ -751,7 +784,7 @@ impl From for HummockVersionDelta { .map(|(table_id, log_delta)| { ( TableId::new(*table_id), - ChangeLogDelta { + ChangeLogDeltaCommon { new_log: log_delta.new_log.clone().map(Into::into), truncate_epoch: log_delta.truncate_epoch, }, @@ -768,14 +801,16 @@ impl From for HummockVersionDelta { } #[derive(Debug, PartialEq, Clone)] -pub struct IntraLevelDelta { +pub struct IntraLevelDeltaCommon { pub level_idx: u32, pub l0_sub_level_id: u64, pub removed_table_ids: Vec, - pub inserted_table_infos: Vec, + pub inserted_table_infos: Vec, pub vnode_partition_count: u32, } +pub type IntraLevelDelta = IntraLevelDeltaCommon; + impl IntraLevelDelta { pub fn estimated_encode_len(&self) -> usize { size_of::() @@ -790,40 +825,49 @@ impl IntraLevelDelta { } } -impl From for IntraLevelDelta { +impl From for IntraLevelDeltaCommon +where + T: From, +{ fn from(pb_intra_level_delta: PbIntraLevelDelta) -> Self { Self { level_idx: pb_intra_level_delta.level_idx, l0_sub_level_id: pb_intra_level_delta.l0_sub_level_id, - removed_table_ids: pb_intra_level_delta.removed_table_ids.clone(), + removed_table_ids: pb_intra_level_delta.removed_table_ids, inserted_table_infos: pb_intra_level_delta .inserted_table_infos .into_iter() - .map(SstableInfo::from) + .map(Into::into) .collect_vec(), vnode_partition_count: pb_intra_level_delta.vnode_partition_count, } } } -impl From for PbIntraLevelDelta { - fn from(intra_level_delta: IntraLevelDelta) -> Self { +impl From> for PbIntraLevelDelta +where + PbSstableInfo: From, +{ + fn from(intra_level_delta: IntraLevelDeltaCommon) -> Self { Self { level_idx: intra_level_delta.level_idx, l0_sub_level_id: intra_level_delta.l0_sub_level_id, - removed_table_ids: intra_level_delta.removed_table_ids.clone(), + removed_table_ids: intra_level_delta.removed_table_ids, inserted_table_infos: intra_level_delta .inserted_table_infos .into_iter() - .map(|sst| sst.into()) + .map(Into::into) .collect_vec(), vnode_partition_count: intra_level_delta.vnode_partition_count, } } } -impl From<&IntraLevelDelta> for PbIntraLevelDelta { - fn from(intra_level_delta: &IntraLevelDelta) -> Self { +impl From<&IntraLevelDeltaCommon> for PbIntraLevelDelta +where + PbSstableInfo: for<'a> From<&'a T>, +{ + fn from(intra_level_delta: &IntraLevelDeltaCommon) -> Self { Self { level_idx: intra_level_delta.level_idx, l0_sub_level_id: intra_level_delta.l0_sub_level_id, @@ -831,14 +875,17 @@ impl From<&IntraLevelDelta> for PbIntraLevelDelta { inserted_table_infos: intra_level_delta .inserted_table_infos .iter() - .map(|sst| sst.into()) + .map(Into::into) .collect_vec(), vnode_partition_count: intra_level_delta.vnode_partition_count, } } } -impl From<&PbIntraLevelDelta> for IntraLevelDelta { +impl From<&PbIntraLevelDelta> for IntraLevelDeltaCommon +where + T: for<'a> From<&'a PbSstableInfo>, +{ fn from(pb_intra_level_delta: &PbIntraLevelDelta) -> Self { Self { level_idx: pb_intra_level_delta.level_idx, @@ -847,7 +894,7 @@ impl 
From<&PbIntraLevelDelta> for IntraLevelDelta { inserted_table_infos: pb_intra_level_delta .inserted_table_infos .iter() - .map(SstableInfo::from) + .map(Into::into) .collect_vec(), vnode_partition_count: pb_intra_level_delta.vnode_partition_count, } @@ -873,100 +920,128 @@ impl IntraLevelDelta { } #[derive(Debug, PartialEq, Clone)] -pub enum GroupDelta { - IntraLevel(IntraLevelDelta), +pub enum GroupDeltaCommon { + IntraLevel(IntraLevelDeltaCommon), GroupConstruct(PbGroupConstruct), GroupDestroy(PbGroupDestroy), GroupMetaChange(PbGroupMetaChange), #[allow(dead_code)] GroupTableChange(PbGroupTableChange), + + GroupMerge(PbGroupMerge), } -impl From for GroupDelta { +pub type GroupDelta = GroupDeltaCommon; + +impl From for GroupDeltaCommon +where + T: From, +{ fn from(pb_group_delta: PbGroupDelta) -> Self { match pb_group_delta.delta_type { Some(PbDeltaType::IntraLevel(pb_intra_level_delta)) => { - GroupDelta::IntraLevel(IntraLevelDelta::from(pb_intra_level_delta)) + GroupDeltaCommon::IntraLevel(IntraLevelDeltaCommon::from(pb_intra_level_delta)) } Some(PbDeltaType::GroupConstruct(pb_group_construct)) => { - GroupDelta::GroupConstruct(pb_group_construct) + GroupDeltaCommon::GroupConstruct(pb_group_construct) } Some(PbDeltaType::GroupDestroy(pb_group_destroy)) => { - GroupDelta::GroupDestroy(pb_group_destroy) + GroupDeltaCommon::GroupDestroy(pb_group_destroy) } Some(PbDeltaType::GroupMetaChange(pb_group_meta_change)) => { - GroupDelta::GroupMetaChange(pb_group_meta_change) + GroupDeltaCommon::GroupMetaChange(pb_group_meta_change) } Some(PbDeltaType::GroupTableChange(pb_group_table_change)) => { - GroupDelta::GroupTableChange(pb_group_table_change) + GroupDeltaCommon::GroupTableChange(pb_group_table_change) + } + Some(PbDeltaType::GroupMerge(pb_group_merge)) => { + GroupDeltaCommon::GroupMerge(pb_group_merge) } None => panic!("delta_type is not set"), } } } -impl From for PbGroupDelta { - fn from(group_delta: GroupDelta) -> Self { +impl From> for PbGroupDelta +where + PbSstableInfo: From, +{ + fn from(group_delta: GroupDeltaCommon) -> Self { match group_delta { - GroupDelta::IntraLevel(intra_level_delta) => PbGroupDelta { + GroupDeltaCommon::IntraLevel(intra_level_delta) => PbGroupDelta { delta_type: Some(PbDeltaType::IntraLevel(intra_level_delta.into())), }, - GroupDelta::GroupConstruct(pb_group_construct) => PbGroupDelta { + GroupDeltaCommon::GroupConstruct(pb_group_construct) => PbGroupDelta { delta_type: Some(PbDeltaType::GroupConstruct(pb_group_construct)), }, - GroupDelta::GroupDestroy(pb_group_destroy) => PbGroupDelta { + GroupDeltaCommon::GroupDestroy(pb_group_destroy) => PbGroupDelta { delta_type: Some(PbDeltaType::GroupDestroy(pb_group_destroy)), }, - GroupDelta::GroupMetaChange(pb_group_meta_change) => PbGroupDelta { + GroupDeltaCommon::GroupMetaChange(pb_group_meta_change) => PbGroupDelta { delta_type: Some(PbDeltaType::GroupMetaChange(pb_group_meta_change)), }, - GroupDelta::GroupTableChange(pb_group_table_change) => PbGroupDelta { + GroupDeltaCommon::GroupTableChange(pb_group_table_change) => PbGroupDelta { delta_type: Some(PbDeltaType::GroupTableChange(pb_group_table_change)), }, + GroupDeltaCommon::GroupMerge(pb_group_merge) => PbGroupDelta { + delta_type: Some(PbDeltaType::GroupMerge(pb_group_merge)), + }, } } } -impl From<&GroupDelta> for PbGroupDelta { - fn from(group_delta: &GroupDelta) -> Self { +impl From<&GroupDeltaCommon> for PbGroupDelta +where + PbSstableInfo: for<'a> From<&'a T>, +{ + fn from(group_delta: &GroupDeltaCommon) -> Self { match group_delta { - 
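Each conversion above now has to handle the new `GroupMerge` delta. A compact sketch of mapping a protobuf-style optional `oneof` into the domain enum, with illustrative types (`PbGroupMergeLike` and its fields are not the real generated struct) and the same `panic!` when the delta type is unset:

```rust
#[derive(Debug, Clone, Copy)]
struct PbGroupMergeLike {
    left_group_id: u64,
    right_group_id: u64,
}

#[derive(Debug)]
enum PbDeltaTypeLike {
    IntraLevel(Vec<u64>),
    GroupMerge(PbGroupMergeLike),
}

#[derive(Debug)]
enum DomainDelta {
    IntraLevel(Vec<u64>),
    GroupMerge(PbGroupMergeLike),
}

impl From<Option<PbDeltaTypeLike>> for DomainDelta {
    fn from(delta_type: Option<PbDeltaTypeLike>) -> Self {
        match delta_type {
            Some(PbDeltaTypeLike::IntraLevel(ssts)) => DomainDelta::IntraLevel(ssts),
            Some(PbDeltaTypeLike::GroupMerge(m)) => DomainDelta::GroupMerge(m),
            None => panic!("delta_type is not set"),
        }
    }
}

fn main() {
    let merge = PbDeltaTypeLike::GroupMerge(PbGroupMergeLike {
        left_group_id: 2,
        right_group_id: 3,
    });
    println!("{:?}", DomainDelta::from(Some(merge)));
}
```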
GroupDelta::IntraLevel(intra_level_delta) => PbGroupDelta { + GroupDeltaCommon::IntraLevel(intra_level_delta) => PbGroupDelta { delta_type: Some(PbDeltaType::IntraLevel(intra_level_delta.into())), }, - GroupDelta::GroupConstruct(pb_group_construct) => PbGroupDelta { + GroupDeltaCommon::GroupConstruct(pb_group_construct) => PbGroupDelta { delta_type: Some(PbDeltaType::GroupConstruct(pb_group_construct.clone())), }, - GroupDelta::GroupDestroy(pb_group_destroy) => PbGroupDelta { + GroupDeltaCommon::GroupDestroy(pb_group_destroy) => PbGroupDelta { delta_type: Some(PbDeltaType::GroupDestroy(*pb_group_destroy)), }, - GroupDelta::GroupMetaChange(pb_group_meta_change) => PbGroupDelta { + GroupDeltaCommon::GroupMetaChange(pb_group_meta_change) => PbGroupDelta { delta_type: Some(PbDeltaType::GroupMetaChange(pb_group_meta_change.clone())), }, - GroupDelta::GroupTableChange(pb_group_table_change) => PbGroupDelta { + GroupDeltaCommon::GroupTableChange(pb_group_table_change) => PbGroupDelta { delta_type: Some(PbDeltaType::GroupTableChange(pb_group_table_change.clone())), }, + GroupDeltaCommon::GroupMerge(pb_group_merge) => PbGroupDelta { + delta_type: Some(PbDeltaType::GroupMerge(*pb_group_merge)), + }, } } } -impl From<&PbGroupDelta> for GroupDelta { +impl From<&PbGroupDelta> for GroupDeltaCommon +where + T: for<'a> From<&'a PbSstableInfo>, +{ fn from(pb_group_delta: &PbGroupDelta) -> Self { match &pb_group_delta.delta_type { Some(PbDeltaType::IntraLevel(pb_intra_level_delta)) => { - GroupDelta::IntraLevel(IntraLevelDelta::from(pb_intra_level_delta)) + GroupDeltaCommon::IntraLevel(IntraLevelDeltaCommon::from(pb_intra_level_delta)) } Some(PbDeltaType::GroupConstruct(pb_group_construct)) => { - GroupDelta::GroupConstruct(pb_group_construct.clone()) + GroupDeltaCommon::GroupConstruct(pb_group_construct.clone()) } Some(PbDeltaType::GroupDestroy(pb_group_destroy)) => { - GroupDelta::GroupDestroy(*pb_group_destroy) + GroupDeltaCommon::GroupDestroy(*pb_group_destroy) } Some(PbDeltaType::GroupMetaChange(pb_group_meta_change)) => { - GroupDelta::GroupMetaChange(pb_group_meta_change.clone()) + GroupDeltaCommon::GroupMetaChange(pb_group_meta_change.clone()) } Some(PbDeltaType::GroupTableChange(pb_group_table_change)) => { - GroupDelta::GroupTableChange(pb_group_table_change.clone()) + GroupDeltaCommon::GroupTableChange(pb_group_table_change.clone()) + } + Some(PbDeltaType::GroupMerge(pb_group_merge)) => { + GroupDeltaCommon::GroupMerge(*pb_group_merge) } None => panic!("delta_type is not set"), } @@ -974,24 +1049,32 @@ impl From<&PbGroupDelta> for GroupDelta { } #[derive(Debug, PartialEq, Clone, Default)] -pub struct GroupDeltas { - pub group_deltas: Vec, +pub struct GroupDeltasCommon { + pub group_deltas: Vec>, } -impl From for GroupDeltas { +pub type GroupDeltas = GroupDeltasCommon; + +impl From for GroupDeltasCommon +where + T: From, +{ fn from(pb_group_deltas: PbGroupDeltas) -> Self { Self { group_deltas: pb_group_deltas .group_deltas .into_iter() - .map(GroupDelta::from) + .map(GroupDeltaCommon::from) .collect_vec(), } } } -impl From for PbGroupDeltas { - fn from(group_deltas: GroupDeltas) -> Self { +impl From> for PbGroupDeltas +where + PbSstableInfo: From, +{ + fn from(group_deltas: GroupDeltasCommon) -> Self { Self { group_deltas: group_deltas .group_deltas @@ -1002,8 +1085,11 @@ impl From for PbGroupDeltas { } } -impl From<&GroupDeltas> for PbGroupDeltas { - fn from(group_deltas: &GroupDeltas) -> Self { +impl From<&GroupDeltasCommon> for PbGroupDeltas +where + PbSstableInfo: for<'a> From<&'a T>, +{ + fn 
from(group_deltas: &GroupDeltasCommon) -> Self { Self { group_deltas: group_deltas .group_deltas @@ -1014,19 +1100,25 @@ impl From<&GroupDeltas> for PbGroupDeltas { } } -impl From<&PbGroupDeltas> for GroupDeltas { +impl From<&PbGroupDeltas> for GroupDeltasCommon +where + T: for<'a> From<&'a PbSstableInfo>, +{ fn from(pb_group_deltas: &PbGroupDeltas) -> Self { Self { group_deltas: pb_group_deltas .group_deltas .iter() - .map(GroupDelta::from) + .map(GroupDeltaCommon::from) .collect_vec(), } } } -impl GroupDeltas { +impl GroupDeltasCommon +where + PbSstableInfo: for<'a> From<&'a T>, +{ pub fn to_protobuf(&self) -> PbGroupDeltas { self.into() } diff --git a/src/storage/hummock_test/src/bin/replay/main.rs b/src/storage/hummock_test/src/bin/replay/main.rs index 9181e37c992e2..7760d7ce530c6 100644 --- a/src/storage/hummock_test/src/bin/replay/main.rs +++ b/src/storage/hummock_test/src/bin/replay/main.rs @@ -31,7 +31,7 @@ use clap::Parser; use foyer::HybridCacheBuilder; use replay_impl::{get_replay_notification_client, GlobalReplayImpl}; use risingwave_common::config::{ - extract_storage_memory_config, load_config, NoOverride, ObjectStoreConfig, StorageConfig, + extract_storage_memory_config, load_config, NoOverride, ObjectStoreConfig, }; use risingwave_common::system_param::reader::SystemParamsReader; use risingwave_hummock_trace::{ @@ -46,7 +46,6 @@ use risingwave_storage::filter_key_extractor::{ use risingwave_storage::hummock::{HummockStorage, SstableStore, SstableStoreConfig}; use risingwave_storage::monitor::{CompactorMetrics, HummockStateStoreMetrics, ObjectStoreMetrics}; use risingwave_storage::opts::StorageOpts; -use serde::{Deserialize, Serialize}; // use a large offset to avoid collision with real sstables const SST_OFFSET: u64 = 2147383647000; @@ -183,8 +182,3 @@ async fn create_replay_hummock(r: Record, args: &Args) -> Result, notification_client: impl NotificationClient, hummock_manager_ref: &HummockManagerRef, - table_id: TableId, + table_ids: &[u32], ) -> HummockStorage { let remote_dir = "hummock_001_test".to_string(); let options = Arc::new(StorageOpts { @@ -117,7 +118,7 @@ pub(crate) mod tests { register_tables_with_id_for_test( hummock.filter_key_extractor_manager(), hummock_manager_ref, - &[table_id.table_id()], + table_ids, ) .await; @@ -162,6 +163,8 @@ pub(crate) mod tests { let mut local = storage .new_local(NewLocalOptions::for_test(TableId::default())) .await; + let table_id = local.table_id(); + let table_id_set = HashSet::from_iter([table_id]); // 1. add sstables let val = b"0"[..].repeat(value_size); local.init_for_test(epochs[0]).await.unwrap(); @@ -188,9 +191,14 @@ pub(crate) mod tests { } else { local.seal_current_epoch(u64::MAX, SealCurrentEpochOptions::for_test()); } - let res = storage.seal_and_sync_epoch(epoch).await.unwrap(); - - hummock_meta_client.commit_epoch(epoch, res).await.unwrap(); + let res = storage + .seal_and_sync_epoch(epoch, table_id_set.clone()) + .await + .unwrap(); + hummock_meta_client + .commit_epoch(epoch, res, false) + .await + .unwrap(); } } @@ -229,6 +237,7 @@ pub(crate) mod tests { )); // 1. 
add sstables + let table_id = 0; let mut key = BytesMut::default(); key.put_u16(0); key.put_slice(b"same_key"); @@ -236,7 +245,7 @@ pub(crate) mod tests { hummock_meta_client.clone(), get_notification_client_for_test(env, hummock_manager_ref.clone(), worker_node.clone()), &hummock_manager_ref, - Default::default(), + &[table_id], ) .await; let rpc_filter_key_extractor_manager = match storage.filter_key_extractor_manager().clone() @@ -284,12 +293,12 @@ pub(crate) mod tests { .collect_vec(), ) .await; + + let compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager_ref.clone(), table_id).await; // 2. get compact task while let Some(mut compact_task) = hummock_manager_ref - .get_compact_task( - StaticCompactionGroupId::StateDefault.into(), - &mut default_compaction_selector(), - ) + .get_compact_task(compaction_group_id, &mut default_compaction_selector()) .await .unwrap() { @@ -334,8 +343,7 @@ pub(crate) mod tests { // 4. get the latest version and check let version = hummock_manager_ref.get_current_version().await; - let group = - version.get_compaction_group_levels(StaticCompactionGroupId::StateDefault.into()); + let group = version.get_compaction_group_levels(compaction_group_id); // base level let output_tables = group @@ -402,11 +410,12 @@ pub(crate) mod tests { worker_node.id, )); + let table_id = 0; let storage = get_hummock_storage( hummock_meta_client.clone(), get_notification_client_for_test(env, hummock_manager_ref.clone(), worker_node.clone()), &hummock_manager_ref, - Default::default(), + &[table_id], ) .await; @@ -448,12 +457,10 @@ pub(crate) mod tests { // 2. get compact task - // 3. compact + let compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager_ref.clone(), table_id).await; while let Some(compact_task) = hummock_manager_ref - .get_compact_task( - StaticCompactionGroupId::StateDefault.into(), - &mut default_compaction_selector(), - ) + .get_compact_task(compaction_group_id, &mut default_compaction_selector()) .await .unwrap() { @@ -482,7 +489,7 @@ pub(crate) mod tests { // 4. get the latest version and check let version = hummock_manager_ref.get_current_version().await; let output_tables = version - .get_compaction_group_levels(StaticCompactionGroupId::StateDefault.into()) + .get_compaction_group_levels(compaction_group_id) .levels .iter() .flat_map(|level| level.table_infos.clone()) @@ -524,9 +531,16 @@ pub(crate) mod tests { hummock_meta_client: &Arc, storage: &HummockStorage, epoch: u64, + table_id: TableId, ) { - let res = storage.seal_and_sync_epoch(epoch).await.unwrap(); - hummock_meta_client.commit_epoch(epoch, res).await.unwrap(); + let res = storage + .seal_and_sync_epoch(epoch, HashSet::from([table_id])) + .await + .unwrap(); + hummock_meta_client + .commit_epoch(epoch, res, false) + .await + .unwrap(); } async fn prepare_data( @@ -539,8 +553,9 @@ pub(crate) mod tests { let kv_count: u16 = 128; let mut epoch = test_epoch(1); let mut local = storage.new_local(NewLocalOptions::for_test(table_id)).await; + let table_id_set = HashSet::from_iter([table_id]); - storage.start_epoch(epoch, HashSet::from_iter([table_id])); + storage.start_epoch(epoch, table_id_set); // 1. 
add sstables let val = Bytes::from(b"0"[..].repeat(1 << 10)); // 1024 Byte value @@ -562,7 +577,7 @@ pub(crate) mod tests { storage.start_epoch(next_epoch, HashSet::from_iter([table_id])); local.seal_current_epoch(next_epoch, SealCurrentEpochOptions::for_test()); - flush_and_commit(&hummock_meta_client, storage, epoch).await; + flush_and_commit(&hummock_meta_client, storage, epoch, table_id).await; epoch.inc_epoch(); } } @@ -604,7 +619,7 @@ pub(crate) mod tests { hummock_meta_client.clone(), get_notification_client_for_test(env, hummock_manager_ref.clone(), worker_node), &hummock_manager_ref, - TableId::from(existing_table_id), + &[existing_table_id], ) .await; @@ -616,6 +631,10 @@ pub(crate) mod tests { ) .await; + let compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager_ref.clone(), existing_table_id) + .await; + // Mimic dropping table unregister_table_ids_from_compaction_group(&hummock_manager_ref, &[existing_table_id]) .await; @@ -626,34 +645,19 @@ pub(crate) mod tests { }; // 2. get compact task and there should be none let compact_task = hummock_manager_ref - .manual_get_compact_task( - StaticCompactionGroupId::StateDefault.into(), - manual_compcation_option, - ) + .manual_get_compact_task(compaction_group_id, manual_compcation_option) .await .unwrap(); assert!(compact_task.is_none()); - // 3. get the latest version and check - let version = hummock_manager_ref.get_current_version().await; - let output_level_info = version - .get_compaction_group_levels(StaticCompactionGroupId::StateDefault.into()) - .levels - .last() - .unwrap(); - assert_eq!(0, output_level_info.total_file_size); + let current_version = hummock_manager_ref.get_current_version().await; + assert!(current_version + .get_sst_ids_by_group_id(compaction_group_id) + .collect_vec() + .is_empty()); - // 5. get compact task - let compact_task = hummock_manager_ref - .get_compact_task( - StaticCompactionGroupId::StateDefault.into(), - &mut default_compaction_selector(), - ) - .await - .unwrap(); - - assert!(compact_task.is_none()); + // assert_eq!(0, current_version.num_levels(compaction_group_id)); } #[tokio::test] @@ -679,6 +683,10 @@ pub(crate) mod tests { .new_local(NewLocalOptions::for_test(TableId::from(2))) .await; + let table_id_1 = storage_1.table_id(); + let table_id_2 = storage_2.table_id(); + let table_id_set = HashSet::from_iter([table_id_1, table_id_2]); + let rpc_filter_key_extractor_manager = match global_storage.filter_key_extractor_manager().clone() { FilterKeyExtractorManager::RpcFilterKeyExtractorManager( @@ -688,12 +696,12 @@ pub(crate) mod tests { }; rpc_filter_key_extractor_manager.update( - 1, + table_id_1.table_id(), Arc::new(FilterKeyExtractorImpl::FullKey(FullKeyFilterKeyExtractor)), ); rpc_filter_key_extractor_manager.update( - 2, + table_id_2.table_id(), Arc::new(FilterKeyExtractorImpl::FullKey(FullKeyFilterKeyExtractor)), ); let filter_key_extractor_manager = FilterKeyExtractorManager::RpcFilterKeyExtractorManager( @@ -713,13 +721,13 @@ pub(crate) mod tests { // 1. 
add sstables let val = Bytes::from(b"0"[..].repeat(1 << 10)); // 1024 Byte value - let drop_table_id = 1; - let existing_table_ids = 2; + let drop_table_id = table_id_1.table_id(); + let existing_table_id = table_id_2.table_id(); let kv_count: usize = 128; let mut epoch = test_epoch(1); register_table_ids_to_compaction_group( &hummock_manager_ref, - &[drop_table_id, existing_table_ids], + &[drop_table_id, existing_table_id], StaticCompactionGroupId::StateDefault.into(), ) .await; @@ -729,10 +737,10 @@ pub(crate) mod tests { .await; let vnode = VirtualNode::from_index(1); - global_storage.start_epoch(epoch, HashSet::from_iter([1.into(), 2.into()])); + global_storage.start_epoch(epoch, table_id_set.clone()); for index in 0..kv_count { let next_epoch = epoch.next_epoch(); - global_storage.start_epoch(next_epoch, HashSet::from_iter([1.into(), 2.into()])); + global_storage.start_epoch(next_epoch, table_id_set.clone()); if index == 0 { storage_1.init_for_test(epoch).await.unwrap(); storage_2.init_for_test(epoch).await.unwrap(); @@ -756,8 +764,14 @@ pub(crate) mod tests { storage.seal_current_epoch(next_epoch, SealCurrentEpochOptions::for_test()); other.seal_current_epoch(next_epoch, SealCurrentEpochOptions::for_test()); - let res = global_storage.seal_and_sync_epoch(epoch).await.unwrap(); - hummock_meta_client.commit_epoch(epoch, res).await.unwrap(); + let res = global_storage + .seal_and_sync_epoch(epoch, table_id_set.clone()) + .await + .unwrap(); + hummock_meta_client + .commit_epoch(epoch, res, false) + .await + .unwrap(); epoch.inc_epoch(); } @@ -768,12 +782,12 @@ pub(crate) mod tests { level: 0, ..Default::default() }; + let compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager_ref.clone(), existing_table_id) + .await; // 2. get compact task let mut compact_task = hummock_manager_ref - .manual_get_compact_task( - StaticCompactionGroupId::StateDefault.into(), - manual_compcation_option, - ) + .manual_get_compact_task(compaction_group_id, manual_compcation_option) .await .unwrap() .unwrap(); @@ -814,7 +828,7 @@ pub(crate) mod tests { // 5. get the latest version and check let version: HummockVersion = hummock_manager_ref.get_current_version().await; let mut tables_from_version = vec![]; - version.level_iter(StaticCompactionGroupId::StateDefault.into(), |level| { + version.level_iter(compaction_group_id, |level| { tables_from_version.extend(level.table_infos.iter().cloned()); true }); @@ -833,10 +847,7 @@ pub(crate) mod tests { // 6. 
get compact task and there should be none let compact_task = hummock_manager_ref - .get_compact_task( - StaticCompactionGroupId::StateDefault.into(), - &mut default_compaction_selector(), - ) + .get_compact_task(compaction_group_id, &mut default_compaction_selector()) .await .unwrap(); assert!(compact_task.is_none()); @@ -854,7 +865,7 @@ pub(crate) mod tests { epoch, None, ReadOptions { - table_id: TableId::from(existing_table_ids), + table_id: TableId::from(existing_table_id), prefetch_options: PrefetchOptions::default(), cache_policy: CachePolicy::Fill(CacheContext::Default), ..Default::default() @@ -865,7 +876,7 @@ pub(crate) mod tests { let mut scan_count = 0; for (k, _) in scan_result { let table_id = k.user_key.table_id.table_id(); - assert_eq!(table_id, existing_table_ids); + assert_eq!(table_id, existing_table_id); scan_count += 1; } assert_eq!(key_count, scan_count); @@ -885,7 +896,7 @@ pub(crate) mod tests { hummock_meta_client.clone(), get_notification_client_for_test(env, hummock_manager_ref.clone(), worker_node.clone()), &hummock_manager_ref, - TableId::from(existing_table_id), + &[existing_table_id], ) .await; @@ -906,7 +917,7 @@ pub(crate) mod tests { .sstable_id_remote_fetch_number, )); rpc_filter_key_extractor_manager.update( - 2, + existing_table_id, Arc::new(FilterKeyExtractorImpl::FullKey(FullKeyFilterKeyExtractor)), ); let filter_key_extractor_manager = FilterKeyExtractorManager::RpcFilterKeyExtractorManager( @@ -923,14 +934,15 @@ pub(crate) mod tests { let vnode = VirtualNode::from_index(1); let mut epoch_set = BTreeSet::new(); - storage.start_epoch(epoch, HashSet::from_iter([existing_table_id.into()])); + let table_id_set = HashSet::from_iter([existing_table_id.into()]); + storage.start_epoch(epoch, table_id_set.clone()); let mut local = storage .new_local(NewLocalOptions::for_test(existing_table_id.into())) .await; for i in 0..kv_count { let next_epoch = epoch + millisec_interval_epoch; - storage.start_epoch(next_epoch, HashSet::from_iter([existing_table_id.into()])); + storage.start_epoch(next_epoch, table_id_set.clone()); if i == 0 { local.init_for_test(epoch).await.unwrap(); } @@ -946,8 +958,14 @@ pub(crate) mod tests { local.flush().await.unwrap(); local.seal_current_epoch(next_epoch, SealCurrentEpochOptions::for_test()); - let res = storage.seal_and_sync_epoch(epoch).await.unwrap(); - hummock_meta_client.commit_epoch(epoch, res).await.unwrap(); + let res = storage + .seal_and_sync_epoch(epoch, table_id_set.clone()) + .await + .unwrap(); + hummock_meta_client + .commit_epoch(epoch, res, false) + .await + .unwrap(); epoch += millisec_interval_epoch; } @@ -955,12 +973,12 @@ pub(crate) mod tests { level: 0, ..Default::default() }; + let compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager_ref.clone(), existing_table_id) + .await; // 2. get compact task let mut compact_task = hummock_manager_ref - .manual_get_compact_task( - StaticCompactionGroupId::StateDefault.into(), - manual_compcation_option, - ) + .manual_get_compact_task(compaction_group_id, manual_compcation_option) .await .unwrap() .unwrap(); @@ -1013,7 +1031,7 @@ pub(crate) mod tests { // 4. 
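The test above drops one table, compacts, and then asserts that a full scan only returns keys of the surviving table. A toy model of that filtering step (simplified key type, not the real Hummock key codec or compaction filter implementation):

```rust
use std::collections::HashSet;

#[derive(Debug, Clone, PartialEq)]
struct Key {
    table_id: u32,
    user_key: Vec<u8>,
}

/// Keys whose table id belongs to a dropped table are filtered out during compaction,
/// so only the surviving table's data remains in the output.
fn compact_state_clean(input: Vec<Key>, dropped: &HashSet<u32>) -> Vec<Key> {
    input
        .into_iter()
        .filter(|k| !dropped.contains(&k.table_id))
        .collect()
}

fn main() {
    let dropped = HashSet::from([1u32]);
    let input = vec![
        Key { table_id: 1, user_key: b"a".to_vec() },
        Key { table_id: 2, user_key: b"a".to_vec() },
        Key { table_id: 2, user_key: b"b".to_vec() },
    ];
    let output = compact_state_clean(input, &dropped);
    assert!(output.iter().all(|k| k.table_id == 2));
    assert_eq!(output.len(), 2);
}
```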
get the latest version and check let version: HummockVersion = hummock_manager_ref.get_current_version().await; let mut tables_from_version = vec![]; - version.level_iter(StaticCompactionGroupId::StateDefault.into(), |level| { + version.level_iter(compaction_group_id, |level| { tables_from_version.extend(level.table_infos.iter().cloned()); true }); @@ -1033,10 +1051,7 @@ pub(crate) mod tests { // 5. get compact task and there should be none let compact_task = hummock_manager_ref - .get_compact_task( - StaticCompactionGroupId::StateDefault.into(), - &mut default_compaction_selector(), - ) + .get_compact_task(compaction_group_id, &mut default_compaction_selector()) .await .unwrap(); assert!(compact_task.is_none()); @@ -1090,7 +1105,7 @@ pub(crate) mod tests { hummock_meta_client.clone(), get_notification_client_for_test(env, hummock_manager_ref.clone(), worker_node.clone()), &hummock_manager_ref, - TableId::from(existing_table_id), + &[existing_table_id], ) .await; @@ -1131,13 +1146,14 @@ pub(crate) mod tests { let mut local = storage .new_local(NewLocalOptions::for_test(existing_table_id.into())) .await; - storage.start_epoch(epoch, HashSet::from_iter([existing_table_id.into()])); + let table_id_set = HashSet::from_iter([existing_table_id.into()]); + storage.start_epoch(epoch, table_id_set.clone()); for i in 0..kv_count { if i == 0 { local.init_for_test(epoch).await.unwrap(); } let next_epoch = epoch + millisec_interval_epoch; - storage.start_epoch(next_epoch, HashSet::from_iter([existing_table_id.into()])); + storage.start_epoch(next_epoch, table_id_set.clone()); epoch_set.insert(epoch); let ramdom_key = [key_prefix.as_ref(), &rand::thread_rng().gen::<[u8; 32]>()].concat(); @@ -1146,8 +1162,14 @@ pub(crate) mod tests { .unwrap(); local.flush().await.unwrap(); local.seal_current_epoch(next_epoch, SealCurrentEpochOptions::for_test()); - let res = storage.seal_and_sync_epoch(epoch).await.unwrap(); - hummock_meta_client.commit_epoch(epoch, res).await.unwrap(); + let res = storage + .seal_and_sync_epoch(epoch, table_id_set.clone()) + .await + .unwrap(); + hummock_meta_client + .commit_epoch(epoch, res, false) + .await + .unwrap(); epoch += millisec_interval_epoch; } @@ -1155,12 +1177,12 @@ pub(crate) mod tests { level: 0, ..Default::default() }; + let compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager_ref.clone(), existing_table_id) + .await; // 2. get compact task let mut compact_task = hummock_manager_ref - .manual_get_compact_task( - StaticCompactionGroupId::StateDefault.into(), - manual_compcation_option, - ) + .manual_get_compact_task(compaction_group_id, manual_compcation_option) .await .unwrap() .unwrap(); @@ -1205,7 +1227,7 @@ pub(crate) mod tests { // 4. get the latest version and check let version: HummockVersion = hummock_manager_ref.get_current_version().await; let tables_from_version: Vec<_> = version - .get_compaction_group_levels(StaticCompactionGroupId::StateDefault.into()) + .get_compaction_group_levels(compaction_group_id) .levels .iter() .flat_map(|level| level.table_infos.iter()) @@ -1226,10 +1248,7 @@ pub(crate) mod tests { // 5. 
get compact task and there should be none let compact_task = hummock_manager_ref - .get_compact_task( - StaticCompactionGroupId::StateDefault.into(), - &mut default_compaction_selector(), - ) + .get_compact_task(compaction_group_id, &mut default_compaction_selector()) .await .unwrap(); assert!(compact_task.is_none()); @@ -1290,7 +1309,7 @@ pub(crate) mod tests { hummock_meta_client.clone(), get_notification_client_for_test(env, hummock_manager_ref.clone(), worker_node.clone()), &hummock_manager_ref, - TableId::from(existing_table_id), + &[existing_table_id], ) .await; let (compact_ctx, filter_key_extractor_manager) = @@ -1324,18 +1343,24 @@ pub(crate) mod tests { // .unwrap(); local.seal_current_epoch(u64::MAX, SealCurrentEpochOptions::for_test()); - flush_and_commit(&hummock_meta_client, &storage, epoch).await; + flush_and_commit( + &hummock_meta_client, + &storage, + epoch, + existing_table_id.into(), + ) + .await; let manual_compcation_option = ManualCompactionOption { level: 0, ..Default::default() }; // 2. get compact task + let compaction_group_id = + get_compaction_group_id_by_table_id(hummock_manager_ref.clone(), existing_table_id) + .await; let mut compact_task = hummock_manager_ref - .manual_get_compact_task( - StaticCompactionGroupId::StateDefault.into(), - manual_compcation_option, - ) + .manual_get_compact_task(compaction_group_id, manual_compcation_option) .await .unwrap() .unwrap(); @@ -1376,7 +1401,7 @@ pub(crate) mod tests { // 4. get the latest version and check let version = hummock_manager_ref.get_current_version().await; let output_level_info = version - .get_compaction_group_levels(StaticCompactionGroupId::StateDefault.into()) + .get_compaction_group_levels(compaction_group_id) .levels .last() .unwrap(); @@ -1505,7 +1530,7 @@ pub(crate) mod tests { hummock_meta_client.clone(), get_notification_client_for_test(env, hummock_manager_ref.clone(), worker_node.clone()), &hummock_manager_ref, - TableId::from(existing_table_id), + &[existing_table_id], ) .await; hummock_manager_ref.get_new_sst_ids(10).await.unwrap(); @@ -1680,7 +1705,7 @@ pub(crate) mod tests { hummock_meta_client.clone(), get_notification_client_for_test(env, hummock_manager_ref.clone(), worker_node.clone()), &hummock_manager_ref, - TableId::from(existing_table_id), + &[existing_table_id], ) .await; hummock_manager_ref.get_new_sst_ids(10).await.unwrap(); @@ -1798,7 +1823,7 @@ pub(crate) mod tests { hummock_meta_client.clone(), get_notification_client_for_test(env, hummock_manager_ref.clone(), worker_node.clone()), &hummock_manager_ref, - TableId::from(existing_table_id), + &[existing_table_id], ) .await; hummock_manager_ref.get_new_sst_ids(10).await.unwrap(); @@ -1980,4 +2005,510 @@ pub(crate) mod tests { count += 1; } } + + #[tokio::test] + async fn test_split_and_merge() { + let (env, hummock_manager_ref, _cluster_manager_ref, worker_node) = + setup_compute_env(8080).await; + let hummock_meta_client: Arc = Arc::new(MockHummockMetaClient::new( + hummock_manager_ref.clone(), + worker_node.id, + )); + + let table_id_1 = TableId::from(1); + let table_id_2 = TableId::from(2); + + let storage = get_hummock_storage( + hummock_meta_client.clone(), + get_notification_client_for_test(env, hummock_manager_ref.clone(), worker_node.clone()), + &hummock_manager_ref, + &[table_id_1.table_id(), table_id_2.table_id()], + ) + .await; + + // basic cg2 -> [1, 2] + let rpc_filter_key_extractor_manager = match storage.filter_key_extractor_manager().clone() + { + FilterKeyExtractorManager::RpcFilterKeyExtractorManager( + 
rpc_filter_key_extractor_manager, + ) => rpc_filter_key_extractor_manager, + FilterKeyExtractorManager::StaticFilterKeyExtractorManager(_) => unreachable!(), + }; + + let mut key = BytesMut::default(); + key.put_u16(1); + key.put_slice(b"key_prefix"); + let key_prefix = key.freeze(); + + rpc_filter_key_extractor_manager.update( + table_id_1.table_id(), + Arc::new(FilterKeyExtractorImpl::FixedLength( + FixedLengthFilterKeyExtractor::new(TABLE_PREFIX_LEN + key_prefix.len()), + )), + ); + rpc_filter_key_extractor_manager.update( + table_id_2.table_id(), + Arc::new(FilterKeyExtractorImpl::FixedLength( + FixedLengthFilterKeyExtractor::new(TABLE_PREFIX_LEN + key_prefix.len()), + )), + ); + + let filter_key_extractor_manager = FilterKeyExtractorManager::RpcFilterKeyExtractorManager( + rpc_filter_key_extractor_manager, + ); + let compact_ctx = get_compactor_context(&storage); + let sstable_object_id_manager = Arc::new(SstableObjectIdManager::new( + hummock_meta_client.clone(), + storage + .storage_opts() + .clone() + .sstable_id_remote_fetch_number, + )); + + let base_epoch = Epoch::now(); + let mut epoch: u64 = base_epoch.0; + let millisec_interval_epoch: u64 = (1 << 16) * 100; + + let mut local_1 = storage + .new_local(NewLocalOptions::for_test(table_id_1)) + .await; + let mut local_2 = storage + .new_local(NewLocalOptions::for_test(table_id_2)) + .await; + + let val = Bytes::from(b"0"[..].to_vec()); + + async fn write_data( + storage: &HummockStorage, + local_1: (&mut LocalHummockStorage, bool), + local_2: (&mut LocalHummockStorage, bool), + epoch: &mut u64, + val: Bytes, + kv_count: u64, + millisec_interval_epoch: u64, + key_prefix: Bytes, + hummock_meta_client: Arc, + is_init: &mut bool, + ) { + let table_id_set = + HashSet::from_iter(vec![local_1.0.table_id(), local_2.0.table_id()].into_iter()); + + storage.start_epoch(*epoch, table_id_set.clone()); + for i in 0..kv_count { + if i == 0 && *is_init { + local_1.0.init_for_test(*epoch).await.unwrap(); + local_2.0.init_for_test(*epoch).await.unwrap(); + + *is_init = false; + } + let next_epoch = *epoch + millisec_interval_epoch; + storage.start_epoch(next_epoch, table_id_set.clone()); + + let ramdom_key = + [key_prefix.as_ref(), &rand::thread_rng().gen::<[u8; 32]>()].concat(); + + if local_1.1 { + local_1 + .0 + .insert(TableKey(Bytes::from(ramdom_key.clone())), val.clone(), None) + .unwrap(); + } + local_1.0.flush().await.unwrap(); + local_1 + .0 + .seal_current_epoch(next_epoch, SealCurrentEpochOptions::for_test()); + + if local_2.1 { + local_2 + .0 + .insert(TableKey(Bytes::from(ramdom_key.clone())), val.clone(), None) + .unwrap(); + } + local_2.0.flush().await.unwrap(); + local_2 + .0 + .seal_current_epoch(next_epoch, SealCurrentEpochOptions::for_test()); + + let res = storage + .seal_and_sync_epoch(*epoch, table_id_set.clone()) + .await + .unwrap(); + hummock_meta_client + .commit_epoch(*epoch, res, false) + .await + .unwrap(); + *epoch += millisec_interval_epoch; + } + } + + let mut is_init = true; + write_data( + &storage, + (&mut local_1, true), + (&mut local_2, true), + &mut epoch, + val.clone(), + 1, + millisec_interval_epoch, + key_prefix.clone(), + hummock_meta_client.clone(), + &mut is_init, + ) + .await; + epoch += millisec_interval_epoch; + + let parent_group_id = 2; + let split_table_ids = vec![table_id_2.table_id()]; + + async fn compact_once( + group_id: CompactionGroupId, + level: usize, + hummock_manager_ref: HummockManagerRef, + compact_ctx: CompactorContext, + filter_key_extractor_manager: FilterKeyExtractorManager, + 
sstable_object_id_manager: Arc, + ) { + // compact left group + let manual_compcation_option = ManualCompactionOption { + level, + ..Default::default() + }; + // 2. get compact task + let compact_task = hummock_manager_ref + .manual_get_compact_task(group_id, manual_compcation_option) + .await + .unwrap(); + + if compact_task.is_none() { + return; + } + + let mut compact_task = compact_task.unwrap(); + + let compaction_filter_flag = + CompactionFilterFlag::STATE_CLEAN | CompactionFilterFlag::TTL; + compact_task.compaction_filter_mask = compaction_filter_flag.bits(); + compact_task.current_epoch_time = hummock_manager_ref + .get_current_version() + .await + .max_committed_epoch(); + + // 3. compact + let (_tx, rx) = tokio::sync::oneshot::channel(); + let ((result_task, task_stats), _) = compact( + compact_ctx, + compact_task.clone(), + rx, + Box::new(sstable_object_id_manager.clone()), + filter_key_extractor_manager.clone(), + ) + .await; + + hummock_manager_ref + .report_compact_task( + result_task.task_id, + result_task.task_status, + result_task.sorted_output_ssts, + Some(to_prost_table_stats_map(task_stats)), + ) + .await + .unwrap(); + } + + // compact + compact_once( + parent_group_id, + 0, + hummock_manager_ref.clone(), + compact_ctx.clone(), + filter_key_extractor_manager.clone(), + sstable_object_id_manager.clone(), + ) + .await; + + let new_cg_id = hummock_manager_ref + .split_compaction_group(parent_group_id, &split_table_ids, 0) + .await + .unwrap(); + + assert_ne!(parent_group_id, new_cg_id); + assert!(hummock_manager_ref + .merge_compaction_group(parent_group_id, new_cg_id) + .await + .is_err()); + + write_data( + &storage, + (&mut local_1, true), + (&mut local_2, true), + &mut epoch, + val.clone(), + 100, + millisec_interval_epoch, + key_prefix.clone(), + hummock_meta_client.clone(), + &mut is_init, + ) + .await; + epoch += millisec_interval_epoch; + + compact_once( + parent_group_id, + 0, + hummock_manager_ref.clone(), + compact_ctx.clone(), + filter_key_extractor_manager.clone(), + sstable_object_id_manager.clone(), + ) + .await; + + compact_once( + new_cg_id, + 0, + hummock_manager_ref.clone(), + compact_ctx.clone(), + filter_key_extractor_manager.clone(), + sstable_object_id_manager.clone(), + ) + .await; + + // try merge + hummock_manager_ref + .merge_compaction_group(parent_group_id, new_cg_id) + .await + .unwrap(); + + let new_cg_id = hummock_manager_ref + .split_compaction_group(parent_group_id, &split_table_ids, 0) + .await + .unwrap(); + + compact_once( + parent_group_id, + 0, + hummock_manager_ref.clone(), + compact_ctx.clone(), + filter_key_extractor_manager.clone(), + sstable_object_id_manager.clone(), + ) + .await; + + compact_once( + new_cg_id, + 0, + hummock_manager_ref.clone(), + compact_ctx.clone(), + filter_key_extractor_manager.clone(), + sstable_object_id_manager.clone(), + ) + .await; + + // write left + write_data( + &storage, + (&mut local_1, true), + (&mut local_2, false), + &mut epoch, + val.clone(), + 16, + millisec_interval_epoch, + key_prefix.clone(), + hummock_meta_client.clone(), + &mut is_init, + ) + .await; + + epoch += millisec_interval_epoch; + + // try merge + hummock_manager_ref + .merge_compaction_group(parent_group_id, new_cg_id) + .await + .unwrap(); + + // compact + compact_once( + parent_group_id, + 0, + hummock_manager_ref.clone(), + compact_ctx.clone(), + filter_key_extractor_manager.clone(), + sstable_object_id_manager.clone(), + ) + .await; + + // try split + let new_cg_id = hummock_manager_ref + 
.split_compaction_group(parent_group_id, &split_table_ids, 0) + .await + .unwrap(); + + // write right + write_data( + &storage, + (&mut local_1, false), + (&mut local_2, true), + &mut epoch, + val.clone(), + 16, + millisec_interval_epoch, + key_prefix.clone(), + hummock_meta_client.clone(), + &mut is_init, + ) + .await; + + epoch += millisec_interval_epoch; + + hummock_manager_ref + .merge_compaction_group(parent_group_id, new_cg_id) + .await + .unwrap(); + + // write left and right + + write_data( + &storage, + (&mut local_1, true), + (&mut local_2, true), + &mut epoch, + val.clone(), + 1, + millisec_interval_epoch, + key_prefix.clone(), + hummock_meta_client.clone(), + &mut is_init, + ) + .await; + + epoch += millisec_interval_epoch; + + compact_once( + parent_group_id, + 0, + hummock_manager_ref.clone(), + compact_ctx.clone(), + filter_key_extractor_manager.clone(), + sstable_object_id_manager.clone(), + ) + .await; + + compact_once( + new_cg_id, + 0, + hummock_manager_ref.clone(), + compact_ctx.clone(), + filter_key_extractor_manager.clone(), + sstable_object_id_manager.clone(), + ) + .await; + + async fn compact_all( + group_id: CompactionGroupId, + level: usize, + hummock_manager_ref: HummockManagerRef, + compact_ctx: CompactorContext, + filter_key_extractor_manager: FilterKeyExtractorManager, + sstable_object_id_manager: Arc, + ) { + loop { + let manual_compcation_option = ManualCompactionOption { + level, + ..Default::default() + }; + let compact_task = hummock_manager_ref + .manual_get_compact_task(group_id, manual_compcation_option) + .await + .unwrap(); + + if compact_task.is_none() { + break; + } + + let mut compact_task = compact_task.unwrap(); + let compaction_filter_flag = + CompactionFilterFlag::STATE_CLEAN | CompactionFilterFlag::TTL; + compact_task.compaction_filter_mask = compaction_filter_flag.bits(); + compact_task.current_epoch_time = hummock_manager_ref + .get_current_version() + .await + .max_committed_epoch(); + + // 3. 
compact + let (_tx, rx) = tokio::sync::oneshot::channel(); + let ((result_task, task_stats), _) = compact( + compact_ctx.clone(), + compact_task.clone(), + rx, + Box::new(sstable_object_id_manager.clone()), + filter_key_extractor_manager.clone(), + ) + .await; + + hummock_manager_ref + .report_compact_task( + result_task.task_id, + result_task.task_status, + result_task.sorted_output_ssts, + Some(to_prost_table_stats_map(task_stats)), + ) + .await + .unwrap(); + } + } + + // try split + let new_cg_id = hummock_manager_ref + .split_compaction_group(parent_group_id, &split_table_ids, 0) + .await + .unwrap(); + + // try merge + assert!(hummock_manager_ref + .merge_compaction_group(parent_group_id, new_cg_id) + .await + .is_err()); + + // write left and right + + write_data( + &storage, + (&mut local_1, true), + (&mut local_2, true), + &mut epoch, + val.clone(), + 200, + millisec_interval_epoch, + key_prefix.clone(), + hummock_meta_client.clone(), + &mut is_init, + ) + .await; + + compact_all( + parent_group_id, + 0, + hummock_manager_ref.clone(), + compact_ctx.clone(), + filter_key_extractor_manager.clone(), + sstable_object_id_manager.clone(), + ) + .await; + + compact_all( + new_cg_id, + 0, + hummock_manager_ref.clone(), + compact_ctx.clone(), + filter_key_extractor_manager.clone(), + sstable_object_id_manager.clone(), + ) + .await; + + // try merge + hummock_manager_ref + .merge_compaction_group(parent_group_id, new_cg_id) + .await + .unwrap(); + } } diff --git a/src/storage/hummock_test/src/failpoint_tests.rs b/src/storage/hummock_test/src/failpoint_tests.rs index 240c07cd82c4b..27072abba08f2 100644 --- a/src/storage/hummock_test/src/failpoint_tests.rs +++ b/src/storage/hummock_test/src/failpoint_tests.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License.
+use std::collections::HashSet; use std::ops::Bound; use std::sync::Arc; @@ -140,8 +141,12 @@ async fn test_failpoints_state_store_read_upload() { ); // sync epoch1 test the read_error - let res = hummock_storage.seal_and_sync_epoch(1).await.unwrap(); - meta_client.commit_epoch(1, res).await.unwrap(); + let table_id_set = HashSet::from_iter([local.table_id()]); + let res = hummock_storage + .seal_and_sync_epoch(1, table_id_set.clone()) + .await + .unwrap(); + meta_client.commit_epoch(1, res, false).await.unwrap(); hummock_storage .try_wait_epoch(HummockReadEpoch::Committed(1)) .await @@ -208,12 +213,17 @@ async fn test_failpoints_state_store_read_upload() { // test the upload_error fail::cfg(mem_upload_err, "return").unwrap(); - let result = hummock_storage.seal_and_sync_epoch(3).await; + let result = hummock_storage + .seal_and_sync_epoch(3, table_id_set.clone()) + .await; assert!(result.is_err()); fail::remove(mem_upload_err); - let res = hummock_storage.seal_and_sync_epoch(3).await.unwrap(); - meta_client.commit_epoch(3, res).await.unwrap(); + let res = hummock_storage + .seal_and_sync_epoch(3, table_id_set) + .await + .unwrap(); + meta_client.commit_epoch(3, res, false).await.unwrap(); hummock_storage .try_wait_epoch(HummockReadEpoch::Committed(3)) .await diff --git a/src/storage/hummock_test/src/hummock_storage_tests.rs b/src/storage/hummock_test/src/hummock_storage_tests.rs index fc0fd6ae97b4f..18bad67a62570 100644 --- a/src/storage/hummock_test/src/hummock_storage_tests.rs +++ b/src/storage/hummock_test/src/hummock_storage_tests.rs @@ -461,6 +461,7 @@ async fn test_storage_basic() { #[tokio::test] async fn test_state_store_sync() { const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; + let table_id_set = HashSet::from_iter([TEST_TABLE_ID]); let test_env = prepare_hummock_test_env().await; test_env.register_table_id(TEST_TABLE_ID).await; let mut hummock_storage = test_env @@ -557,10 +558,14 @@ async fn test_state_store_sync() { .start_epoch(epoch3, HashSet::from_iter([TEST_TABLE_ID])); hummock_storage.seal_current_epoch(epoch3, SealCurrentEpochOptions::for_test()); - let res = test_env.storage.seal_and_sync_epoch(epoch1).await.unwrap(); + let res = test_env + .storage + .seal_and_sync_epoch(epoch1, table_id_set.clone()) + .await + .unwrap(); test_env .meta_client - .commit_epoch(epoch1, res) + .commit_epoch(epoch1, res, false) .await .unwrap(); test_env.storage.try_wait_epoch_for_test(epoch1).await; @@ -599,10 +604,14 @@ async fn test_state_store_sync() { } } - let res = test_env.storage.seal_and_sync_epoch(epoch2).await.unwrap(); + let res = test_env + .storage + .seal_and_sync_epoch(epoch2, table_id_set.clone()) + .await + .unwrap(); test_env .meta_client - .commit_epoch(epoch2, res) + .commit_epoch(epoch2, res, false) .await .unwrap(); test_env.storage.try_wait_epoch_for_test(epoch2).await; @@ -819,6 +828,7 @@ async fn test_state_store_sync() { #[tokio::test] async fn test_delete_get() { const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; + let table_id_set = HashSet::from_iter([TEST_TABLE_ID]); let test_env = prepare_hummock_test_env().await; test_env.register_table_id(TEST_TABLE_ID).await; let mut hummock_storage = test_env @@ -864,10 +874,14 @@ async fn test_delete_get() { .storage .start_epoch(epoch2, HashSet::from_iter([TEST_TABLE_ID])); hummock_storage.seal_current_epoch(epoch2, SealCurrentEpochOptions::for_test()); - let res = test_env.storage.seal_and_sync_epoch(epoch1).await.unwrap(); + let res = test_env + .storage + .seal_and_sync_epoch(epoch1, 
table_id_set.clone()) + .await + .unwrap(); test_env .meta_client - .commit_epoch(epoch1, res) + .commit_epoch(epoch1, res, false) .await .unwrap(); @@ -886,10 +900,14 @@ async fn test_delete_get() { .await .unwrap(); hummock_storage.seal_current_epoch(u64::MAX, SealCurrentEpochOptions::for_test()); - let res = test_env.storage.seal_and_sync_epoch(epoch2).await.unwrap(); + let res = test_env + .storage + .seal_and_sync_epoch(epoch2, table_id_set) + .await + .unwrap(); test_env .meta_client - .commit_epoch(epoch2, res) + .commit_epoch(epoch2, res, false) .await .unwrap(); test_env.storage.try_wait_epoch_for_test(epoch2).await; @@ -912,6 +930,7 @@ async fn test_delete_get() { #[tokio::test] async fn test_multiple_epoch_sync() { const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; + let table_id_set = HashSet::from_iter([TEST_TABLE_ID]); let test_env = prepare_hummock_test_env().await; test_env.register_table_id(TEST_TABLE_ID).await; let mut hummock_storage = test_env @@ -1054,19 +1073,27 @@ async fn test_multiple_epoch_sync() { .storage .start_epoch(epoch4, HashSet::from_iter([TEST_TABLE_ID])); hummock_storage.seal_current_epoch(epoch4, SealCurrentEpochOptions::for_test()); - let sync_result2 = test_env.storage.seal_and_sync_epoch(epoch2).await.unwrap(); - let sync_result3 = test_env.storage.seal_and_sync_epoch(epoch3).await.unwrap(); + let sync_result2 = test_env + .storage + .seal_and_sync_epoch(epoch2, table_id_set.clone()) + .await + .unwrap(); + let sync_result3 = test_env + .storage + .seal_and_sync_epoch(epoch3, table_id_set) + .await + .unwrap(); test_get().await; test_env .meta_client - .commit_epoch(epoch2, sync_result2) + .commit_epoch(epoch2, sync_result2, false) .await .unwrap(); test_env .meta_client - .commit_epoch(epoch3, sync_result3) + .commit_epoch(epoch3, sync_result3, false) .await .unwrap(); test_env.storage.try_wait_epoch_for_test(epoch3).await; @@ -1076,6 +1103,7 @@ async fn test_multiple_epoch_sync() { #[tokio::test] async fn test_iter_with_min_epoch() { const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; + let table_id_set = HashSet::from_iter([TEST_TABLE_ID]); let test_env = prepare_hummock_test_env().await; test_env.register_table_id(TEST_TABLE_ID).await; let mut hummock_storage = test_env @@ -1222,16 +1250,24 @@ async fn test_iter_with_min_epoch() { { // test after sync - let sync_result1 = test_env.storage.seal_and_sync_epoch(epoch1).await.unwrap(); - let sync_result2 = test_env.storage.seal_and_sync_epoch(epoch2).await.unwrap(); + let sync_result1 = test_env + .storage + .seal_and_sync_epoch(epoch1, table_id_set.clone()) + .await + .unwrap(); + let sync_result2 = test_env + .storage + .seal_and_sync_epoch(epoch2, table_id_set) + .await + .unwrap(); test_env .meta_client - .commit_epoch(epoch1, sync_result1) + .commit_epoch(epoch1, sync_result1, false) .await .unwrap(); test_env .meta_client - .commit_epoch(epoch2, sync_result2) + .commit_epoch(epoch2, sync_result2, false) .await .unwrap(); test_env.storage.try_wait_epoch_for_test(epoch2).await; @@ -1320,6 +1356,7 @@ async fn test_iter_with_min_epoch() { #[tokio::test] async fn test_hummock_version_reader() { const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; + let table_id_set = HashSet::from_iter([TEST_TABLE_ID]); let test_env = prepare_hummock_test_env().await; test_env.register_table_id(TEST_TABLE_ID).await; let mut hummock_storage = test_env @@ -1514,26 +1551,38 @@ async fn test_hummock_version_reader() { } { - let sync_result1 = test_env.storage.seal_and_sync_epoch(epoch1).await.unwrap(); 
+ let sync_result1 = test_env + .storage + .seal_and_sync_epoch(epoch1, table_id_set.clone()) + .await + .unwrap(); test_env .meta_client - .commit_epoch(epoch1, sync_result1) + .commit_epoch(epoch1, sync_result1, false) .await .unwrap(); test_env.storage.try_wait_epoch_for_test(epoch1).await; - let sync_result2 = test_env.storage.seal_and_sync_epoch(epoch2).await.unwrap(); + let sync_result2 = test_env + .storage + .seal_and_sync_epoch(epoch2, table_id_set.clone()) + .await + .unwrap(); test_env .meta_client - .commit_epoch(epoch2, sync_result2) + .commit_epoch(epoch2, sync_result2, false) .await .unwrap(); test_env.storage.try_wait_epoch_for_test(epoch2).await; - let sync_result3 = test_env.storage.seal_and_sync_epoch(epoch3).await.unwrap(); + let sync_result3 = test_env + .storage + .seal_and_sync_epoch(epoch3, table_id_set) + .await + .unwrap(); test_env .meta_client - .commit_epoch(epoch3, sync_result3) + .commit_epoch(epoch3, sync_result3, false) .await .unwrap(); test_env.storage.try_wait_epoch_for_test(epoch3).await; @@ -1764,6 +1813,7 @@ async fn test_hummock_version_reader() { #[tokio::test] async fn test_get_with_min_epoch() { const TEST_TABLE_ID: TableId = TableId { table_id: 233 }; + let table_id_set = HashSet::from_iter([TEST_TABLE_ID]); let test_env = prepare_hummock_test_env().await; test_env.register_table_id(TEST_TABLE_ID).await; let mut hummock_storage = test_env @@ -1908,16 +1958,24 @@ async fn test_get_with_min_epoch() { // test after sync - let sync_result1 = test_env.storage.seal_and_sync_epoch(epoch1).await.unwrap(); - let sync_result2 = test_env.storage.seal_and_sync_epoch(epoch2).await.unwrap(); + let sync_result1 = test_env + .storage + .seal_and_sync_epoch(epoch1, table_id_set.clone()) + .await + .unwrap(); + let sync_result2 = test_env + .storage + .seal_and_sync_epoch(epoch2, table_id_set) + .await + .unwrap(); test_env .meta_client - .commit_epoch(epoch1, sync_result1) + .commit_epoch(epoch1, sync_result1, false) .await .unwrap(); test_env .meta_client - .commit_epoch(epoch2, sync_result2) + .commit_epoch(epoch2, sync_result2, false) .await .unwrap(); diff --git a/src/storage/hummock_test/src/snapshot_tests.rs b/src/storage/hummock_test/src/snapshot_tests.rs index bde3c046ed6ca..b15e8a3fa372c 100644 --- a/src/storage/hummock_test/src/snapshot_tests.rs +++ b/src/storage/hummock_test/src/snapshot_tests.rs @@ -139,10 +139,13 @@ async fn test_snapshot_inner( hummock_storage.start_epoch(epoch2, HashSet::from_iter([Default::default()])); local.seal_current_epoch(epoch2, SealCurrentEpochOptions::for_test()); if enable_sync { - let res = hummock_storage.seal_and_sync_epoch(epoch1).await.unwrap(); + let res = hummock_storage + .seal_and_sync_epoch(epoch1, HashSet::from_iter([local.table_id()])) + .await + .unwrap(); if enable_commit { mock_hummock_meta_client - .commit_epoch(epoch1, res) + .commit_epoch(epoch1, res, false) .await .unwrap(); hummock_storage @@ -180,10 +183,13 @@ async fn test_snapshot_inner( hummock_storage.start_epoch(epoch3, HashSet::from_iter([Default::default()])); local.seal_current_epoch(epoch3, SealCurrentEpochOptions::for_test()); if enable_sync { - let res = hummock_storage.seal_and_sync_epoch(epoch2).await.unwrap(); + let res = hummock_storage + .seal_and_sync_epoch(epoch2, HashSet::from_iter([local.table_id()])) + .await + .unwrap(); if enable_commit { mock_hummock_meta_client - .commit_epoch(epoch2, res) + .commit_epoch(epoch2, res, false) .await .unwrap(); hummock_storage @@ -220,10 +226,13 @@ async fn test_snapshot_inner( .unwrap(); 
local.seal_current_epoch(u64::MAX, SealCurrentEpochOptions::for_test()); if enable_sync { - let res = hummock_storage.seal_and_sync_epoch(epoch3).await.unwrap(); + let res = hummock_storage + .seal_and_sync_epoch(epoch3, HashSet::from_iter([local.table_id()])) + .await + .unwrap(); if enable_commit { mock_hummock_meta_client - .commit_epoch(epoch3, res) + .commit_epoch(epoch3, res, false) .await .unwrap(); hummock_storage @@ -279,10 +288,13 @@ async fn test_snapshot_range_scan_inner( .unwrap(); local.seal_current_epoch(u64::MAX, SealCurrentEpochOptions::for_test()); if enable_sync { - let res = hummock_storage.seal_and_sync_epoch(epoch).await.unwrap(); + let res = hummock_storage + .seal_and_sync_epoch(epoch, HashSet::from_iter([local.table_id()])) + .await + .unwrap(); if enable_commit { mock_hummock_meta_client - .commit_epoch(epoch, res) + .commit_epoch(epoch, res, false) .await .unwrap(); hummock_storage diff --git a/src/storage/hummock_test/src/state_store_tests.rs b/src/storage/hummock_test/src/state_store_tests.rs index 67da2150735af..ab1e84aca2a66 100644 --- a/src/storage/hummock_test/src/state_store_tests.rs +++ b/src/storage/hummock_test/src/state_store_tests.rs @@ -375,8 +375,15 @@ async fn test_basic_v2() { .unwrap(); let len = count_stream(iter).await; assert_eq!(len, 4); - let res = hummock_storage.seal_and_sync_epoch(epoch1).await.unwrap(); - meta_client.commit_epoch(epoch1, res).await.unwrap(); + let res = hummock_storage + .seal_and_sync_epoch(epoch1, HashSet::from_iter([local.table_id()])) + .await + .unwrap(); + let is_log_store = false; + meta_client + .commit_epoch(epoch1, res, is_log_store) + .await + .unwrap(); hummock_storage .try_wait_epoch(HummockReadEpoch::Committed(epoch1)) .await @@ -516,11 +523,15 @@ async fn test_state_store_sync_v2() { local.seal_current_epoch(u64::MAX, SealCurrentEpochOptions::for_test()); // trigger a sync + let table_id_set = HashSet::from_iter([local.table_id()]); + hummock_storage + .seal_and_sync_epoch(epoch.prev_epoch(), table_id_set.clone()) + .await + .unwrap(); hummock_storage - .seal_and_sync_epoch(epoch.prev_epoch()) + .seal_and_sync_epoch(epoch, table_id_set) .await .unwrap(); - hummock_storage.seal_and_sync_epoch(epoch).await.unwrap(); // TODO: Uncomment the following lines after flushed sstable can be accessed. 
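The test updates in this file and in the other hummock test files all follow the same two-step shape: sync an epoch against an explicit set of table ids, then commit it with the new `is_log_store` flag. The following is a condensed, illustrative sketch only, not part of the patch: the helper name `sync_and_commit` is invented here, and the snippet assumes the imports these test files already use; the methods it calls (`seal_and_sync_epoch`, `commit_epoch`, `try_wait_epoch`) are the ones whose signatures this diff changes or exercises.

async fn sync_and_commit(
    storage: &HummockStorage,
    meta_client: &MockHummockMetaClient,
    epoch: u64,
    table_ids: HashSet<TableId>,
) {
    // Seal and sync only the listed tables; the old one-argument form
    // implicitly synced every table known to the pinned version.
    let sync_result = storage
        .seal_and_sync_epoch(epoch, table_ids)
        .await
        .unwrap();
    // `false` = no log-store (old-value) data in this epoch; `test_iter_log`
    // further below passes `true` instead.
    meta_client
        .commit_epoch(epoch, sync_result, false)
        .await
        .unwrap();
    // Wait until the committed epoch becomes visible to readers.
    storage
        .try_wait_epoch(HummockReadEpoch::Committed(epoch))
        .await
        .unwrap();
}
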
// FYI: https://github.com/risingwavelabs/risingwave/pull/1928#discussion_r852698719 @@ -1056,8 +1067,16 @@ async fn test_delete_get_v2() { hummock_storage.start_epoch(epoch2, HashSet::from_iter([Default::default()])); local.seal_current_epoch(epoch2, SealCurrentEpochOptions::for_test()); - let res = hummock_storage.seal_and_sync_epoch(epoch1).await.unwrap(); - meta_client.commit_epoch(epoch1, res).await.unwrap(); + let table_id_set = HashSet::from_iter([local.table_id()]); + let res = hummock_storage + .seal_and_sync_epoch(epoch1, table_id_set.clone()) + .await + .unwrap(); + let is_log_store = false; + meta_client + .commit_epoch(epoch1, res, is_log_store) + .await + .unwrap(); let batch2 = vec![( gen_key_from_str(VirtualNode::ZERO, "bb"), @@ -1074,8 +1093,14 @@ async fn test_delete_get_v2() { .await .unwrap(); local.seal_current_epoch(u64::MAX, SealCurrentEpochOptions::for_test()); - let res = hummock_storage.seal_and_sync_epoch(epoch2).await.unwrap(); - meta_client.commit_epoch(epoch2, res).await.unwrap(); + let res = hummock_storage + .seal_and_sync_epoch(epoch2, table_id_set) + .await + .unwrap(); + meta_client + .commit_epoch(epoch2, res, is_log_store) + .await + .unwrap(); hummock_storage .try_wait_epoch(HummockReadEpoch::Committed(epoch2)) .await @@ -1114,6 +1139,7 @@ async fn test_multiple_epoch_sync_v2() { let mut local = hummock_storage .new_local(NewLocalOptions::for_test(TableId::default())) .await; + let table_id_set = HashSet::from_iter([local.table_id()]); hummock_storage.start_epoch(epoch1, HashSet::from_iter([Default::default()])); local.init_for_test(epoch1).await.unwrap(); local @@ -1217,17 +1243,23 @@ async fn test_multiple_epoch_sync_v2() { } }; test_get().await; - let sync_result2 = hummock_storage.seal_and_sync_epoch(epoch2).await.unwrap(); - let sync_result3 = hummock_storage.seal_and_sync_epoch(epoch3).await.unwrap(); + let sync_result2 = hummock_storage + .seal_and_sync_epoch(epoch2, table_id_set.clone()) + .await + .unwrap(); + let sync_result3 = hummock_storage + .seal_and_sync_epoch(epoch3, table_id_set) + .await + .unwrap(); test_get().await; meta_client - .commit_epoch(epoch2, sync_result2) + .commit_epoch(epoch2, sync_result2, false) .await .unwrap(); meta_client - .commit_epoch(epoch3, sync_result3) + .commit_epoch(epoch3, sync_result3, false) .await .unwrap(); hummock_storage @@ -1251,6 +1283,7 @@ async fn test_gc_watermark_and_clear_shared_buffer() { let mut local_hummock_storage = hummock_storage .new_local(NewLocalOptions::for_test(Default::default())) .await; + let table_id_set = HashSet::from_iter([local_hummock_storage.table_id()]); let initial_epoch = hummock_storage.get_pinned_version().max_committed_epoch(); let epoch1 = initial_epoch.next_epoch(); @@ -1305,7 +1338,10 @@ async fn test_gc_watermark_and_clear_shared_buffer() { .unwrap() }; local_hummock_storage.seal_current_epoch(u64::MAX, SealCurrentEpochOptions::for_test()); - let sync_result1 = hummock_storage.seal_and_sync_epoch(epoch1).await.unwrap(); + let sync_result1 = hummock_storage + .seal_and_sync_epoch(epoch1, table_id_set.clone()) + .await + .unwrap(); let min_object_id_epoch1 = min_object_id(&sync_result1); assert_eq!( hummock_storage @@ -1313,7 +1349,10 @@ async fn test_gc_watermark_and_clear_shared_buffer() { .global_watermark_object_id(), min_object_id_epoch1, ); - let sync_result2 = hummock_storage.seal_and_sync_epoch(epoch2).await.unwrap(); + let sync_result2 = hummock_storage + .seal_and_sync_epoch(epoch2, table_id_set) + .await + .unwrap(); let min_object_id_epoch2 = 
min_object_id(&sync_result2); assert_eq!( hummock_storage @@ -1322,7 +1361,7 @@ async fn test_gc_watermark_and_clear_shared_buffer() { min_object_id_epoch1, ); meta_client - .commit_epoch(epoch1, sync_result1) + .commit_epoch(epoch1, sync_result1, false) .await .unwrap(); hummock_storage @@ -1555,12 +1594,13 @@ async fn test_iter_log() { hummock_storage.start_epoch(MAX_EPOCH, HashSet::from_iter([table_id])); let in_memory_state_store = MemoryStateStore::new(); + let is_log_store = true; let mut in_memory_local = in_memory_state_store .new_local(NewLocalOptions { table_id, op_consistency_level: OpConsistencyLevel::ConsistentOldValue { check_old_value: CHECK_BYTES_EQUAL.clone(), - is_log_store: true, + is_log_store, }, table_option: Default::default(), is_replicated: false, @@ -1575,7 +1615,7 @@ async fn test_iter_log() { table_id, op_consistency_level: OpConsistencyLevel::ConsistentOldValue { check_old_value: CHECK_BYTES_EQUAL.clone(), - is_log_store: true, + is_log_store, }, table_option: Default::default(), is_replicated: false, @@ -1585,13 +1625,17 @@ async fn test_iter_log() { // flush for about 10 times per epoch apply_test_log_data(test_log_data.clone(), &mut hummock_local, 0.001).await; + let table_id_set = HashSet::from_iter([table_id]); for (epoch, _) in &test_log_data { - let res = hummock_storage.seal_and_sync_epoch(*epoch).await.unwrap(); + let res = hummock_storage + .seal_and_sync_epoch(*epoch, table_id_set.clone()) + .await + .unwrap(); if *epoch != test_log_data[0].0 { assert!(!res.old_value_ssts.is_empty()); } assert!(!res.uncommitted_ssts.is_empty()); - meta_client.commit_epoch(*epoch, res).await.unwrap(); + meta_client.commit_epoch(*epoch, res, true).await.unwrap(); } hummock_storage diff --git a/src/storage/hummock_test/src/sync_point_tests.rs b/src/storage/hummock_test/src/sync_point_tests.rs index f5ee41783813d..84cdf5513cdeb 100644 --- a/src/storage/hummock_test/src/sync_point_tests.rs +++ b/src/storage/hummock_test/src/sync_point_tests.rs @@ -242,7 +242,7 @@ async fn test_syncpoints_get_in_delete_range_boundary() { hummock_meta_client.clone(), get_notification_client_for_test(env, hummock_manager_ref.clone(), worker_node.clone()), &hummock_manager_ref, - TableId::from(existing_table_id), + &[existing_table_id], ) .await; let (compact_ctx, filter_key_extractor_manager) = @@ -302,7 +302,13 @@ async fn test_syncpoints_get_in_delete_range_boundary() { test_epoch(101), risingwave_storage::store::SealCurrentEpochOptions::for_test(), ); - flush_and_commit(&hummock_meta_client, &storage, test_epoch(100)).await; + flush_and_commit( + &hummock_meta_client, + &storage, + test_epoch(100), + local.table_id(), + ) + .await; compact_once( hummock_manager_ref.clone(), compact_ctx.clone(), @@ -337,7 +343,13 @@ async fn test_syncpoints_get_in_delete_range_boundary() { test_epoch(102), risingwave_storage::store::SealCurrentEpochOptions::for_test(), ); - flush_and_commit(&hummock_meta_client, &storage, test_epoch(101)).await; + flush_and_commit( + &hummock_meta_client, + &storage, + test_epoch(101), + local.table_id(), + ) + .await; compact_once( hummock_manager_ref.clone(), compact_ctx.clone(), @@ -372,7 +384,13 @@ async fn test_syncpoints_get_in_delete_range_boundary() { test_epoch(103), risingwave_storage::store::SealCurrentEpochOptions::for_test(), ); - flush_and_commit(&hummock_meta_client, &storage, test_epoch(102)).await; + flush_and_commit( + &hummock_meta_client, + &storage, + test_epoch(102), + local.table_id(), + ) + .await; // move this two file to the same level. 
compact_once( hummock_manager_ref.clone(), @@ -401,7 +419,13 @@ async fn test_syncpoints_get_in_delete_range_boundary() { u64::MAX, risingwave_storage::store::SealCurrentEpochOptions::for_test(), ); - flush_and_commit(&hummock_meta_client, &storage, test_epoch(103)).await; + flush_and_commit( + &hummock_meta_client, + &storage, + test_epoch(103), + local.table_id(), + ) + .await; // move this two file to the same level. compact_once( hummock_manager_ref.clone(), diff --git a/src/storage/hummock_test/src/test_utils.rs b/src/storage/hummock_test/src/test_utils.rs index bf5c4a8dd8d8c..da861ff92810c 100644 --- a/src/storage/hummock_test/src/test_utils.rs +++ b/src/storage/hummock_test/src/test_utils.rs @@ -243,8 +243,24 @@ impl HummockTestEnv { // Seal, sync and commit a epoch. // On completion of this function call, the provided epoch should be committed and visible. pub async fn commit_epoch(&self, epoch: u64) { - let res = self.storage.seal_and_sync_epoch(epoch).await.unwrap(); - self.meta_client.commit_epoch(epoch, res).await.unwrap(); + let table_ids = self + .manager + .get_current_version() + .await + .state_table_info + .info() + .keys() + .cloned() + .collect(); + let res = self + .storage + .seal_and_sync_epoch(epoch, table_ids) + .await + .unwrap(); + self.meta_client + .commit_epoch(epoch, res, false) + .await + .unwrap(); self.storage.try_wait_epoch_for_test(epoch).await; } diff --git a/src/storage/hummock_trace/src/opts.rs b/src/storage/hummock_trace/src/opts.rs index 5d480cca96b58..ff8b43c15c458 100644 --- a/src/storage/hummock_trace/src/opts.rs +++ b/src/storage/hummock_trace/src/opts.rs @@ -109,7 +109,7 @@ pub struct TracedReadOptions { pub retention_seconds: Option, pub table_id: TracedTableId, pub read_version_from_backup: bool, - pub read_version_from_time_travel: bool, + pub read_committed: bool, } impl TracedReadOptions { @@ -125,7 +125,7 @@ impl TracedReadOptions { retention_seconds: None, table_id: TracedTableId { table_id }, read_version_from_backup: false, - read_version_from_time_travel: false, + read_committed: false, } } } diff --git a/src/storage/src/hummock/event_handler/hummock_event_handler.rs b/src/storage/src/hummock/event_handler/hummock_event_handler.rs index f2aa2ea7fd88d..1c8abc78ddffc 100644 --- a/src/storage/src/hummock/event_handler/hummock_event_handler.rs +++ b/src/storage/src/hummock/event_handler/hummock_event_handler.rs @@ -50,6 +50,7 @@ use crate::hummock::event_handler::{ ReadOnlyRwLockRef, }; use crate::hummock::local_version::pinned_version::PinnedVersion; +use crate::hummock::local_version::recent_versions::RecentVersions; use crate::hummock::store::version::{ HummockReadVersion, StagingData, StagingSstableInfo, VersionUpdate, }; @@ -197,7 +198,7 @@ pub struct HummockEventHandler { local_read_version_mapping: HashMap, version_update_notifier_tx: Arc>, - pinned_version: Arc>, + recent_versions: Arc>, write_conflict_detector: Option>, uploader: HummockUploader, @@ -355,7 +356,10 @@ impl HummockEventHandler { hummock_event_rx, version_update_rx, version_update_notifier_tx, - pinned_version: Arc::new(ArcSwap::from_pointee(pinned_version)), + recent_versions: Arc::new(ArcSwap::from_pointee(RecentVersions::new( + pinned_version, + storage_opts.max_cached_recent_versions_number, + ))), write_conflict_detector, read_version_mapping, local_read_version_mapping: Default::default(), @@ -371,8 +375,8 @@ impl HummockEventHandler { self.version_update_notifier_tx.clone() } - pub fn pinned_version(&self) -> Arc> { - self.pinned_version.clone() + pub fn 
recent_versions(&self) -> Arc> { + self.recent_versions.clone() } pub fn read_version_mapping(&self) -> ReadOnlyReadVersionMapping { @@ -529,17 +533,18 @@ impl HummockEventHandler { .await .expect("should not be empty"); let prev_version_id = latest_version_ref.id(); - let new_version = Self::resolve_version_update_info( + if let Some(new_version) = Self::resolve_version_update_info( latest_version_ref.clone(), version_update, None, - ); - info!( - ?prev_version_id, - new_version_id = ?new_version.id(), - "recv new version" - ); - latest_version = Some(new_version); + ) { + info!( + ?prev_version_id, + new_version_id = ?new_version.id(), + "recv new version" + ); + latest_version = Some(new_version); + } } self.apply_version_update( @@ -582,21 +587,21 @@ impl HummockEventHandler { .unwrap_or_else(|| self.uploader.hummock_version().clone()); let mut sst_delta_infos = vec![]; - let new_pinned_version = Self::resolve_version_update_info( + if let Some(new_pinned_version) = Self::resolve_version_update_info( pinned_version.clone(), version_payload, Some(&mut sst_delta_infos), - ); - - self.refiller - .start_cache_refill(sst_delta_infos, pinned_version, new_pinned_version); + ) { + self.refiller + .start_cache_refill(sst_delta_infos, pinned_version, new_pinned_version); + } } fn resolve_version_update_info( pinned_version: PinnedVersion, version_payload: HummockVersionUpdate, mut sst_delta_infos: Option<&mut Vec>, - ) -> PinnedVersion { + ) -> Option { let newly_pinned_version = match version_payload { HummockVersionUpdate::VersionDeltas(version_deltas) => { let mut version_to_apply = pinned_version.version().clone(); @@ -629,8 +634,9 @@ impl HummockEventHandler { .metrics .event_handler_on_apply_version_update .start_timer(); - self.pinned_version - .store(Arc::new(new_pinned_version.clone())); + self.recent_versions.rcu(|prev_recent_versions| { + prev_recent_versions.with_new_version(new_pinned_version.clone()) + }); { self.for_each_read_version( @@ -663,7 +669,10 @@ impl HummockEventHandler { // TODO: should we change the logic when supporting partial ckpt? 
if let Some(sstable_object_id_manager) = &self.sstable_object_id_manager { sstable_object_id_manager.remove_watermark_object_id(TrackerId::Epoch( - self.pinned_version.load().visible_table_committed_epoch(), + self.recent_versions + .load() + .latest_version() + .visible_table_committed_epoch(), )); } @@ -789,13 +798,13 @@ impl HummockEventHandler { is_replicated, vnodes, } => { - let pinned_version = self.pinned_version.load(); + let pinned_version = self.recent_versions.load().latest_version().clone(); let instance_id = self.generate_instance_id(); let basic_read_version = Arc::new(RwLock::new( HummockReadVersion::new_with_replication_option( table_id, instance_id, - (**pinned_version).clone(), + pinned_version, is_replicated, vnodes, ), @@ -992,7 +1001,7 @@ mod tests { ); let event_tx = event_handler.event_sender(); - let latest_version = event_handler.pinned_version.clone(); + let latest_version = event_handler.recent_versions.clone(); let latest_version_update_tx = event_handler.version_update_notifier_tx.clone(); let send_clear = |version_id| { @@ -1018,12 +1027,15 @@ mod tests { let (old_version, new_version, refill_finish_tx) = refill_task_rx.recv().await.unwrap(); assert_eq!(old_version.version(), initial_version.version()); assert_eq!(new_version.version(), &version1); - assert_eq!(latest_version.load().version(), initial_version.version()); + assert_eq!( + latest_version.load().latest_version().version(), + initial_version.version() + ); let mut changed = latest_version_update_tx.subscribe(); refill_finish_tx.send(()).unwrap(); changed.changed().await.unwrap(); - assert_eq!(latest_version.load().version(), &version1); + assert_eq!(latest_version.load().latest_version().version(), &version1); } // test recovery with pending refill task @@ -1050,11 +1062,11 @@ mod tests { refill_task_rx.recv().await.unwrap(); assert_eq!(old_version3.version(), &version2); assert_eq!(new_version3.version(), &version3); - assert_eq!(latest_version.load().version(), &version1); + assert_eq!(latest_version.load().latest_version().version(), &version1); let rx = send_clear(version3.id); rx.await.unwrap(); - assert_eq!(latest_version.load().version(), &version3); + assert_eq!(latest_version.load().latest_version().version(), &version3); } async fn assert_pending(fut: &mut (impl Future + Unpin)) { @@ -1081,7 +1093,7 @@ mod tests { ))) .unwrap(); rx.await.unwrap(); - assert_eq!(latest_version.load().version(), &version5); + assert_eq!(latest_version.load().latest_version().version(), &version5); } } diff --git a/src/storage/src/hummock/event_handler/uploader/mod.rs b/src/storage/src/hummock/event_handler/uploader/mod.rs index 4494049d93b0b..90e6a9306930a 100644 --- a/src/storage/src/hummock/event_handler/uploader/mod.rs +++ b/src/storage/src/hummock/event_handler/uploader/mod.rs @@ -1643,7 +1643,8 @@ pub(crate) mod tests { let new_pinned_version = uploader .context .pinned_version - .new_pin_version(test_hummock_version(epoch1)); + .new_pin_version(test_hummock_version(epoch1)) + .unwrap(); uploader.update_pinned_version(new_pinned_version); assert_eq!(epoch1, uploader.max_committed_epoch()); } @@ -1672,7 +1673,8 @@ pub(crate) mod tests { let new_pinned_version = uploader .context .pinned_version - .new_pin_version(test_hummock_version(epoch1)); + .new_pin_version(test_hummock_version(epoch1)) + .unwrap(); uploader.update_pinned_version(new_pinned_version); assert!(uploader.data().syncing_data.is_empty()); assert_eq!(epoch1, uploader.max_committed_epoch()); @@ -1706,7 +1708,8 @@ pub(crate) mod tests { let 
new_pinned_version = uploader .context .pinned_version - .new_pin_version(test_hummock_version(epoch1)); + .new_pin_version(test_hummock_version(epoch1)) + .unwrap(); uploader.update_pinned_version(new_pinned_version); assert!(uploader.data().syncing_data.is_empty()); assert_eq!(epoch1, uploader.max_committed_epoch()); @@ -1730,11 +1733,21 @@ pub(crate) mod tests { let epoch4 = epoch3.next_epoch(); let epoch5 = epoch4.next_epoch(); let epoch6 = epoch5.next_epoch(); - let version1 = initial_pinned_version.new_pin_version(test_hummock_version(epoch1)); - let version2 = initial_pinned_version.new_pin_version(test_hummock_version(epoch2)); - let version3 = initial_pinned_version.new_pin_version(test_hummock_version(epoch3)); - let version4 = initial_pinned_version.new_pin_version(test_hummock_version(epoch4)); - let version5 = initial_pinned_version.new_pin_version(test_hummock_version(epoch5)); + let version1 = initial_pinned_version + .new_pin_version(test_hummock_version(epoch1)) + .unwrap(); + let version2 = initial_pinned_version + .new_pin_version(test_hummock_version(epoch2)) + .unwrap(); + let version3 = initial_pinned_version + .new_pin_version(test_hummock_version(epoch3)) + .unwrap(); + let version4 = initial_pinned_version + .new_pin_version(test_hummock_version(epoch4)) + .unwrap(); + let version5 = initial_pinned_version + .new_pin_version(test_hummock_version(epoch5)) + .unwrap(); uploader.start_epochs_for_test([epoch6]); uploader.init_instance(TEST_LOCAL_INSTANCE_ID, TEST_TABLE_ID, epoch6); diff --git a/src/storage/src/hummock/hummock_meta_client.rs b/src/storage/src/hummock/hummock_meta_client.rs index 4445a74884d5a..038856a3ba2f3 100644 --- a/src/storage/src/hummock/hummock_meta_client.rs +++ b/src/storage/src/hummock/hummock_meta_client.rs @@ -80,7 +80,12 @@ impl HummockMetaClient for MonitoredHummockMetaClient { res } - async fn commit_epoch(&self, _epoch: HummockEpoch, _sync_result: SyncResult) -> Result<()> { + async fn commit_epoch( + &self, + _epoch: HummockEpoch, + _sync_result: SyncResult, + _is_log_store: bool, + ) -> Result<()> { panic!("Only meta service can commit_epoch in production.") } @@ -130,7 +135,11 @@ impl HummockMetaClient for MonitoredHummockMetaClient { self.meta_client.subscribe_compaction_event().await } - async fn get_version_by_epoch(&self, epoch: HummockEpoch) -> Result { - self.meta_client.get_version_by_epoch(epoch).await + async fn get_version_by_epoch( + &self, + epoch: HummockEpoch, + table_id: u32, + ) -> Result { + self.meta_client.get_version_by_epoch(epoch, table_id).await } } diff --git a/src/storage/src/hummock/local_version/mod.rs b/src/storage/src/hummock/local_version/mod.rs index 578e123c6574e..4a45c8dc9075c 100644 --- a/src/storage/src/hummock/local_version/mod.rs +++ b/src/storage/src/hummock/local_version/mod.rs @@ -13,3 +13,4 @@ // limitations under the License. pub mod pinned_version; +pub mod recent_versions; diff --git a/src/storage/src/hummock/local_version/pinned_version.rs b/src/storage/src/hummock/local_version/pinned_version.rs index 5ef53edcd26ef..afaafdf7cbe8a 100644 --- a/src/storage/src/hummock/local_version/pinned_version.rs +++ b/src/storage/src/hummock/local_version/pinned_version.rs @@ -92,22 +92,25 @@ impl PinnedVersion { } } - pub fn new_pin_version(&self, version: HummockVersion) -> Self { + pub fn new_pin_version(&self, version: HummockVersion) -> Option { assert!( version.id >= self.version.id, "pinning a older version {}. 
Current is {}", version.id, self.version.id ); + if version.id == self.version.id { + return None; + } let version_id = version.id; - PinnedVersion { + Some(PinnedVersion { version: Arc::new(version), guard: Arc::new(PinnedVersionGuard::new( version_id, self.guard.pinned_version_manager_tx.clone(), )), - } + }) } pub fn id(&self) -> HummockVersionId { diff --git a/src/storage/src/hummock/local_version/recent_versions.rs b/src/storage/src/hummock/local_version/recent_versions.rs new file mode 100644 index 0000000000000..8d3f1a015ad6a --- /dev/null +++ b/src/storage/src/hummock/local_version/recent_versions.rs @@ -0,0 +1,321 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::cmp::Ordering; + +use risingwave_common::catalog::TableId; +use risingwave_hummock_sdk::HummockEpoch; + +use crate::hummock::local_version::pinned_version::PinnedVersion; + +pub struct RecentVersions { + latest_version: PinnedVersion, + is_latest_committed: bool, + recent_versions: Vec, // earlier version at the front + max_version_num: usize, +} + +impl RecentVersions { + pub fn new(version: PinnedVersion, max_version_num: usize) -> Self { + assert!(max_version_num > 0); + Self { + latest_version: version, + is_latest_committed: true, // The first version is always treated as committed epochs + recent_versions: Vec::new(), + max_version_num, + } + } + + fn has_table_committed(&self, new_version: &PinnedVersion) -> bool { + let mut has_table_committed = false; + for (table_id, info) in new_version.version().state_table_info.info() { + if let Some(prev_info) = self + .latest_version + .version() + .state_table_info + .info() + .get(table_id) + { + match info.committed_epoch.cmp(&prev_info.committed_epoch) { + Ordering::Less => { + unreachable!( + "table {} has regress committed epoch {}, prev committed epoch {}", + table_id, info.committed_epoch, prev_info.committed_epoch + ); + } + Ordering::Equal => {} + Ordering::Greater => { + has_table_committed = true; + } + } + } else { + has_table_committed = true; + } + } + has_table_committed + } + + #[must_use] + pub fn with_new_version(&self, version: PinnedVersion) -> Self { + assert!(version.version().id > self.latest_version.version().id); + let is_committed = self.has_table_committed(&version); + let recent_versions = if self.is_latest_committed { + let prev_recent_versions = if self.recent_versions.len() >= self.max_version_num { + assert_eq!(self.recent_versions.len(), self.max_version_num); + &self.recent_versions[1..] + } else { + &self.recent_versions[..] 
+ }; + let mut recent_versions = Vec::with_capacity(prev_recent_versions.len() + 1); + recent_versions.extend(prev_recent_versions.iter().cloned()); + recent_versions.push(self.latest_version.clone()); + recent_versions + } else { + self.recent_versions.clone() + }; + Self { + latest_version: version, + is_latest_committed: is_committed, + recent_versions, + max_version_num: self.max_version_num, + } + } + + pub fn latest_version(&self) -> &PinnedVersion { + &self.latest_version + } + + /// Return the latest version that is safe to read `epoch` on `table_id`. + /// + /// `safe to read` means that the `committed_epoch` of the `table_id` in the version won't be greater than the given `epoch` + pub fn get_safe_version( + &self, + table_id: TableId, + epoch: HummockEpoch, + ) -> Option { + if let Some(info) = self + .latest_version + .version() + .state_table_info + .info() + .get(&table_id) + { + if info.committed_epoch <= epoch { + Some(self.latest_version.clone()) + } else { + self.get_safe_version_from_recent(table_id, epoch) + } + } else { + None + } + } + + fn get_safe_version_from_recent( + &self, + table_id: TableId, + epoch: HummockEpoch, + ) -> Option { + if cfg!(debug_assertions) { + assert!( + epoch + < self + .latest_version + .version() + .state_table_info + .info() + .get(&table_id) + .expect("should exist") + .committed_epoch + ); + } + let result = self.recent_versions.binary_search_by(|version| { + let committed_epoch = version + .version() + .state_table_info + .info() + .get(&table_id) + .map(|info| info.committed_epoch); + if let Some(committed_epoch) = committed_epoch { + committed_epoch.cmp(&epoch) + } else { + // We have ensured that the table_id exists in the latest version, so if the table_id does not exist in a + // previous version, the table must have not created yet, and therefore has less ordering. 
+ Ordering::Less + } + }); + match result { + Ok(index) => Some(self.recent_versions[index].clone()), + Err(index) => { + // `index` is index of the first version that has `committed_epoch` greater than `epoch` + // or `index` equals `recent_version.len()` when `epoch` is greater than all `committed_epoch` + let version = if index >= self.recent_versions.len() { + assert_eq!(index, self.recent_versions.len()); + self.recent_versions.last().cloned() + } else if index == 0 { + // The earliest version has a higher committed epoch + None + } else { + self.recent_versions.get(index - 1).cloned() + }; + version.and_then(|version| { + if version + .version() + .state_table_info + .info() + .contains_key(&table_id) + { + Some(version) + } else { + // if the table does not exist in the version, return `None` to try get a time travel version + None + } + }) + } + } + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use risingwave_common::catalog::TableId; + use risingwave_hummock_sdk::version::HummockVersion; + use risingwave_pb::hummock::{PbHummockVersion, StateTableInfo}; + use tokio::sync::mpsc::unbounded_channel; + + use crate::hummock::local_version::pinned_version::PinnedVersion; + use crate::hummock::local_version::recent_versions::RecentVersions; + + const TEST_TABLE_ID1: TableId = TableId::new(233); + const TEST_TABLE_ID2: TableId = TableId::new(234); + + fn gen_pin_version( + version_id: u64, + table_committed_epoch: impl IntoIterator, + ) -> PinnedVersion { + PinnedVersion::new( + HummockVersion::from_rpc_protobuf(&PbHummockVersion { + id: version_id, + state_table_info: HashMap::from_iter(table_committed_epoch.into_iter().map( + |(table_id, committed_epoch)| { + ( + table_id.table_id, + StateTableInfo { + committed_epoch, + safe_epoch: 0, + compaction_group_id: 0, + }, + ) + }, + )), + ..Default::default() + }), + unbounded_channel().0, + ) + } + + fn assert_query_equal( + recent_version: &RecentVersions, + expected: &[(TableId, u64, Option<&PinnedVersion>)], + ) { + for (table_id, epoch, expected_version) in expected + .iter() + .cloned() + .chain([(TEST_TABLE_ID1, 0, None), (TEST_TABLE_ID2, 0, None)]) + { + let version = recent_version.get_safe_version(table_id, epoch); + assert_eq!( + version.as_ref().map(|version| version.id()), + expected_version.map(|version| version.id()) + ); + } + } + + #[test] + fn test_basic() { + let epoch1 = 233; + let epoch0 = epoch1 - 1; + let epoch2 = epoch1 + 1; + let epoch3 = epoch2 + 1; + let epoch4 = epoch3 + 1; + let version1 = gen_pin_version(1, [(TEST_TABLE_ID1, epoch1)]); + // with at most 2 historical versions + let recent_versions = RecentVersions::new(version1.clone(), 2); + assert!(recent_versions.recent_versions.is_empty()); + assert!(recent_versions.is_latest_committed); + assert_query_equal( + &recent_versions, + &[ + (TEST_TABLE_ID1, epoch0, None), + (TEST_TABLE_ID1, epoch1, Some(&version1)), + (TEST_TABLE_ID1, epoch2, Some(&version1)), + ], + ); + + let recent_versions = + recent_versions.with_new_version(gen_pin_version(2, [(TEST_TABLE_ID1, epoch1)])); + assert_eq!(recent_versions.recent_versions.len(), 1); + assert!(!recent_versions.is_latest_committed); + + let version3 = gen_pin_version(3, [(TEST_TABLE_ID1, epoch2)]); + let recent_versions = recent_versions.with_new_version(version3.clone()); + assert_eq!(recent_versions.recent_versions.len(), 1); + assert!(recent_versions.is_latest_committed); + assert_query_equal( + &recent_versions, + &[ + (TEST_TABLE_ID1, epoch0, None), + (TEST_TABLE_ID1, epoch1, 
Some(&version1)), + (TEST_TABLE_ID1, epoch2, Some(&version3)), + (TEST_TABLE_ID1, epoch3, Some(&version3)), + ], + ); + + let version4 = gen_pin_version(4, [(TEST_TABLE_ID2, epoch1), (TEST_TABLE_ID1, epoch2)]); + let recent_versions = recent_versions.with_new_version(version4.clone()); + assert_eq!(recent_versions.recent_versions.len(), 2); + assert!(recent_versions.is_latest_committed); + assert_query_equal( + &recent_versions, + &[ + (TEST_TABLE_ID1, epoch0, None), + (TEST_TABLE_ID1, epoch1, Some(&version1)), + (TEST_TABLE_ID1, epoch2, Some(&version4)), + (TEST_TABLE_ID1, epoch3, Some(&version4)), + (TEST_TABLE_ID2, epoch0, None), + (TEST_TABLE_ID2, epoch1, Some(&version4)), + (TEST_TABLE_ID2, epoch2, Some(&version4)), + ], + ); + + let version5 = gen_pin_version(5, [(TEST_TABLE_ID2, epoch1), (TEST_TABLE_ID1, epoch3)]); + let recent_versions = recent_versions.with_new_version(version5.clone()); + assert_eq!(recent_versions.recent_versions.len(), 2); + assert!(recent_versions.is_latest_committed); + assert_query_equal( + &recent_versions, + &[ + (TEST_TABLE_ID1, epoch0, None), + (TEST_TABLE_ID1, epoch1, None), + (TEST_TABLE_ID1, epoch2, Some(&version4)), + (TEST_TABLE_ID1, epoch3, Some(&version5)), + (TEST_TABLE_ID1, epoch4, Some(&version5)), + (TEST_TABLE_ID2, epoch0, None), + (TEST_TABLE_ID2, epoch1, Some(&version5)), + (TEST_TABLE_ID2, epoch2, Some(&version5)), + ], + ); + } +} diff --git a/src/storage/src/hummock/mod.rs b/src/storage/src/hummock/mod.rs index 14ac9532c8cb3..f10b6deee503e 100644 --- a/src/storage/src/hummock/mod.rs +++ b/src/storage/src/hummock/mod.rs @@ -172,8 +172,7 @@ pub fn get_from_batch( read_options: &ReadOptions, local_stats: &mut StoreLocalStatistic, ) -> Option<(HummockValue, EpochWithGap)> { - imm.get(table_key, read_epoch, read_options).map(|v| { + imm.get(table_key, read_epoch, read_options).inspect(|_| { local_stats.get_shared_buffer_hit_counts += 1; - v }) } diff --git a/src/storage/src/hummock/sstable/bloom.rs b/src/storage/src/hummock/sstable/bloom.rs index f2ca47ba00e12..b38a4c10ada30 100644 --- a/src/storage/src/hummock/sstable/bloom.rs +++ b/src/storage/src/hummock/sstable/bloom.rs @@ -102,7 +102,7 @@ impl BloomFilterReader { true } else { let nbits = self.data.bit_len(); - let delta = (h >> 17) | (h << 15); + let delta = h.rotate_left(15); for _ in 0..self.k { let bit_pos = h % (nbits as u32); if !self.data.get_bit(bit_pos as usize) { @@ -171,7 +171,7 @@ impl FilterBuilder for BloomFilterBuilder { filter.resize(nbytes, 0); for h in &self.key_hash_entries { let mut h = *h; - let delta = (h >> 17) | (h << 15); + let delta = h.rotate_left(15); for _ in 0..k { let bit_pos = (h as usize) % nbits; filter.set_bit(bit_pos, true); diff --git a/src/storage/src/hummock/sstable_store.rs b/src/storage/src/hummock/sstable_store.rs index b9f29c5740e4b..d1367b92a9ce8 100644 --- a/src/storage/src/hummock/sstable_store.rs +++ b/src/storage/src/hummock/sstable_store.rs @@ -26,7 +26,9 @@ use foyer::{ use futures::{future, StreamExt}; use itertools::Itertools; use risingwave_hummock_sdk::sstable_info::SstableInfo; -use risingwave_hummock_sdk::{HummockSstableObjectId, OBJECT_SUFFIX}; +use risingwave_hummock_sdk::{ + HummockSstableObjectId, HUMMOCK_SSTABLE_OBJECT_ID_MAX_DECIMAL_LENGTH, OBJECT_SUFFIX, +}; use risingwave_hummock_trace::TracedCachePolicy; use risingwave_object_store::object::{ ObjectError, ObjectMetadataIter, ObjectResult, ObjectStoreRef, ObjectStreamingUploader, @@ -519,10 +521,21 @@ impl SstableStore { let obj_prefix = self .store 
.get_object_prefix(object_id, self.use_new_object_prefix_strategy); - format!( - "{}/{}{}.{}", - self.path, obj_prefix, object_id, OBJECT_SUFFIX - ) + let mut path = String::with_capacity( + self.path.len() + + "/".len() + + obj_prefix.len() + + HUMMOCK_SSTABLE_OBJECT_ID_MAX_DECIMAL_LENGTH + + ".".len() + + OBJECT_SUFFIX.len(), + ); + path.push_str(&self.path); + path.push('/'); + path.push_str(&obj_prefix); + path.push_str(&object_id.to_string()); + path.push('.'); + path.push_str(OBJECT_SUFFIX); + path } pub fn get_object_id_from_path(path: &str) -> HummockSstableObjectId { diff --git a/src/storage/src/hummock/store/hummock_storage.rs b/src/storage/src/hummock/store/hummock_storage.rs index b64752fca7fd6..888de0db1af1c 100644 --- a/src/storage/src/hummock/store/hummock_storage.rs +++ b/src/storage/src/hummock/store/hummock_storage.rs @@ -14,7 +14,7 @@ use std::collections::HashSet; use std::future::Future; -use std::ops::{Bound, Deref}; +use std::ops::Bound; use std::sync::Arc; use arc_swap::ArcSwap; @@ -50,9 +50,10 @@ use crate::hummock::event_handler::{ }; use crate::hummock::iterator::change_log::ChangeLogIterator; use crate::hummock::local_version::pinned_version::{start_pinned_version_worker, PinnedVersion}; +use crate::hummock::local_version::recent_versions::RecentVersions; use crate::hummock::observer_manager::HummockObserverNode; use crate::hummock::time_travel_version_cache::SimpleTimeTravelVersionCache; -use crate::hummock::utils::{validate_safe_epoch, wait_for_epoch}; +use crate::hummock::utils::wait_for_epoch; use crate::hummock::write_limiter::{WriteLimiter, WriteLimiterRef}; use crate::hummock::{ HummockEpoch, HummockError, HummockResult, HummockStorageIterator, HummockStorageRevIterator, @@ -97,7 +98,7 @@ pub struct HummockStorage { version_update_notifier_tx: Arc>, - pinned_version: Arc>, + recent_versions: Arc>, hummock_version_reader: HummockVersionReader, @@ -223,7 +224,7 @@ impl HummockStorage { version_update_notifier_tx: hummock_event_handler.version_update_notifier_tx(), hummock_event_sender: event_tx.clone(), _version_update_sender: version_update_tx, - pinned_version: hummock_event_handler.pinned_version(), + recent_versions: hummock_event_handler.recent_versions(), hummock_version_reader: HummockVersionReader::new( sstable_store, state_store_metrics.clone(), @@ -260,15 +261,9 @@ impl HummockStorage { ) -> StorageResult> { let key_range = (Bound::Included(key.clone()), Bound::Included(key.clone())); - let (key_range, read_version_tuple) = if read_options.read_version_from_time_travel { - self.build_read_version_by_time_travel(epoch, read_options.table_id, key_range) - .await? - } else if read_options.read_version_from_backup { - self.build_read_version_tuple_from_backup(epoch, read_options.table_id, key_range) - .await? - } else { - self.build_read_version_tuple(epoch, read_options.table_id, key_range)? - }; + let (key_range, read_version_tuple) = self + .build_read_version_tuple(epoch, key_range, &read_options) + .await?; if is_empty_key_range(&key_range) { return Ok(None); @@ -285,15 +280,9 @@ impl HummockStorage { epoch: u64, read_options: ReadOptions, ) -> StorageResult { - let (key_range, read_version_tuple) = if read_options.read_version_from_time_travel { - self.build_read_version_by_time_travel(epoch, read_options.table_id, key_range) - .await? - } else if read_options.read_version_from_backup { - self.build_read_version_tuple_from_backup(epoch, read_options.table_id, key_range) - .await? 
- } else { - self.build_read_version_tuple(epoch, read_options.table_id, key_range)? - }; + let (key_range, read_version_tuple) = self + .build_read_version_tuple(epoch, key_range, &read_options) + .await?; self.hummock_version_reader .iter(key_range, epoch, read_options, read_version_tuple) @@ -306,36 +295,28 @@ impl HummockStorage { epoch: u64, read_options: ReadOptions, ) -> StorageResult { - let (key_range, read_version_tuple) = if read_options.read_version_from_time_travel { - self.build_read_version_by_time_travel(epoch, read_options.table_id, key_range) - .await? - } else if read_options.read_version_from_backup { - self.build_read_version_tuple_from_backup(epoch, read_options.table_id, key_range) - .await? - } else { - self.build_read_version_tuple(epoch, read_options.table_id, key_range)? - }; + let (key_range, read_version_tuple) = self + .build_read_version_tuple(epoch, key_range, &read_options) + .await?; self.hummock_version_reader .rev_iter(key_range, epoch, read_options, read_version_tuple, None) .await } - async fn build_read_version_by_time_travel( + async fn get_time_travel_version( &self, epoch: u64, table_id: TableId, - key_range: TableKeyRange, - ) -> StorageResult<(TableKeyRange, ReadVersionTuple)> { + ) -> StorageResult { let fetch = async { let pb_version = self .hummock_meta_client - .get_version_by_epoch(epoch) + .get_version_by_epoch(epoch, table_id.table_id()) .await .inspect_err(|e| tracing::error!("{}", e.to_report_string())) .map_err(|e| HummockError::meta_error(e.to_report_string()))?; let version = HummockVersion::from_rpc_protobuf(&pb_version); - validate_safe_epoch(&version, table_id, epoch)?; let (tx, _rx) = unbounded_channel(); Ok(PinnedVersion::new(version, tx)) }; @@ -343,9 +324,24 @@ impl HummockStorage { .simple_time_travel_version_cache .get_or_insert(epoch, fetch) .await?; - Ok(get_committed_read_version_tuple( - version, table_id, key_range, epoch, - )) + Ok(version) + } + + async fn build_read_version_tuple( + &self, + epoch: u64, + key_range: TableKeyRange, + read_options: &ReadOptions, + ) -> StorageResult<(TableKeyRange, ReadVersionTuple)> { + if read_options.read_version_from_backup { + self.build_read_version_tuple_from_backup(epoch, read_options.table_id, key_range) + .await + } else if read_options.read_committed { + self.build_read_version_tuple_from_committed(epoch, read_options.table_id, key_range) + .await + } else { + self.build_read_version_tuple_from_all(epoch, read_options.table_id, key_range) + } } async fn build_read_version_tuple_from_backup( @@ -359,16 +355,12 @@ impl HummockStorage { .try_get_hummock_version(table_id, epoch) .await { - Ok(Some(backup_version)) => { - validate_safe_epoch(backup_version.version(), table_id, epoch)?; - - Ok(get_committed_read_version_tuple( - backup_version, - table_id, - key_range, - epoch, - )) - } + Ok(Some(backup_version)) => Ok(get_committed_read_version_tuple( + backup_version, + table_id, + key_range, + epoch, + )), Ok(None) => Err(HummockError::read_backup_error(format!( "backup include epoch {} not found", epoch @@ -378,27 +370,47 @@ impl HummockStorage { } } - fn build_read_version_tuple( + async fn build_read_version_tuple_from_committed( &self, epoch: u64, table_id: TableId, key_range: TableKeyRange, ) -> StorageResult<(TableKeyRange, ReadVersionTuple)> { - let pinned_version = self.pinned_version.load(); - validate_safe_epoch(pinned_version.version(), table_id, epoch)?; - let table_committed_epoch = pinned_version + let version = match self + .recent_versions + .load() + 
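// At this point the committed read path asks the recent-version cache for a usable
// version and only falls back to the time-travel fetch on a miss. A toy model of that
// lookup, kept consistent with the recent_versions tests earlier in this diff
// (committed epoch -> version id; the names and the synchronous shape are illustrative):
use std::collections::BTreeMap;

fn version_for_committed_read(cache: &BTreeMap<u64, u64>, epoch: u64) -> u64 {
    cache
        .range(..=epoch) // newest cached version whose committed epoch is <= the read epoch
        .next_back()
        .map(|(_, version_id)| *version_id)
        .unwrap_or_else(|| time_travel_fetch(epoch))
}

fn time_travel_fetch(_epoch: u64) -> u64 {
    // Placeholder for get_time_travel_version(): get_version_by_epoch + PinnedVersion::new.
    u64::MAX
}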
.get_safe_version(table_id, epoch) + { + Some(version) => version, + None => self.get_time_travel_version(epoch, table_id).await?, + }; + Ok(get_committed_read_version_tuple( + version, table_id, key_range, epoch, + )) + } + + fn build_read_version_tuple_from_all( + &self, + epoch: u64, + table_id: TableId, + key_range: TableKeyRange, + ) -> StorageResult<(TableKeyRange, ReadVersionTuple)> { + let pinned_version = self.recent_versions.load().latest_version().clone(); + let info = pinned_version .version() .state_table_info .info() - .get(&table_id) - .map(|info| info.committed_epoch); + .get(&table_id); // check epoch if lower mce - let ret = if let Some(table_committed_epoch) = table_committed_epoch - && epoch <= table_committed_epoch + let ret = if let Some(info) = info + && epoch <= info.committed_epoch { + if epoch < info.safe_epoch { + return Err(HummockError::expired_epoch(table_id, info.safe_epoch, epoch).into()); + } // read committed_version directly without build snapshot - get_committed_read_version_tuple((**pinned_version).clone(), table_id, key_range, epoch) + get_committed_read_version_tuple(pinned_version, table_id, key_range, epoch) } else { let vnode = vnode(&key_range); let mut matched_replicated_read_version_cnt = 0; @@ -431,6 +443,7 @@ impl HummockStorage { // When the system has just started and no state has been created, the memory state // may be empty if read_version_vec.is_empty() { + let table_committed_epoch = info.map(|info| info.committed_epoch); if matched_replicated_read_version_cnt > 0 { tracing::warn!( "Read(table_id={} vnode={} epoch={}) is not allowed on replicated read version ({} found). Fall back to committed version (epoch={:?})", @@ -449,12 +462,7 @@ impl HummockStorage { table_committed_epoch ); } - get_committed_read_version_tuple( - (**pinned_version).clone(), - table_id, - key_range, - epoch, - ) + get_committed_read_version_tuple(pinned_version, table_id, key_range, epoch) } else { if read_version_vec.len() != 1 { let read_version_vnodes = read_version_vec @@ -538,7 +546,7 @@ impl HummockStorage { } pub fn get_pinned_version(&self) -> PinnedVersion { - self.pinned_version.load().deref().deref().clone() + self.recent_versions.load().latest_version().clone() } pub fn backup_reader(&self) -> BackupReaderRef { @@ -604,7 +612,7 @@ impl StateStoreRead for HummockStorage { key_range: TableKeyRange, options: ReadLogOptions, ) -> StorageResult { - let version = (**self.pinned_version.load()).clone(); + let version = self.recent_versions.load().latest_version().clone(); let iter = self .hummock_version_reader .iter_log(version, epoch_range, key_range, options) @@ -653,16 +661,8 @@ impl HummockStorage { pub async fn seal_and_sync_epoch( &self, epoch: u64, + table_ids: HashSet, ) -> StorageResult { - let table_ids = self - .pinned_version - .load() - .version() - .state_table_info - .info() - .keys() - .cloned() - .collect(); self.sync(epoch, table_ids).await } @@ -675,7 +675,7 @@ impl HummockStorage { .send(HummockVersionUpdate::PinnedVersion(Box::new(version))) .unwrap(); loop { - if self.pinned_version.load().id() >= version_id { + if self.recent_versions.load().latest_version().id() >= version_id { break; } @@ -686,7 +686,7 @@ impl HummockStorage { pub async fn wait_version(&self, version: HummockVersion) { use tokio::task::yield_now; loop { - if self.pinned_version.load().id() >= version.id { + if self.recent_versions.load().latest_version().id() >= version.id { break; } @@ -736,7 +736,7 @@ impl HummockStorage { pub async fn wait_version_update(&self, 
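// A compact restatement of the gating added above in build_read_version_tuple_from_all,
// with a (committed_epoch, safe_epoch) pair standing in for the table's state info:
enum ReadPlan {
    Expired,   // epoch < safe_epoch: the data may already be vacuumed, so error out
    Committed, // safe_epoch <= epoch <= committed_epoch: serve from the pinned version
    Local,     // epoch > committed_epoch (or unknown table): consult local read versions
}

fn plan(epoch: u64, info: Option<(u64, u64)>) -> ReadPlan {
    match info {
        Some((committed_epoch, safe_epoch)) if epoch <= committed_epoch => {
            if epoch < safe_epoch {
                ReadPlan::Expired
            } else {
                ReadPlan::Committed
            }
        }
        _ => ReadPlan::Local,
    }
}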
old_id: HummockVersionId) -> HummockVersionId { use tokio::task::yield_now; loop { - let cur_id = self.pinned_version.load().id(); + let cur_id = self.recent_versions.load().latest_version().id(); if cur_id > old_id { return cur_id; } diff --git a/src/storage/src/hummock/utils.rs b/src/storage/src/hummock/utils.rs index 3f2d1f989f529..c2f6cbafed79b 100644 --- a/src/storage/src/hummock/utils.rs +++ b/src/storage/src/hummock/utils.rs @@ -30,11 +30,10 @@ use risingwave_hummock_sdk::key::{ bound_table_key_range, EmptySliceRef, FullKey, TableKey, UserKey, }; use risingwave_hummock_sdk::sstable_info::SstableInfo; -use risingwave_hummock_sdk::version::HummockVersion; use risingwave_hummock_sdk::{can_concat, HummockEpoch}; use tokio::sync::oneshot::{channel, Receiver, Sender}; -use super::{HummockError, HummockResult, SstableStoreRef}; +use super::{HummockError, SstableStoreRef}; use crate::error::StorageResult; use crate::hummock::CachePolicy; use crate::mem_table::{KeyOp, MemTableError}; @@ -72,24 +71,6 @@ where !too_left && !too_right } -pub fn validate_safe_epoch( - version: &HummockVersion, - table_id: TableId, - epoch: u64, -) -> HummockResult<()> { - if let Some(info) = version.state_table_info.info().get(&table_id) - && epoch < info.safe_epoch - { - return Err(HummockError::expired_epoch( - table_id, - info.safe_epoch, - epoch, - )); - } - - Ok(()) -} - pub fn filter_single_sst(info: &SstableInfo, table_id: TableId, table_key_range: &R) -> bool where R: RangeBounds>, diff --git a/src/storage/src/lib.rs b/src/storage/src/lib.rs index e11d3e1cee1ca..779062767c7ae 100644 --- a/src/storage/src/lib.rs +++ b/src/storage/src/lib.rs @@ -18,7 +18,6 @@ #![feature(extract_if)] #![feature(coroutines)] #![feature(hash_extract_if)] -#![feature(lint_reasons)] #![feature(proc_macro_hygiene)] #![feature(stmt_expr_attributes)] #![feature(strict_provenance)] diff --git a/src/storage/src/opts.rs b/src/storage/src/opts.rs index f6d6f31fb3a4f..a3a787f55c97d 100644 --- a/src/storage/src/opts.rs +++ b/src/storage/src/opts.rs @@ -63,6 +63,8 @@ pub struct StorageOpts { /// max memory usage for large query. pub prefetch_buffer_capacity_mb: usize, + pub max_cached_recent_versions_number: usize, + pub max_prefetch_block_number: usize, pub disable_remote_compactor: bool, @@ -170,6 +172,10 @@ impl From<(&RwConfig, &SystemParamsReader, &StorageMemoryConfig)> for StorageOpt meta_cache_shard_num: s.meta_cache_shard_num, meta_cache_eviction_config: s.meta_cache_eviction_config.clone(), prefetch_buffer_capacity_mb: s.prefetch_buffer_capacity_mb, + max_cached_recent_versions_number: c + .storage + .max_cached_recent_versions_number + .unwrap_or(60), max_prefetch_block_number: c.storage.max_prefetch_block_number, disable_remote_compactor: c.storage.disable_remote_compactor, share_buffer_upload_concurrency: c.storage.share_buffer_upload_concurrency, diff --git a/src/storage/src/store.rs b/src/storage/src/store.rs index 91f79231f6939..ab80f712570ca 100644 --- a/src/storage/src/store.rs +++ b/src/storage/src/store.rs @@ -502,7 +502,7 @@ pub struct ReadOptions { /// Read from historical hummock version of meta snapshot backup. /// It should only be used by `StorageTable` for batch query. 
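// For context on the field rename just below: in the storage_table.rs hunks further
// down, both TimeTravel and Committed read epochs now set `read_committed`, while
// Backup keeps its dedicated flag. A toy mirror of that mapping (reduced enum,
// illustrative only):
enum ReadEpoch {
    Committed(u64),
    TimeTravel(u64),
    Backup(u64),
}

fn to_read_flags(epoch: &ReadEpoch) -> (bool, bool) {
    let read_backup = matches!(epoch, ReadEpoch::Backup(_));
    let read_committed = matches!(epoch, ReadEpoch::TimeTravel(_) | ReadEpoch::Committed(_));
    (read_backup, read_committed)
}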
pub read_version_from_backup: bool, - pub read_version_from_time_travel: bool, + pub read_committed: bool, } impl From for ReadOptions { @@ -515,7 +515,7 @@ impl From for ReadOptions { retention_seconds: value.retention_seconds, table_id: value.table_id.into(), read_version_from_backup: value.read_version_from_backup, - read_version_from_time_travel: value.read_version_from_time_travel, + read_committed: value.read_committed, } } } @@ -530,7 +530,7 @@ impl From for TracedReadOptions { retention_seconds: value.retention_seconds, table_id: value.table_id.into(), read_version_from_backup: value.read_version_from_backup, - read_version_from_time_travel: value.read_version_from_time_travel, + read_committed: value.read_committed, } } } diff --git a/src/storage/src/table/batch_table/storage_table.rs b/src/storage/src/table/batch_table/storage_table.rs index 7a0ad76cce4a5..8c5f432f46c57 100644 --- a/src/storage/src/table/batch_table/storage_table.rs +++ b/src/storage/src/table/batch_table/storage_table.rs @@ -361,7 +361,10 @@ impl StorageTableInner { ) -> StorageResult> { let epoch = wait_epoch.get_epoch(); let read_backup = matches!(wait_epoch, HummockReadEpoch::Backup(_)); - let read_time_travel = matches!(wait_epoch, HummockReadEpoch::TimeTravel(_)); + let read_committed = matches!( + wait_epoch, + HummockReadEpoch::TimeTravel(_) | HummockReadEpoch::Committed(_) + ); self.store.try_wait_epoch(wait_epoch).await?; let serialized_pk = serialize_pk_with_vnode( &pk, @@ -382,7 +385,7 @@ impl StorageTableInner { retention_seconds: self.table_option.retention_seconds, table_id: self.table_id, read_version_from_backup: read_backup, - read_version_from_time_travel: read_time_travel, + read_committed, cache_policy: CachePolicy::Fill(CacheContext::Default), ..Default::default() }; @@ -487,14 +490,17 @@ impl StorageTableInner { let iterators: Vec<_> = try_join_all(table_key_ranges.map(|table_key_range| { let prefix_hint = prefix_hint.clone(); let read_backup = matches!(wait_epoch, HummockReadEpoch::Backup(_)); - let read_time_travel = matches!(wait_epoch, HummockReadEpoch::TimeTravel(_)); + let read_committed = matches!( + wait_epoch, + HummockReadEpoch::TimeTravel(_) | HummockReadEpoch::Committed(_) + ); async move { let read_options = ReadOptions { prefix_hint, retention_seconds: self.table_option.retention_seconds, table_id: self.table_id, read_version_from_backup: read_backup, - read_version_from_time_travel: read_time_travel, + read_committed, prefetch_options, cache_policy, ..Default::default() diff --git a/src/stream/src/common/log_store_impl/kv_log_store/mod.rs b/src/stream/src/common/log_store_impl/kv_log_store/mod.rs index f4e62e429effa..440c7188d2fa1 100644 --- a/src/stream/src/common/log_store_impl/kv_log_store/mod.rs +++ b/src/stream/src/common/log_store_impl/kv_log_store/mod.rs @@ -526,7 +526,11 @@ mod tests { let epoch3 = epoch2.next_epoch(); writer.flush_current_epoch(epoch3, true).await.unwrap(); - let sync_result = test_env.storage.seal_and_sync_epoch(epoch2).await.unwrap(); + let sync_result = test_env + .storage + .seal_and_sync_epoch(epoch2, HashSet::from_iter([table.id.into()])) + .await + .unwrap(); assert!(!sync_result.uncommitted_ssts.is_empty()); reader.init().await.unwrap(); diff --git a/src/stream/src/common/log_store_impl/kv_log_store/reader.rs b/src/stream/src/common/log_store_impl/kv_log_store/reader.rs index 5497b989a0873..c84db97002b02 100644 --- a/src/stream/src/common/log_store_impl/kv_log_store/reader.rs +++ b/src/stream/src/common/log_store_impl/kv_log_store/reader.rs 
@@ -16,7 +16,7 @@ use std::future::Future; use std::ops::Bound; use std::ops::Bound::{Excluded, Included, Unbounded}; use std::pin::Pin; -use std::time::{Duration, Instant}; +use std::time::Duration; use anyhow::anyhow; use await_tree::InstrumentAwait; @@ -53,18 +53,28 @@ use crate::common::log_store_impl::kv_log_store::serde::{ }; use crate::common::log_store_impl::kv_log_store::KvLogStoreMetrics; -type RewindBackoffPolicy = impl Iterator; pub(crate) const REWIND_BASE_DELAY: Duration = Duration::from_secs(1); pub(crate) const REWIND_BACKOFF_FACTOR: u64 = 2; pub(crate) const REWIND_MAX_DELAY: Duration = Duration::from_secs(180); -fn initial_rewind_backoff_policy() -> RewindBackoffPolicy { - tokio_retry::strategy::ExponentialBackoff::from_millis(REWIND_BASE_DELAY.as_millis() as _) - .factor(REWIND_BACKOFF_FACTOR) - .max_delay(REWIND_MAX_DELAY) - .map(tokio_retry::strategy::jitter) +mod rewind_backoff_policy { + use std::time::Duration; + + use crate::common::log_store_impl::kv_log_store::{ + REWIND_BACKOFF_FACTOR, REWIND_BASE_DELAY, REWIND_MAX_DELAY, + }; + + pub(super) type RewindBackoffPolicy = impl Iterator; + pub(super) fn initial_rewind_backoff_policy() -> RewindBackoffPolicy { + tokio_retry::strategy::ExponentialBackoff::from_millis(REWIND_BASE_DELAY.as_millis() as _) + .factor(REWIND_BACKOFF_FACTOR) + .max_delay(REWIND_MAX_DELAY) + .map(tokio_retry::strategy::jitter) + } } +use rewind_backoff_policy::*; + struct RewindDelay { last_rewind_truncate_offset: Option, backoff_policy: RewindBackoffPolicy, @@ -218,58 +228,71 @@ impl bool> AutoRebuildStateStoreReadIter } } -type TimeoutAutoRebuildIter = - AutoRebuildStateStoreReadIter bool + Send>; +mod timeout_auto_rebuild { + use std::time::{Duration, Instant}; -async fn iter_with_timeout_rebuild( - state_store: S, - range: TableKeyRange, - epoch: HummockEpoch, - options: ReadOptions, - timeout: Duration, -) -> StorageResult> { - const CHECK_TIMEOUT_PERIOD: usize = 100; - // use a struct here to avoid accidental copy instead of move on primitive usize - struct Count(usize); - let mut check_count = Count(0); - let mut total_count = Count(0); - let mut curr_iter_item_count = Count(0); - let mut start_time = Instant::now(); - let initial_start_time = start_time; - AutoRebuildStateStoreReadIter::new( - state_store, - move || { - check_count.0 += 1; - curr_iter_item_count.0 += 1; - total_count.0 += 1; - if check_count.0 == CHECK_TIMEOUT_PERIOD { - check_count.0 = 0; - if start_time.elapsed() > timeout { - let prev_iter_item_count = curr_iter_item_count.0; - curr_iter_item_count.0 = 0; - start_time = Instant::now(); - info!( - table_id = options.table_id.table_id, - iter_exist_time_secs = initial_start_time.elapsed().as_secs(), - prev_iter_item_count, - total_iter_item_count = total_count.0, - "kv log store iter is rebuilt" - ); - true + use risingwave_hummock_sdk::key::TableKeyRange; + use risingwave_hummock_sdk::HummockEpoch; + use risingwave_storage::error::StorageResult; + use risingwave_storage::store::{ReadOptions, StateStoreRead}; + + use crate::common::log_store_impl::kv_log_store::reader::AutoRebuildStateStoreReadIter; + + pub(super) type TimeoutAutoRebuildIter = + AutoRebuildStateStoreReadIter bool + Send>; + + pub(super) async fn iter_with_timeout_rebuild( + state_store: S, + range: TableKeyRange, + epoch: HummockEpoch, + options: ReadOptions, + timeout: Duration, + ) -> StorageResult> { + const CHECK_TIMEOUT_PERIOD: usize = 100; + // use a struct here to avoid accidental copy instead of move on primitive usize + struct Count(usize); + 
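// The surrounding hunks (and the exchange/input.rs ones later in this diff) move each
// `type ... = impl ...` alias together with its defining function into a small private
// module and re-export it, presumably to keep the opaque type's defining scope
// explicit. A minimal sketch of the pattern, assuming the nightly
// `type_alias_impl_trait` feature that such aliases require:
#![feature(type_alias_impl_trait)] // nightly feature assumed, as in this crate

mod backoff {
    // The opaque alias and its defining function live in one small module; the rest
    // of the file only sees them through the re-export below.
    pub(crate) type Backoff = impl Iterator<Item = u64>;

    pub(crate) fn backoff() -> Backoff {
        (0u32..).map(|attempt| 1u64 << attempt.min(8))
    }
}
use backoff::*;

fn main() {
    let delays: Vec<u64> = backoff().take(4).collect();
    assert_eq!(delays, vec![1, 2, 4, 8]);
}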
let mut check_count = Count(0); + let mut total_count = Count(0); + let mut curr_iter_item_count = Count(0); + let mut start_time = Instant::now(); + let initial_start_time = start_time; + AutoRebuildStateStoreReadIter::new( + state_store, + move || { + check_count.0 += 1; + curr_iter_item_count.0 += 1; + total_count.0 += 1; + if check_count.0 == CHECK_TIMEOUT_PERIOD { + check_count.0 = 0; + if start_time.elapsed() > timeout { + let prev_iter_item_count = curr_iter_item_count.0; + curr_iter_item_count.0 = 0; + start_time = Instant::now(); + info!( + table_id = options.table_id.table_id, + iter_exist_time_secs = initial_start_time.elapsed().as_secs(), + prev_iter_item_count, + total_iter_item_count = total_count.0, + "kv log store iter is rebuilt" + ); + true + } else { + false + } } else { false } - } else { - false - } - }, - range, - epoch, - options, - ) - .await + }, + range, + epoch, + options, + ) + .await + } } +use timeout_auto_rebuild::*; + impl bool + Send> StateStoreIter for AutoRebuildStateStoreReadIter { diff --git a/src/stream/src/executor/asof_join.rs b/src/stream/src/executor/asof_join.rs new file mode 100644 index 0000000000000..cb8a141481f28 --- /dev/null +++ b/src/stream/src/executor/asof_join.rs @@ -0,0 +1,1377 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +use std::collections::{BTreeMap, HashSet}; +use std::ops::Bound; +use std::time::Duration; + +use either::Either; +use itertools::Itertools; +use multimap::MultiMap; +use risingwave_common::array::Op; +use risingwave_common::hash::{HashKey, NullBitmap}; +use risingwave_common::util::epoch::EpochPair; +use risingwave_common::util::iter_util::ZipEqDebug; +use tokio::time::Instant; + +use self::builder::JoinChunkBuilder; +use super::barrier_align::*; +use super::join::hash_join::*; +use super::join::*; +use super::watermark::*; +use crate::executor::join::builder::JoinStreamChunkBuilder; +use crate::executor::prelude::*; + +/// Evict the cache every n rows. +const EVICT_EVERY_N_ROWS: u32 = 16; + +fn is_subset(vec1: Vec, vec2: Vec) -> bool { + HashSet::::from_iter(vec1).is_subset(&vec2.into_iter().collect()) +} + +pub struct JoinParams { + /// Indices of the join keys + pub join_key_indices: Vec, + /// Indices of the input pk after dedup + pub deduped_pk_indices: Vec, +} + +impl JoinParams { + pub fn new(join_key_indices: Vec, deduped_pk_indices: Vec) -> Self { + Self { + join_key_indices, + deduped_pk_indices, + } + } +} + +struct JoinSide { + /// Store all data from a one side stream + ht: JoinHashMap, + /// Indices of the join key columns + join_key_indices: Vec, + /// The data type of all columns without degree. + all_data_types: Vec, + /// The start position for the side in output new columns + start_pos: usize, + /// The mapping from input indices of a side to output columes. + i2o_mapping: Vec<(usize, usize)>, + i2o_mapping_indexed: MultiMap, + /// Whether degree table is needed for this side. 
+ need_degree_table: bool, +} + +impl std::fmt::Debug for JoinSide { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("JoinSide") + .field("join_key_indices", &self.join_key_indices) + .field("col_types", &self.all_data_types) + .field("start_pos", &self.start_pos) + .field("i2o_mapping", &self.i2o_mapping) + .field("need_degree_table", &self.need_degree_table) + .finish() + } +} + +impl JoinSide { + // WARNING: Please do not call this until we implement it. + fn is_dirty(&self) -> bool { + unimplemented!() + } + + #[expect(dead_code)] + fn clear_cache(&mut self) { + assert!( + !self.is_dirty(), + "cannot clear cache while states of hash join are dirty" + ); + + // TODO: not working with rearranged chain + // self.ht.clear(); + } + + pub fn init(&mut self, epoch: EpochPair) { + self.ht.init(epoch); + } +} + +/// `AsOfJoinExecutor` takes two input streams and runs equal hash join on them. +/// The output columns are the concatenation of left and right columns. +pub struct AsOfJoinExecutor { + ctx: ActorContextRef, + info: ExecutorInfo, + + /// Left input executor + input_l: Option, + /// Right input executor + input_r: Option, + /// The data types of the formed new columns + actual_output_data_types: Vec, + /// The parameters of the left join executor + side_l: JoinSide, + /// The parameters of the right join executor + side_r: JoinSide, + + metrics: Arc, + /// The maximum size of the chunk produced by executor at a time + chunk_size: usize, + /// Count the messages received, clear to 0 when counted to `EVICT_EVERY_N_MESSAGES` + cnt_rows_received: u32, + + /// watermark column index -> `BufferedWatermarks` + watermark_buffers: BTreeMap>, + + high_join_amplification_threshold: usize, + /// `AsOf` join description + asof_desc: AsOfDesc, +} + +impl std::fmt::Debug + for AsOfJoinExecutor +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("AsOfJoinExecutor") + .field("join_type", &T) + .field("input_left", &self.input_l.as_ref().unwrap().identity()) + .field("input_right", &self.input_r.as_ref().unwrap().identity()) + .field("side_l", &self.side_l) + .field("side_r", &self.side_r) + .field("pk_indices", &self.info.pk_indices) + .field("schema", &self.info.schema) + .field("actual_output_data_types", &self.actual_output_data_types) + .finish() + } +} + +impl Execute + for AsOfJoinExecutor +{ + fn execute(self: Box) -> BoxedMessageStream { + self.into_stream().boxed() + } +} + +struct EqJoinArgs<'a, K: HashKey, S: StateStore> { + ctx: &'a ActorContextRef, + side_l: &'a mut JoinSide, + side_r: &'a mut JoinSide, + asof_desc: &'a AsOfDesc, + actual_output_data_types: &'a [DataType], + // inequality_watermarks: &'a Watermark, + chunk: StreamChunk, + chunk_size: usize, + cnt_rows_received: &'a mut u32, + high_join_amplification_threshold: usize, +} + +impl AsOfJoinExecutor { + #[allow(clippy::too_many_arguments)] + pub fn new( + ctx: ActorContextRef, + info: ExecutorInfo, + input_l: Executor, + input_r: Executor, + params_l: JoinParams, + params_r: JoinParams, + null_safe: Vec, + output_indices: Vec, + state_table_l: StateTable, + degree_state_table_l: StateTable, + state_table_r: StateTable, + degree_state_table_r: StateTable, + watermark_epoch: AtomicU64Ref, + metrics: Arc, + chunk_size: usize, + high_join_amplification_threshold: usize, + asof_desc: AsOfDesc, + ) -> Self { + let side_l_column_n = input_l.schema().len(); + + let schema_fields = [ + input_l.schema().fields.clone(), + input_r.schema().fields.clone(), + ] + 
.concat(); + + let original_output_data_types = schema_fields + .iter() + .map(|field| field.data_type()) + .collect_vec(); + let actual_output_data_types = output_indices + .iter() + .map(|&idx| original_output_data_types[idx].clone()) + .collect_vec(); + + // Data types of of hash join state. + let state_all_data_types_l = input_l.schema().data_types(); + let state_all_data_types_r = input_r.schema().data_types(); + + let state_pk_indices_l = input_l.pk_indices().to_vec(); + let state_pk_indices_r = input_r.pk_indices().to_vec(); + + let state_order_key_indices_l = state_table_l.pk_indices(); + let state_order_key_indices_r = state_table_r.pk_indices(); + + let state_join_key_indices_l = params_l.join_key_indices; + let state_join_key_indices_r = params_r.join_key_indices; + + let degree_join_key_indices_l = (0..state_join_key_indices_l.len()).collect_vec(); + let degree_join_key_indices_r = (0..state_join_key_indices_r.len()).collect_vec(); + + let degree_pk_indices_l = (state_join_key_indices_l.len() + ..state_join_key_indices_l.len() + params_l.deduped_pk_indices.len()) + .collect_vec(); + let degree_pk_indices_r = (state_join_key_indices_r.len() + ..state_join_key_indices_r.len() + params_r.deduped_pk_indices.len()) + .collect_vec(); + + // If pk is contained in join key. + let pk_contained_in_jk_l = + is_subset(state_pk_indices_l.clone(), state_join_key_indices_l.clone()); + let pk_contained_in_jk_r = + is_subset(state_pk_indices_r.clone(), state_join_key_indices_r.clone()); + + let join_key_data_types_l = state_join_key_indices_l + .iter() + .map(|idx| state_all_data_types_l[*idx].clone()) + .collect_vec(); + + let join_key_data_types_r = state_join_key_indices_r + .iter() + .map(|idx| state_all_data_types_r[*idx].clone()) + .collect_vec(); + + assert_eq!(join_key_data_types_l, join_key_data_types_r); + + let degree_all_data_types_l = state_order_key_indices_l + .iter() + .map(|idx| state_all_data_types_l[*idx].clone()) + .collect_vec(); + let degree_all_data_types_r = state_order_key_indices_r + .iter() + .map(|idx| state_all_data_types_r[*idx].clone()) + .collect_vec(); + + let null_matched = K::Bitmap::from_bool_vec(null_safe); + + let need_degree_table_l = false; + let need_degree_table_r = false; + + let (left_to_output, right_to_output) = { + let (left_len, right_len) = if is_left_semi_or_anti(T) { + (state_all_data_types_l.len(), 0usize) + } else if is_right_semi_or_anti(T) { + (0usize, state_all_data_types_r.len()) + } else { + (state_all_data_types_l.len(), state_all_data_types_r.len()) + }; + JoinStreamChunkBuilder::get_i2o_mapping(&output_indices, left_len, right_len) + }; + + let l2o_indexed = MultiMap::from_iter(left_to_output.iter().copied()); + let r2o_indexed = MultiMap::from_iter(right_to_output.iter().copied()); + + // handle inequality watermarks + // https://github.com/risingwavelabs/risingwave/issues/18503 + // let inequality_watermarks = None; + let watermark_buffers = BTreeMap::new(); + + let inequal_key_idx_l = Some(asof_desc.left_idx); + let inequal_key_idx_r = Some(asof_desc.right_idx); + + Self { + ctx: ctx.clone(), + info, + input_l: Some(input_l), + input_r: Some(input_r), + actual_output_data_types, + side_l: JoinSide { + ht: JoinHashMap::new( + watermark_epoch.clone(), + join_key_data_types_l, + state_join_key_indices_l.clone(), + state_all_data_types_l.clone(), + state_table_l, + params_l.deduped_pk_indices, + degree_join_key_indices_l, + degree_all_data_types_l, + degree_state_table_l, + degree_pk_indices_l, + null_matched.clone(), + 
need_degree_table_l, + pk_contained_in_jk_l, + inequal_key_idx_l, + metrics.clone(), + ctx.id, + ctx.fragment_id, + "left", + ), + join_key_indices: state_join_key_indices_l, + all_data_types: state_all_data_types_l, + i2o_mapping: left_to_output, + i2o_mapping_indexed: l2o_indexed, + start_pos: 0, + need_degree_table: need_degree_table_l, + }, + side_r: JoinSide { + ht: JoinHashMap::new( + watermark_epoch, + join_key_data_types_r, + state_join_key_indices_r.clone(), + state_all_data_types_r.clone(), + state_table_r, + params_r.deduped_pk_indices, + degree_join_key_indices_r, + degree_all_data_types_r, + degree_state_table_r, + degree_pk_indices_r, + null_matched, + need_degree_table_r, + pk_contained_in_jk_r, + inequal_key_idx_r, + metrics.clone(), + ctx.id, + ctx.fragment_id, + "right", + ), + join_key_indices: state_join_key_indices_r, + all_data_types: state_all_data_types_r, + start_pos: side_l_column_n, + i2o_mapping: right_to_output, + i2o_mapping_indexed: r2o_indexed, + need_degree_table: need_degree_table_r, + }, + metrics, + chunk_size, + cnt_rows_received: 0, + watermark_buffers, + high_join_amplification_threshold, + asof_desc, + } + } + + #[try_stream(ok = Message, error = StreamExecutorError)] + async fn into_stream(mut self) { + let input_l = self.input_l.take().unwrap(); + let input_r = self.input_r.take().unwrap(); + let aligned_stream = barrier_align( + input_l.execute(), + input_r.execute(), + self.ctx.id, + self.ctx.fragment_id, + self.metrics.clone(), + "Join", + ); + pin_mut!(aligned_stream); + + let barrier = expect_first_barrier_from_aligned_stream(&mut aligned_stream).await?; + self.side_l.init(barrier.epoch); + self.side_r.init(barrier.epoch); + + // The first barrier message should be propagated. + yield Message::Barrier(barrier); + let actor_id_str = self.ctx.id.to_string(); + let fragment_id_str = self.ctx.fragment_id.to_string(); + + // initialized some metrics + let join_actor_input_waiting_duration_ns = self + .metrics + .join_actor_input_waiting_duration_ns + .with_guarded_label_values(&[&actor_id_str, &fragment_id_str]); + let left_join_match_duration_ns = self + .metrics + .join_match_duration_ns + .with_guarded_label_values(&[&actor_id_str, &fragment_id_str, "left"]); + let right_join_match_duration_ns = self + .metrics + .join_match_duration_ns + .with_guarded_label_values(&[&actor_id_str, &fragment_id_str, "right"]); + + let barrier_join_match_duration_ns = self + .metrics + .join_match_duration_ns + .with_guarded_label_values(&[&actor_id_str, &fragment_id_str, "barrier"]); + + let left_join_cached_entry_count = self + .metrics + .join_cached_entry_count + .with_guarded_label_values(&[&actor_id_str, &fragment_id_str, "left"]); + + let right_join_cached_entry_count = self + .metrics + .join_cached_entry_count + .with_guarded_label_values(&[&actor_id_str, &fragment_id_str, "right"]); + + let mut start_time = Instant::now(); + + while let Some(msg) = aligned_stream + .next() + .instrument_await("hash_join_barrier_align") + .await + { + join_actor_input_waiting_duration_ns.inc_by(start_time.elapsed().as_nanos() as u64); + match msg? { + AlignedMessage::WatermarkLeft(watermark) => { + for watermark_to_emit in self.handle_watermark(SideType::Left, watermark)? { + yield Message::Watermark(watermark_to_emit); + } + } + AlignedMessage::WatermarkRight(watermark) => { + for watermark_to_emit in self.handle_watermark(SideType::Right, watermark)? 
{ + yield Message::Watermark(watermark_to_emit); + } + } + AlignedMessage::Left(chunk) => { + let mut left_time = Duration::from_nanos(0); + let mut left_start_time = Instant::now(); + #[for_await] + for chunk in Self::eq_join_left(EqJoinArgs { + ctx: &self.ctx, + side_l: &mut self.side_l, + side_r: &mut self.side_r, + asof_desc: &self.asof_desc, + actual_output_data_types: &self.actual_output_data_types, + // inequality_watermarks: &self.inequality_watermarks, + chunk, + chunk_size: self.chunk_size, + cnt_rows_received: &mut self.cnt_rows_received, + high_join_amplification_threshold: self.high_join_amplification_threshold, + }) { + left_time += left_start_time.elapsed(); + yield Message::Chunk(chunk?); + left_start_time = Instant::now(); + } + left_time += left_start_time.elapsed(); + left_join_match_duration_ns.inc_by(left_time.as_nanos() as u64); + self.try_flush_data().await?; + } + AlignedMessage::Right(chunk) => { + let mut right_time = Duration::from_nanos(0); + let mut right_start_time = Instant::now(); + #[for_await] + for chunk in Self::eq_join_right(EqJoinArgs { + ctx: &self.ctx, + side_l: &mut self.side_l, + side_r: &mut self.side_r, + asof_desc: &self.asof_desc, + actual_output_data_types: &self.actual_output_data_types, + // inequality_watermarks: &self.inequality_watermarks, + chunk, + chunk_size: self.chunk_size, + cnt_rows_received: &mut self.cnt_rows_received, + high_join_amplification_threshold: self.high_join_amplification_threshold, + }) { + right_time += right_start_time.elapsed(); + yield Message::Chunk(chunk?); + right_start_time = Instant::now(); + } + right_time += right_start_time.elapsed(); + right_join_match_duration_ns.inc_by(right_time.as_nanos() as u64); + self.try_flush_data().await?; + } + AlignedMessage::Barrier(barrier) => { + let barrier_start_time = Instant::now(); + self.flush_data(barrier.epoch).await?; + + // Update the vnode bitmap for state tables of both sides if asked. + if let Some(vnode_bitmap) = barrier.as_update_vnode_bitmap(self.ctx.id) { + if self.side_l.ht.update_vnode_bitmap(vnode_bitmap.clone()) { + self.watermark_buffers + .values_mut() + .for_each(|buffers| buffers.clear()); + // self.inequality_watermarks.fill(None); + } + self.side_r.ht.update_vnode_bitmap(vnode_bitmap); + } + + // Report metrics of cached join rows/entries + for (join_cached_entry_count, ht) in [ + (&left_join_cached_entry_count, &self.side_l.ht), + (&right_join_cached_entry_count, &self.side_r.ht), + ] { + join_cached_entry_count.set(ht.entry_count() as i64); + } + + barrier_join_match_duration_ns + .inc_by(barrier_start_time.elapsed().as_nanos() as u64); + yield Message::Barrier(barrier); + } + } + start_time = Instant::now(); + } + } + + async fn flush_data(&mut self, epoch: EpochPair) -> StreamExecutorResult<()> { + // All changes to the state has been buffered in the mem-table of the state table. Just + // `commit` them here. + self.side_l.ht.flush(epoch).await?; + self.side_r.ht.flush(epoch).await?; + Ok(()) + } + + async fn try_flush_data(&mut self) -> StreamExecutorResult<()> { + // All changes to the state has been buffered in the mem-table of the state table. Just + // `commit` them here. + self.side_l.ht.try_flush().await?; + self.side_r.ht.try_flush().await?; + Ok(()) + } + + // We need to manually evict the cache. 
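// evict_cache just below bumps a shared row counter and only touches the join caches
// every EVICT_EVERY_N_ROWS = 16 input rows, amortizing eviction work across rows.
// The same shape in isolation (a sketch, names are illustrative):
struct AmortizedEvict {
    rows_since_evict: u32,
    every_n_rows: u32,
}

impl AmortizedEvict {
    // Returns true only once every `every_n_rows` calls; that is when the caller
    // should actually run eviction on its caches.
    fn on_row(&mut self) -> bool {
        self.rows_since_evict += 1;
        if self.rows_since_evict == self.every_n_rows {
            self.rows_since_evict = 0;
            true
        } else {
            false
        }
    }
}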
+ fn evict_cache( + side_update: &mut JoinSide, + side_match: &mut JoinSide, + cnt_rows_received: &mut u32, + ) { + *cnt_rows_received += 1; + if *cnt_rows_received == EVICT_EVERY_N_ROWS { + side_update.ht.evict(); + side_match.ht.evict(); + *cnt_rows_received = 0; + } + } + + fn handle_watermark( + &mut self, + side: SideTypePrimitive, + watermark: Watermark, + ) -> StreamExecutorResult> { + let (side_update, side_match) = if side == SideType::Left { + (&mut self.side_l, &mut self.side_r) + } else { + (&mut self.side_r, &mut self.side_l) + }; + + // State cleaning + if side_update.join_key_indices[0] == watermark.col_idx { + side_match.ht.update_watermark(watermark.val.clone()); + } + + // Select watermarks to yield. + let wm_in_jk = side_update + .join_key_indices + .iter() + .positions(|idx| *idx == watermark.col_idx); + let mut watermarks_to_emit = vec![]; + for idx in wm_in_jk { + let buffers = self + .watermark_buffers + .entry(idx) + .or_insert_with(|| BufferedWatermarks::with_ids([SideType::Left, SideType::Right])); + if let Some(selected_watermark) = buffers.handle_watermark(side, watermark.clone()) { + let empty_indices = vec![]; + let output_indices = side_update + .i2o_mapping_indexed + .get_vec(&side_update.join_key_indices[idx]) + .unwrap_or(&empty_indices) + .iter() + .chain( + side_match + .i2o_mapping_indexed + .get_vec(&side_match.join_key_indices[idx]) + .unwrap_or(&empty_indices), + ); + for output_idx in output_indices { + watermarks_to_emit.push(selected_watermark.clone().with_idx(*output_idx)); + } + }; + } + Ok(watermarks_to_emit) + } + + /// the data the hash table and match the coming + /// data chunk with the executor state + async fn hash_eq_match( + key: &K, + ht: &mut JoinHashMap, + ) -> StreamExecutorResult> { + if !key.null_bitmap().is_subset(ht.null_matched()) { + Ok(None) + } else { + ht.take_state(key).await.map(Some) + } + } + + #[try_stream(ok = StreamChunk, error = StreamExecutorError)] + async fn eq_join_left(args: EqJoinArgs<'_, K, S>) { + let EqJoinArgs { + ctx: _, + side_l, + side_r, + asof_desc, + actual_output_data_types, + // inequality_watermarks, + chunk, + chunk_size, + cnt_rows_received, + high_join_amplification_threshold: _, + } = args; + + let (side_update, side_match) = (side_l, side_r); + + let mut join_chunk_builder = + JoinChunkBuilder::::new(JoinStreamChunkBuilder::new( + chunk_size, + actual_output_data_types.to_vec(), + side_update.i2o_mapping.clone(), + side_match.i2o_mapping.clone(), + )); + + let keys = K::build_many(&side_update.join_key_indices, chunk.data_chunk()); + for (r, key) in chunk.rows_with_holes().zip_eq_debug(keys.iter()) { + let Some((op, row)) = r else { + continue; + }; + Self::evict_cache(side_update, side_match, cnt_rows_received); + + let matched_rows = if !side_update.ht.check_inequal_key_null(&row) { + Self::hash_eq_match(key, &mut side_match.ht).await? 
+ } else { + None + }; + let inequal_key = side_update.ht.serialize_inequal_key_from_row(row); + + if let Some(matched_rows) = matched_rows { + let matched_row_by_inequality = match asof_desc.inequality_type { + AsOfInequalityType::Lt => matched_rows.lower_bound_by_inequality( + Bound::Excluded(&inequal_key), + &side_match.all_data_types, + ), + AsOfInequalityType::Le => matched_rows.lower_bound_by_inequality( + Bound::Included(&inequal_key), + &side_match.all_data_types, + ), + AsOfInequalityType::Gt => matched_rows.upper_bound_by_inequality( + Bound::Excluded(&inequal_key), + &side_match.all_data_types, + ), + AsOfInequalityType::Ge => matched_rows.upper_bound_by_inequality( + Bound::Included(&inequal_key), + &side_match.all_data_types, + ), + }; + match op { + Op::Insert | Op::UpdateInsert => { + if let Some(matched_row_by_inequality) = matched_row_by_inequality { + let matched_row = matched_row_by_inequality?; + + if let Some(chunk) = + join_chunk_builder.with_match_on_insert(&row, &matched_row) + { + yield chunk; + } + } else if let Some(chunk) = + join_chunk_builder.forward_if_not_matched(Op::Insert, row) + { + yield chunk; + } + side_update.ht.insert_row(key, row).await?; + } + Op::Delete | Op::UpdateDelete => { + if let Some(matched_row_by_inequality) = matched_row_by_inequality { + let matched_row = matched_row_by_inequality?; + + if let Some(chunk) = + join_chunk_builder.with_match_on_delete(&row, &matched_row) + { + yield chunk; + } + } else if let Some(chunk) = + join_chunk_builder.forward_if_not_matched(Op::Delete, row) + { + yield chunk; + } + side_update.ht.delete_row(key, row)?; + } + } + // Insert back the state taken from ht. + side_match.ht.update_state(key, matched_rows); + } else { + // Row which violates null-safe bitmap will never be matched so we need not + // store. + match op { + Op::Insert | Op::UpdateInsert => { + if let Some(chunk) = + join_chunk_builder.forward_if_not_matched(Op::Insert, row) + { + yield chunk; + } + } + Op::Delete | Op::UpdateDelete => { + if let Some(chunk) = + join_chunk_builder.forward_if_not_matched(Op::Delete, row) + { + yield chunk; + } + } + } + } + } + if let Some(chunk) = join_chunk_builder.take() { + yield chunk; + } + } + + #[try_stream(ok = StreamChunk, error = StreamExecutorError)] + async fn eq_join_right(args: EqJoinArgs<'_, K, S>) { + let EqJoinArgs { + ctx, + side_l, + side_r, + asof_desc, + actual_output_data_types, + // inequality_watermarks, + chunk, + chunk_size, + cnt_rows_received, + high_join_amplification_threshold, + } = args; + + let (side_update, side_match) = (side_r, side_l); + + let mut join_chunk_builder = JoinStreamChunkBuilder::new( + chunk_size, + actual_output_data_types.to_vec(), + side_update.i2o_mapping.clone(), + side_match.i2o_mapping.clone(), + ); + + let join_matched_rows_metrics = ctx + .streaming_metrics + .join_matched_join_keys + .with_guarded_label_values(&[ + &ctx.id.to_string(), + &ctx.fragment_id.to_string(), + &side_update.ht.table_id().to_string(), + ]); + + let keys = K::build_many(&side_update.join_key_indices, chunk.data_chunk()); + for (r, key) in chunk.rows_with_holes().zip_eq_debug(keys.iter()) { + let Some((op, row)) = r else { + continue; + }; + let mut join_matched_rows_cnt = 0; + + Self::evict_cache(side_update, side_match, cnt_rows_received); + + let matched_rows = if !side_update.ht.check_inequal_key_null(&row) { + Self::hash_eq_match(key, &mut side_match.ht).await? 
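// How the Lt/Le/Gt/Ge mapping in eq_join_left above picks the matched row, shown with
// a plain BTreeMap over inequality values; this is an analogy, not the executor's
// actual cache type:
use std::collections::BTreeMap;
use std::ops::Bound::{Excluded, Included, Unbounded};

#[test]
fn asof_bound_selection_demo() {
    // Right-side rows keyed by their inequality value.
    let right: BTreeMap<i64, &str> = BTreeMap::from([(1, "a"), (4, "b"), (7, "c")]);
    // `Lt` with a left value of 2: smallest right key strictly greater than 2,
    // i.e. a lower bound with an Excluded bound, as in the match above.
    let lt = right.range((Excluded(2), Unbounded)).next();
    // `Ge` with a left value of 2: largest right key <= 2 (upper bound, Included).
    let ge = right.range((Unbounded, Included(2))).next_back();
    assert_eq!(lt, Some((&4, &"b")));
    assert_eq!(ge, Some((&1, &"a")));
}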
+ } else { + None + }; + let inequal_key = side_update.ht.serialize_inequal_key_from_row(row); + + if let Some(matched_rows) = matched_rows { + let update_rows = Self::hash_eq_match(key, &mut side_update.ht).await?.expect("None is not expected because we have checked null in key when getting matched_rows"); + let right_inequality_index = update_rows.inequality_index(); + let (row_to_delete_r, row_to_insert_r) = + if let Some(pks) = right_inequality_index.get(&inequal_key) { + assert!(!pks.is_empty()); + let row_pk = side_match.ht.serialize_pk_from_row(row); + match op { + Op::Insert | Op::UpdateInsert => { + // If there are multiple rows match the inequality key in the right table, we use one with smallest pk. + let smallest_pk = pks.first_key_sorted().unwrap(); + if smallest_pk > &row_pk { + // smallest_pk is in the cache index, so it must exist in the cache. + if let Some(to_delete_row) = update_rows + .get_by_indexed_pk(smallest_pk, &side_update.all_data_types) + { + ( + Some(Either::Left(to_delete_row?.row)), + Some(Either::Right(row)), + ) + } else { + // Something wrong happened. Ignore this row in non strict consistency mode. + (None, None) + } + } else { + // No affected row in the right table. + (None, None) + } + } + Op::Delete | Op::UpdateDelete => { + let smallest_pk = pks.first_key_sorted().unwrap(); + if smallest_pk == &row_pk { + if let Some(second_smallest_pk) = pks.second_key_sorted() { + if let Some(to_insert_row) = update_rows.get_by_indexed_pk( + second_smallest_pk, + &side_update.all_data_types, + ) { + ( + Some(Either::Right(row)), + Some(Either::Left(to_insert_row?.row)), + ) + } else { + // Something wrong happened. Ignore this row in non strict consistency mode. + (None, None) + } + } else { + (Some(Either::Right(row)), None) + } + } else { + // No affected row in the right table. + (None, None) + } + } + } + } else { + match op { + // Decide the row_to_delete later + Op::Insert | Op::UpdateInsert => (None, Some(Either::Right(row))), + // Decide the row_to_insert later + Op::Delete | Op::UpdateDelete => (Some(Either::Right(row)), None), + } + }; + + // 4 cases for row_to_delete_r and row_to_insert_r: + // 1. Some(_), Some(_): delete row_to_delete_r and insert row_to_insert_r + // 2. None, Some(_) : row_to_delete to be decided by the nearest inequality key + // 3. Some(_), None : row_to_insert to be decided by the nearest inequality key + // 4. None, None : do nothing + if row_to_delete_r.is_none() && row_to_insert_r.is_none() { + // no row to delete or insert. + } else { + let prev_inequality_key = + right_inequality_index.upper_bound_key(Bound::Excluded(&inequal_key)); + let next_inequality_key = + right_inequality_index.lower_bound_key(Bound::Excluded(&inequal_key)); + let affected_row_r = match asof_desc.inequality_type { + AsOfInequalityType::Lt | AsOfInequalityType::Le => next_inequality_key + .and_then(|k| { + update_rows.get_first_by_inequality(k, &side_update.all_data_types) + }), + AsOfInequalityType::Gt | AsOfInequalityType::Ge => prev_inequality_key + .and_then(|k| { + update_rows.get_first_by_inequality(k, &side_update.all_data_types) + }), + } + .transpose()? 
+ .map(|r| Either::Left(r.row)); + + let (row_to_delete_r, row_to_insert_r) = + match (&row_to_delete_r, &row_to_insert_r) { + (Some(_), Some(_)) => (row_to_delete_r, row_to_insert_r), + (None, Some(_)) => (affected_row_r, row_to_insert_r), + (Some(_), None) => (row_to_delete_r, affected_row_r), + (None, None) => unreachable!(), + }; + let range = match asof_desc.inequality_type { + AsOfInequalityType::Lt => ( + prev_inequality_key.map_or_else(|| Bound::Unbounded, Bound::Included), + Bound::Excluded(&inequal_key), + ), + AsOfInequalityType::Le => ( + prev_inequality_key.map_or_else(|| Bound::Unbounded, Bound::Excluded), + Bound::Included(&inequal_key), + ), + AsOfInequalityType::Gt => ( + Bound::Excluded(&inequal_key), + next_inequality_key.map_or_else(|| Bound::Unbounded, Bound::Included), + ), + AsOfInequalityType::Ge => ( + Bound::Included(&inequal_key), + next_inequality_key.map_or_else(|| Bound::Unbounded, Bound::Excluded), + ), + }; + + let rows_l = + matched_rows.range_by_inequality(range, &side_match.all_data_types); + for row_l in rows_l { + join_matched_rows_cnt += 1; + let row_l = row_l?.row; + if let Some(row_to_delete_r) = &row_to_delete_r { + if let Some(chunk) = + join_chunk_builder.append_row(Op::Delete, row_to_delete_r, &row_l) + { + yield chunk; + } + } else if is_as_of_left_outer(T) { + if let Some(chunk) = + join_chunk_builder.append_row_matched(Op::Delete, &row_l) + { + yield chunk; + } + } + if let Some(row_to_insert_r) = &row_to_insert_r { + if let Some(chunk) = + join_chunk_builder.append_row(Op::Insert, row_to_insert_r, &row_l) + { + yield chunk; + } + } else if is_as_of_left_outer(T) { + if let Some(chunk) = + join_chunk_builder.append_row_matched(Op::Insert, &row_l) + { + yield chunk; + } + } + } + } + // Insert back the state taken from ht. + side_match.ht.update_state(key, matched_rows); + side_update.ht.update_state(key, update_rows); + + match op { + Op::Insert | Op::UpdateInsert => { + side_update.ht.insert_row(key, row).await?; + } + Op::Delete | Op::UpdateDelete => { + side_update.ht.delete_row(key, row)?; + } + } + } else { + // Row which violates null-safe bitmap will never be matched so we need not + // store. + // Noop here because we only support left outer AsOf join. 
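// The range computed above bounds which left rows a right-side change can affect:
// only those whose inequality key lies between the neighbouring right keys. A worked
// example with plain integers (BTreeMap standing in for the inequality index):
use std::collections::BTreeMap;
use std::ops::Bound::{Excluded, Unbounded};

#[test]
fn affected_slice_between_neighbouring_right_keys() {
    // Existing right-side inequality keys, with a new right row (key 5) arriving.
    let right: BTreeMap<i64, &str> = BTreeMap::from([(3, "r3"), (7, "r7")]);
    let new_key = 5;
    let prev = right.range((Unbounded, Excluded(new_key))).next_back().map(|(k, _)| *k);
    let next = right.range((Excluded(new_key), Unbounded)).next().map(|(k, _)| *k);
    assert_eq!((prev, next), (Some(3), Some(7)));
    // For `AsOf ... Lt`, only left rows with keys in [3, 5), which previously matched
    // key 7, switch to the new row, so only that slice of the left side is re-emitted.
}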
+ } + join_matched_rows_metrics.observe(join_matched_rows_cnt as _); + if join_matched_rows_cnt > high_join_amplification_threshold { + let join_key_data_types = side_update.ht.join_key_data_types(); + let key = key.deserialize(join_key_data_types)?; + tracing::warn!(target: "high_join_amplification", + matched_rows_len = join_matched_rows_cnt, + update_table_id = side_update.ht.table_id(), + match_table_id = side_match.ht.table_id(), + join_key = ?key, + actor_id = ctx.id, + fragment_id = ctx.fragment_id, + "large rows matched for join key when AsOf join updating right side", + ); + } + } + if let Some(chunk) = join_chunk_builder.take() { + yield chunk; + } + } +} + +#[cfg(test)] +mod tests { + use std::sync::atomic::AtomicU64; + + use risingwave_common::array::*; + use risingwave_common::catalog::{ColumnDesc, ColumnId, Field, TableId}; + use risingwave_common::hash::Key64; + use risingwave_common::util::epoch::test_epoch; + use risingwave_common::util::sort_util::OrderType; + use risingwave_storage::memory::MemoryStateStore; + + use super::*; + use crate::executor::test_utils::{MessageSender, MockSource, StreamExecutorTestExt}; + + async fn create_in_memory_state_table( + mem_state: MemoryStateStore, + data_types: &[DataType], + order_types: &[OrderType], + pk_indices: &[usize], + table_id: u32, + ) -> (StateTable, StateTable) { + let column_descs = data_types + .iter() + .enumerate() + .map(|(id, data_type)| ColumnDesc::unnamed(ColumnId::new(id as i32), data_type.clone())) + .collect_vec(); + let state_table = StateTable::new_without_distribution( + mem_state.clone(), + TableId::new(table_id), + column_descs, + order_types.to_vec(), + pk_indices.to_vec(), + ) + .await; + + // Create degree table + let mut degree_table_column_descs = vec![]; + pk_indices.iter().enumerate().for_each(|(pk_id, idx)| { + degree_table_column_descs.push(ColumnDesc::unnamed( + ColumnId::new(pk_id as i32), + data_types[*idx].clone(), + )) + }); + degree_table_column_descs.push(ColumnDesc::unnamed( + ColumnId::new(pk_indices.len() as i32), + DataType::Int64, + )); + let degree_state_table = StateTable::new_without_distribution( + mem_state, + TableId::new(table_id + 1), + degree_table_column_descs, + order_types.to_vec(), + pk_indices.to_vec(), + ) + .await; + (state_table, degree_state_table) + } + + async fn create_executor( + asof_desc: AsOfDesc, + ) -> (MessageSender, MessageSender, BoxedMessageStream) { + let schema = Schema { + fields: vec![ + Field::unnamed(DataType::Int64), // join key + Field::unnamed(DataType::Int64), + Field::unnamed(DataType::Int64), + ], + }; + let (tx_l, source_l) = MockSource::channel(); + let source_l = source_l.into_executor(schema.clone(), vec![1]); + let (tx_r, source_r) = MockSource::channel(); + let source_r = source_r.into_executor(schema, vec![1]); + let params_l = JoinParams::new(vec![0], vec![1]); + let params_r = JoinParams::new(vec![0], vec![1]); + + let mem_state = MemoryStateStore::new(); + + let (state_l, degree_state_l) = create_in_memory_state_table( + mem_state.clone(), + &[DataType::Int64, DataType::Int64, DataType::Int64], + &[ + OrderType::ascending(), + OrderType::ascending(), + OrderType::ascending(), + ], + &[0, asof_desc.left_idx, 1], + 0, + ) + .await; + + let (state_r, degree_state_r) = create_in_memory_state_table( + mem_state, + &[DataType::Int64, DataType::Int64, DataType::Int64], + &[ + OrderType::ascending(), + OrderType::ascending(), + OrderType::ascending(), + ], + &[0, asof_desc.right_idx, 1], + 2, + ) + .await; + + let schema: Schema = 
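// Tracing the first expected chunk of test_as_of_inner_join below (descriptor
// { left_idx: 0, right_idx: 2, Lt }, so a left row matches the right row with the
// smallest column-2 value strictly greater than the left row's column 0): left
// (2, 5, 8) is in state; right (2, 1, 7) arrives and 2 < 7 holds, so the pair is
// emitted; (2, 2, 1) changes nothing because 2 < 1 does not hold; (2, 3, 4) has
// 2 < 4 < 7, so it becomes the closer match and the earlier pair is retracted.
// As I read the from_pretty literals, `I` columns are Int64, `+`/`-` are insert and
// delete ops, and `.` is NULL. A toy check of the "closest value above" step:
use std::collections::BTreeSet;
use std::ops::Bound::{Excluded, Unbounded};

#[test]
fn closest_right_value_above_left_value() {
    // Right column-2 values stored for join key 2 after the first three rows of chunk_r1.
    let right_vals: BTreeSet<i64> = [7, 1, 4].into_iter().collect();
    let left_val = 2; // column 0 of the left row (2, 5, 8)
    let best = right_vals.range((Excluded(left_val), Unbounded)).next();
    // (2, 3, 4) becomes the closer match, so the earlier pair with (2, 1, 7) is retracted.
    assert_eq!(best, Some(&4));
}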
[source_l.schema().fields(), source_r.schema().fields()] + .concat() + .into_iter() + .collect(); + let schema_len = schema.len(); + let info = ExecutorInfo { + schema, + pk_indices: vec![1], + identity: "HashJoinExecutor".to_string(), + }; + + let executor = AsOfJoinExecutor::::new( + ActorContext::for_test(123), + info, + source_l, + source_r, + params_l, + params_r, + vec![false], + (0..schema_len).collect_vec(), + state_l, + degree_state_l, + state_r, + degree_state_r, + Arc::new(AtomicU64::new(0)), + Arc::new(StreamingMetrics::unused()), + 1024, + 2048, + asof_desc, + ); + (tx_l, tx_r, executor.boxed().execute()) + } + + #[tokio::test] + async fn test_as_of_inner_join() -> StreamExecutorResult<()> { + let asof_desc = AsOfDesc { + left_idx: 0, + right_idx: 2, + inequality_type: AsOfInequalityType::Lt, + }; + + let chunk_l1 = StreamChunk::from_pretty( + " I I I + + 1 4 7 + + 2 5 8 + + 3 6 9", + ); + let chunk_l2 = StreamChunk::from_pretty( + " I I I + + 3 8 1 + - 3 8 1", + ); + let chunk_r1 = StreamChunk::from_pretty( + " I I I + + 2 1 7 + + 2 2 1 + + 2 3 4 + + 2 4 2 + + 6 1 9 + + 6 2 9", + ); + let chunk_r2 = StreamChunk::from_pretty( + " I I I + - 2 3 4", + ); + let chunk_r3 = StreamChunk::from_pretty( + " I I I + + 2 3 3", + ); + let chunk_l3 = StreamChunk::from_pretty( + " I I I + - 2 5 8", + ); + let chunk_l4 = StreamChunk::from_pretty( + " I I I + + 6 3 1 + + 6 4 1", + ); + let chunk_r4 = StreamChunk::from_pretty( + " I I I + - 6 1 9", + ); + + let (mut tx_l, mut tx_r, mut hash_join) = + create_executor::<{ AsOfJoinType::Inner }>(asof_desc).await; + + // push the init barrier for left and right + tx_l.push_barrier(test_epoch(1), false); + tx_r.push_barrier(test_epoch(1), false); + hash_join.next_unwrap_ready_barrier()?; + + // push the 1st left chunk + tx_l.push_chunk(chunk_l1); + hash_join.next_unwrap_pending(); + + // push the init barrier for left and right + tx_l.push_barrier(test_epoch(2), false); + tx_r.push_barrier(test_epoch(2), false); + hash_join.next_unwrap_ready_barrier()?; + + // push the 2nd left chunk + tx_l.push_chunk(chunk_l2); + hash_join.next_unwrap_pending(); + + // push the 1st right chunk + tx_r.push_chunk(chunk_r1); + let chunk = hash_join.next_unwrap_ready_chunk()?; + assert_eq!( + chunk, + StreamChunk::from_pretty( + " I I I I I I + + 2 5 8 2 1 7 + - 2 5 8 2 1 7 + + 2 5 8 2 3 4" + ) + ); + + // push the 2nd right chunk + tx_r.push_chunk(chunk_r2); + let chunk = hash_join.next_unwrap_ready_chunk()?; + assert_eq!( + chunk, + StreamChunk::from_pretty( + " I I I I I I + - 2 5 8 2 3 4 + + 2 5 8 2 1 7" + ) + ); + + // push the 3rd right chunk + tx_r.push_chunk(chunk_r3); + let chunk = hash_join.next_unwrap_ready_chunk()?; + assert_eq!( + chunk, + StreamChunk::from_pretty( + " I I I I I I + - 2 5 8 2 1 7 + + 2 5 8 2 3 3" + ) + ); + + // push the 3rd left chunk + tx_l.push_chunk(chunk_l3); + let chunk = hash_join.next_unwrap_ready_chunk()?; + assert_eq!( + chunk, + StreamChunk::from_pretty( + " I I I I I I + - 2 5 8 2 3 3" + ) + ); + + // push the 4th left chunk + tx_l.push_chunk(chunk_l4); + let chunk = hash_join.next_unwrap_ready_chunk()?; + assert_eq!( + chunk, + StreamChunk::from_pretty( + " I I I I I I + + 6 3 1 6 1 9 + + 6 4 1 6 1 9" + ) + ); + + // push the 4th right chunk + tx_r.push_chunk(chunk_r4); + let chunk = hash_join.next_unwrap_ready_chunk()?; + assert_eq!( + chunk, + StreamChunk::from_pretty( + " I I I I I I + - 6 3 1 6 1 9 + + 6 3 1 6 2 9 + - 6 4 1 6 1 9 + + 6 4 1 6 2 9" + ) + ); + + Ok(()) + } + + #[tokio::test] + async fn 
test_as_of_left_outer_join() -> StreamExecutorResult<()> { + let asof_desc = AsOfDesc { + left_idx: 1, + right_idx: 2, + inequality_type: AsOfInequalityType::Ge, + }; + + let chunk_l1 = StreamChunk::from_pretty( + " I I I + + 1 4 7 + + 2 5 8 + + 3 6 9", + ); + let chunk_l2 = StreamChunk::from_pretty( + " I I I + + 3 8 1 + - 3 8 1", + ); + let chunk_r1 = StreamChunk::from_pretty( + " I I I + + 2 3 4 + + 2 2 5 + + 2 1 5 + + 6 1 8 + + 6 2 9", + ); + let chunk_r2 = StreamChunk::from_pretty( + " I I I + - 2 3 4 + - 2 1 5 + - 2 2 5", + ); + let chunk_l3 = StreamChunk::from_pretty( + " I I I + + 6 8 9", + ); + let chunk_r3 = StreamChunk::from_pretty( + " I I I + - 6 1 8", + ); + + let (mut tx_l, mut tx_r, mut hash_join) = + create_executor::<{ AsOfJoinType::LeftOuter }>(asof_desc).await; + + // push the init barrier for left and right + tx_l.push_barrier(test_epoch(1), false); + tx_r.push_barrier(test_epoch(1), false); + hash_join.next_unwrap_ready_barrier()?; + + // push the 1st left chunk + tx_l.push_chunk(chunk_l1); + let chunk = hash_join.next_unwrap_ready_chunk()?; + assert_eq!( + chunk, + StreamChunk::from_pretty( + " I I I I I I + + 1 4 7 . . . + + 2 5 8 . . . + + 3 6 9 . . ." + ) + ); + + // push the init barrier for left and right + tx_l.push_barrier(test_epoch(2), false); + tx_r.push_barrier(test_epoch(2), false); + hash_join.next_unwrap_ready_barrier()?; + + // push the 2nd left chunk + tx_l.push_chunk(chunk_l2); + let chunk = hash_join.next_unwrap_ready_chunk()?; + assert_eq!( + chunk, + StreamChunk::from_pretty( + " I I I I I I + + 3 8 1 . . . + - 3 8 1 . . ." + ) + ); + + // push the 1st right chunk + tx_r.push_chunk(chunk_r1); + let chunk = hash_join.next_unwrap_ready_chunk()?; + assert_eq!( + chunk, + StreamChunk::from_pretty( + " I I I I I I + - 2 5 8 . . . + + 2 5 8 2 3 4 + - 2 5 8 2 3 4 + + 2 5 8 2 2 5 + - 2 5 8 2 2 5 + + 2 5 8 2 1 5" + ) + ); + + // push the 2nd right chunk + tx_r.push_chunk(chunk_r2); + let chunk = hash_join.next_unwrap_ready_chunk()?; + assert_eq!( + chunk, + StreamChunk::from_pretty( + " I I I I I I + - 2 5 8 2 1 5 + + 2 5 8 2 2 5 + - 2 5 8 2 2 5 + + 2 5 8 . . ." + ) + ); + + // push the 3rd left chunk + tx_l.push_chunk(chunk_l3); + let chunk = hash_join.next_unwrap_ready_chunk()?; + assert_eq!( + chunk, + StreamChunk::from_pretty( + " I I I I I I + + 6 8 9 6 1 8" + ) + ); + + // push the 3rd right chunk + tx_r.push_chunk(chunk_r3); + let chunk = hash_join.next_unwrap_ready_chunk()?; + assert_eq!( + chunk, + StreamChunk::from_pretty( + " I I I I I I + - 6 8 9 6 1 8 + + 6 8 9 . . ." 
+ ) + ); + Ok(()) + } +} diff --git a/src/stream/src/executor/exchange/input.rs b/src/stream/src/executor/exchange/input.rs index e00a0da45979a..7ecac2c625e69 100644 --- a/src/stream/src/executor/exchange/input.rs +++ b/src/stream/src/executor/exchange/input.rs @@ -15,16 +15,13 @@ use std::pin::Pin; use std::task::{Context, Poll}; -use anyhow::{anyhow, Context as _}; -use futures::pin_mut; -use futures_async_stream::try_stream; +use anyhow::anyhow; +use local_input::LocalInputStreamInner; use pin_project::pin_project; use risingwave_common::util::addr::{is_local_address, HostAddr}; -use risingwave_pb::task_service::{permits, GetStreamResponse}; use risingwave_rpc_client::ComputeClientPool; use tokio::sync::mpsc; -use super::error::ExchangeChannelClosed; use super::permit::Receiver; use crate::executor::prelude::*; use crate::executor::{DispatcherBarrier, DispatcherMessage}; @@ -64,7 +61,6 @@ pub struct LocalInput { actor_id: ActorId, } -type LocalInputStreamInner = impl MessageStream; async fn process_msg<'a>( msg: DispatcherMessage, @@ -110,7 +106,7 @@ impl LocalInput { local_barrier_manager: LocalBarrierManager, ) -> Self { Self { - inner: Self::run( + inner: local_input::run( channel, upstream_actor_id, self_actor_id, @@ -119,9 +115,36 @@ impl LocalInput { actor_id: upstream_actor_id, } } +} + +mod local_input { + use await_tree::InstrumentAwait; + + use crate::executor::exchange::error::ExchangeChannelClosed; + use crate::executor::exchange::input::process_msg; + use crate::executor::exchange::permit::Receiver; + use crate::executor::prelude::try_stream; + use crate::executor::{Message, StreamExecutorError}; + use crate::task::{ActorId, LocalBarrierManager}; + + pub(super) type LocalInputStreamInner = impl crate::executor::MessageStream; + + pub(super) fn run( + channel: Receiver, + upstream_actor_id: ActorId, + self_actor_id: ActorId, + local_barrier_manager: LocalBarrierManager, + ) -> LocalInputStreamInner { + run_inner( + channel, + upstream_actor_id, + self_actor_id, + local_barrier_manager, + ) + } #[try_stream(ok = Message, error = StreamExecutorError)] - async fn run( + async fn run_inner( mut channel: Receiver, upstream_actor_id: ActorId, self_actor_id: ActorId, @@ -166,7 +189,8 @@ pub struct RemoteInput { actor_id: ActorId, } -type RemoteInputStreamInner = impl MessageStream; + +use remote_input::RemoteInputStreamInner; impl RemoteInput { /// Create a remote input from compute client and related info. 
Should provide the corresponding @@ -184,7 +208,7 @@ impl RemoteInput { Self { actor_id, - inner: Self::run( + inner: remote_input::run( local_barrier_manager, client_pool, upstream_addr, @@ -195,9 +219,48 @@ impl RemoteInput { ), } } +} + +mod remote_input { + use std::sync::Arc; + + use anyhow::Context; + use await_tree::InstrumentAwait; + use risingwave_common::util::addr::HostAddr; + use risingwave_pb::task_service::{permits, GetStreamResponse}; + use risingwave_rpc_client::ComputeClientPool; + + use crate::executor::exchange::error::ExchangeChannelClosed; + use crate::executor::exchange::input::process_msg; + use crate::executor::monitor::StreamingMetrics; + use crate::executor::prelude::{pin_mut, try_stream, StreamExt}; + use crate::executor::{DispatcherMessage, Message, StreamExecutorError}; + use crate::task::{LocalBarrierManager, UpDownActorIds, UpDownFragmentIds}; + + pub(super) type RemoteInputStreamInner = impl crate::executor::MessageStream; + + pub(super) fn run( + local_barrier_manager: LocalBarrierManager, + client_pool: ComputeClientPool, + upstream_addr: HostAddr, + up_down_ids: UpDownActorIds, + up_down_frag: UpDownFragmentIds, + metrics: Arc, + batched_permits_limit: usize, + ) -> RemoteInputStreamInner { + run_inner( + local_barrier_manager, + client_pool, + upstream_addr, + up_down_ids, + up_down_frag, + metrics, + batched_permits_limit, + ) + } #[try_stream(ok = Message, error = StreamExecutorError)] - async fn run( + async fn run_inner( local_barrier_manager: LocalBarrierManager, client_pool: ComputeClientPool, upstream_addr: HostAddr, diff --git a/src/stream/src/executor/hash_join.rs b/src/stream/src/executor/hash_join.rs index e1a1b177bcfcc..e23c17724be02 100644 --- a/src/stream/src/executor/hash_join.rs +++ b/src/stream/src/executor/hash_join.rs @@ -396,6 +396,7 @@ impl HashJoinExecutor HashJoinExecutor; +type InequalKeyType = Vec; pub type StateValueType = EncodedJoinRow; pub type HashValueType = Box; @@ -154,6 +157,21 @@ impl JoinHashMapMetrics { } } +/// Inequality key description for `AsOf` join. +struct InequalityKeyDesc { + idx: usize, + serializer: OrderedRowSerde, +} + +impl InequalityKeyDesc { + /// Serialize the inequality key from a row. + pub fn serialize_inequal_key_from_row(&self, row: impl Row) -> InequalKeyType { + let indices = vec![self.idx]; + let inequality_key = row.project(&indices); + inequality_key.memcmp_serialize(&self.serializer) + } +} + pub struct JoinHashMap { /// Store the join states. inner: JoinHashMapInner, @@ -182,6 +200,8 @@ pub struct JoinHashMap { need_degree_table: bool, /// Pk is part of the join key. pk_contained_in_jk: bool, + /// Inequality key description for `AsOf` join. 
+ inequality_key_desc: Option, /// Metrics of the hash map metrics: JoinHashMapMetrics, } @@ -230,6 +250,7 @@ impl JoinHashMap { null_matched: K::Bitmap, need_degree_table: bool, pk_contained_in_jk: bool, + inequality_key_idx: Option, metrics: Arc, actor_id: ActorId, fragment_id: FragmentId, @@ -246,6 +267,14 @@ impl JoinHashMap { vec![OrderType::ascending(); state_pk_indices.len()], ); + let inequality_key_desc = inequality_key_idx.map(|idx| { + let serializer = OrderedRowSerde::new( + vec![state_all_data_types[idx].clone()], + vec![OrderType::ascending()], + ); + InequalityKeyDesc { idx, serializer } + }); + let join_table_id = state_table.table_id(); let state = TableInner { pk_indices: state_pk_indices, @@ -286,6 +315,7 @@ impl JoinHashMap { degree_state, need_degree_table, pk_contained_in_jk, + inequality_key_desc, metrics: JoinHashMapMetrics::new(&metrics, actor_id, fragment_id, side, join_table_id), } } @@ -427,11 +457,16 @@ impl JoinHashMap { let degree_i64 = degree_row .datum_at(degree_row.len() - 1) .expect("degree should not be NULL"); + let inequality_key = self + .inequality_key_desc + .as_ref() + .map(|desc| desc.serialize_inequal_key_from_row(row.row())); entry_state .insert( pk, JoinRow::new(row.row(), degree_i64.into_int64() as u64) .encode(), + inequality_key, ) .with_context(|| self.state.error_context(row.row()))?; } @@ -459,6 +494,10 @@ impl JoinHashMap { .as_ref() .project(&self.state.pk_indices) .memcmp_serialize(&self.pk_serializer); + let inequality_key = self + .inequality_key_desc + .as_ref() + .map(|desc| desc.serialize_inequal_key_from_row(row.row())); let degree_i64 = degree_row .datum_at(degree_row.len() - 1) .expect("degree should not be NULL"); @@ -466,6 +505,7 @@ impl JoinHashMap { .insert( pk, JoinRow::new(row.row(), degree_i64.into_int64() as u64).encode(), + inequality_key, ) .with_context(|| self.state.error_context(row.row()))?; } @@ -486,8 +526,12 @@ impl JoinHashMap { .as_ref() .project(&self.state.pk_indices) .memcmp_serialize(&self.pk_serializer); + let inequality_key = self + .inequality_key_desc + .as_ref() + .map(|desc| desc.serialize_inequal_key_from_row(row.row())); entry_state - .insert(pk, JoinRow::new(row.row(), 0).encode()) + .insert(pk, JoinRow::new(row.row(), 0).encode(), inequality_key) .with_context(|| self.state.error_context(row.row()))?; } }; @@ -511,9 +555,12 @@ impl JoinHashMap { /// Insert a join row #[allow(clippy::unused_async)] pub async fn insert(&mut self, key: &K, value: JoinRow) -> StreamExecutorResult<()> { - let pk = (&value.row) - .project(&self.state.pk_indices) - .memcmp_serialize(&self.pk_serializer); + let pk = self.serialize_pk_from_row(&value.row); + + let inequality_key = self + .inequality_key_desc + .as_ref() + .map(|desc| desc.serialize_inequal_key_from_row(&value.row)); // TODO(yuhao): avoid this `contains`. // https://github.com/risingwavelabs/risingwave/issues/9233 @@ -521,14 +568,14 @@ impl JoinHashMap { // Update cache let mut entry = self.inner.get_mut(key).unwrap(); entry - .insert(pk, value.encode()) + .insert(pk, value.encode(), inequality_key) .with_context(|| self.state.error_context(&value.row))?; } else if self.pk_contained_in_jk { // Refill cache when the join key exist in neither cache or storage. 
self.metrics.insert_cache_miss_count += 1; let mut state = JoinEntryState::default(); state - .insert(pk, value.encode()) + .insert(pk, value.encode(), inequality_key) .with_context(|| self.state.error_context(&value.row))?; self.update_state(key, state.into()); } @@ -545,24 +592,25 @@ impl JoinHashMap { #[allow(clippy::unused_async)] pub async fn insert_row(&mut self, key: &K, value: impl Row) -> StreamExecutorResult<()> { let join_row = JoinRow::new(&value, 0); - let pk = (&value) - .project(&self.state.pk_indices) - .memcmp_serialize(&self.pk_serializer); - + let pk = self.serialize_pk_from_row(&value); + let inequality_key = self + .inequality_key_desc + .as_ref() + .map(|desc| desc.serialize_inequal_key_from_row(&value)); // TODO(yuhao): avoid this `contains`. // https://github.com/risingwavelabs/risingwave/issues/9233 if self.inner.contains(key) { // Update cache let mut entry = self.inner.get_mut(key).unwrap(); entry - .insert(pk, join_row.encode()) + .insert(pk, join_row.encode(), inequality_key) .with_context(|| self.state.error_context(&value))?; } else if self.pk_contained_in_jk { // Refill cache when the join key exist in neither cache or storage. self.metrics.insert_cache_miss_count += 1; let mut state = JoinEntryState::default(); state - .insert(pk, join_row.encode()) + .insert(pk, join_row.encode(), inequality_key) .with_context(|| self.state.error_context(&value))?; self.update_state(key, state.into()); } @@ -578,8 +626,12 @@ impl JoinHashMap { let pk = (&value.row) .project(&self.state.pk_indices) .memcmp_serialize(&self.pk_serializer); + let inequality_key = self + .inequality_key_desc + .as_ref() + .map(|desc| desc.serialize_inequal_key_from_row(&value.row)); entry - .remove(pk) + .remove(pk, inequality_key.as_ref()) .with_context(|| self.state.error_context(&value.row))?; } @@ -597,8 +649,13 @@ impl JoinHashMap { let pk = (&value) .project(&self.state.pk_indices) .memcmp_serialize(&self.pk_serializer); + + let inequality_key = self + .inequality_key_desc + .as_ref() + .map(|desc| desc.serialize_inequal_key_from_row(&value)); entry - .remove(pk) + .remove(pk, inequality_key.as_ref()) .with_context(|| self.state.error_context(&value))?; } @@ -680,6 +737,29 @@ impl JoinHashMap { pub fn join_key_data_types(&self) -> &[DataType] { &self.join_key_data_types } + + /// Return true if the inequality key is null. + /// # Panics + /// Panics if the inequality key is not set. + pub fn check_inequal_key_null(&self, row: &impl Row) -> bool { + let desc = self.inequality_key_desc.as_ref().unwrap(); + row.datum_at(desc.idx).is_none() + } + + /// Serialize the inequality key from a row. + /// # Panics + /// Panics if the inequality key is not set. + pub fn serialize_inequal_key_from_row(&self, row: impl Row) -> InequalKeyType { + self.inequality_key_desc + .as_ref() + .unwrap() + .serialize_inequal_key_from_row(&row) + } + + pub fn serialize_pk_from_row(&self, row: impl Row) -> PkType { + row.project(&self.state.pk_indices) + .memcmp_serialize(&self.pk_serializer) + } } use risingwave_common_estimate_size::KvSize; @@ -695,7 +775,9 @@ use super::*; #[derive(Default)] pub struct JoinEntryState { /// The full copy of the state. - cached: join_row_set::JoinRowSet, + cached: JoinRowSet, + /// Index used for AS OF join. The key is inequal column value. The value is the primary key in `cached`. 
+    inequality_index: JoinRowSet<InequalKeyType, JoinRowSet<PkType, ()>>,
     kv_heap_size: KvSize,
 }
@@ -710,9 +792,11 @@ impl EstimateSize for JoinEntryState {
 #[derive(Error, Debug)]
 pub enum JoinEntryError {
     #[error("double inserting a join state entry")]
-    OccupiedError,
+    Occupied,
     #[error("removing a join state entry but it is not in the cache")]
-    RemoveError,
+    Remove,
+    #[error("retrieving a pk from the inequality index but it is not in the cache")]
+    InequalIndex,
 }

 impl JoinEntryState {
@@ -721,11 +805,15 @@ impl JoinEntryState {
         &mut self,
         key: PkType,
         value: StateValueType,
+        inequality_key: Option<InequalKeyType>,
     ) -> Result<&mut StateValueType, JoinEntryError> {
         let mut removed = false;
         if !enable_strict_consistency() {
             // strict consistency is off, let's remove existing (if any) first
             if let Some(old_value) = self.cached.remove(&key) {
+                if let Some(inequality_key) = inequality_key.as_ref() {
+                    self.remove_pk_from_inequality_index(&key, inequality_key);
+                }
                 self.kv_heap_size.sub(&key, &old_value);
                 removed = true;
             }
@@ -733,6 +821,9 @@ impl JoinEntryState {

         self.kv_heap_size.add(&key, &value);

+        if let Some(inequality_key) = inequality_key {
+            self.insert_pk_to_inequality_index(key.clone(), inequality_key);
+        }
         let ret = self.cached.try_insert(key.clone(), value);

         if !enable_strict_consistency() {
@@ -743,22 +834,77 @@ impl JoinEntryState {
             }
         }

-        ret.map_err(|_| JoinEntryError::OccupiedError)
+        ret.map_err(|_| JoinEntryError::Occupied)
     }

     /// Delete from the cache.
-    pub fn remove(&mut self, pk: PkType) -> Result<(), JoinEntryError> {
+    pub fn remove(
+        &mut self,
+        pk: PkType,
+        inequality_key: Option<&InequalKeyType>,
+    ) -> Result<(), JoinEntryError> {
         if let Some(value) = self.cached.remove(&pk) {
             self.kv_heap_size.sub(&pk, &value);
+            if let Some(inequality_key) = inequality_key {
+                self.remove_pk_from_inequality_index(&pk, inequality_key);
+            }
             Ok(())
         } else if enable_strict_consistency() {
-            Err(JoinEntryError::RemoveError)
+            Err(JoinEntryError::Remove)
         } else {
             consistency_error!(?pk, "removing a join state entry but it's not in the cache");
             Ok(())
         }
     }

+    fn remove_pk_from_inequality_index(&mut self, pk: &PkType, inequality_key: &InequalKeyType) {
+        if let Some(pk_set) = self.inequality_index.get_mut(inequality_key) {
+            if pk_set.remove(pk).is_none() {
+                if enable_strict_consistency() {
+                    panic!("removing a pk that is not in the inequality index");
+                } else {
+                    consistency_error!(?pk, "removing a pk that is not in the inequality index");
+                };
+            } else {
+                self.kv_heap_size.sub(pk, &());
+            }
+            if pk_set.is_empty() {
+                self.inequality_index.remove(inequality_key);
+            }
+        }
+    }
+
+    fn insert_pk_to_inequality_index(&mut self, pk: PkType, inequality_key: InequalKeyType) {
+        if let Some(pk_set) = self.inequality_index.get_mut(&inequality_key) {
+            let pk_size = pk.estimated_size();
+            if pk_set.try_insert(pk, ()).is_err() {
+                if enable_strict_consistency() {
+                    panic!("inserting a pk that is already in the inequality index");
+                } else {
+                    consistency_error!("inserting a pk that is already in the inequality index");
+                };
+            } else {
+                self.kv_heap_size.add_size(pk_size);
+            }
+        } else {
+            let mut pk_set = JoinRowSet::default();
+            pk_set.try_insert(pk, ()).unwrap();
+            self.inequality_index
+                .try_insert(inequality_key, pk_set)
+                .unwrap();
+        }
+    }
+
+    pub fn get(
+        &self,
+        pk: &PkType,
+        data_types: &[DataType],
+    ) -> Option<StreamExecutorResult<JoinRow<OwnedRow>>> {
+        self.cached
+            .get(pk)
+            .map(|encoded| encoded.decode(data_types))
+    }
+
     /// Note: the first item in the tuple is the mutable reference to the value in this entry, while
     /// the second item is the decoded value.
     /// To mutate the degree, one **must not** forget to apply
     /// the changes to the first item.
@@ -782,6 +928,92 @@ impl JoinEntryState {
     pub fn len(&self) -> usize {
         self.cached.len()
     }
+
+    /// Range scan the cache using the inequality index.
+    pub fn range_by_inequality<'a, R>(
+        &'a self,
+        range: R,
+        data_types: &'a [DataType],
+    ) -> impl Iterator<Item = StreamExecutorResult<JoinRow<OwnedRow>>> + 'a
+    where
+        R: RangeBounds<InequalKeyType> + 'a,
+    {
+        self.inequality_index.range(range).flat_map(|(_, pk_set)| {
+            pk_set
+                .keys()
+                .flat_map(|pk| self.get_by_indexed_pk(pk, data_types))
+        })
+    }
+
+    /// Get the record under the largest inequality key that satisfies the given upper bound.
+    pub fn upper_bound_by_inequality<'a>(
+        &'a self,
+        bound: Bound<&InequalKeyType>,
+        data_types: &'a [DataType],
+    ) -> Option<StreamExecutorResult<JoinRow<OwnedRow>>> {
+        if let Some((_, pk_set)) = self.inequality_index.upper_bound(bound) {
+            if let Some(pk) = pk_set.first_key_sorted() {
+                self.get_by_indexed_pk(pk, data_types)
+            } else {
+                panic!("pk set for an index record must have at least one element");
+            }
+        } else {
+            None
+        }
+    }
+
+    pub fn get_by_indexed_pk(
+        &self,
+        pk: &PkType,
+        data_types: &[DataType],
+    ) -> Option<StreamExecutorResult<JoinRow<OwnedRow>>> {
+        if let Some(value) = self.cached.get(pk) {
+            Some(value.decode(data_types))
+        } else if enable_strict_consistency() {
+            Some(Err(anyhow!(JoinEntryError::InequalIndex).into()))
+        } else {
+            consistency_error!(?pk, "{}", JoinEntryError::InequalIndex.as_report());
+            None
+        }
+    }
+
+    /// Get the record under the smallest inequality key that satisfies the given lower bound.
+    pub fn lower_bound_by_inequality<'a>(
+        &'a self,
+        bound: Bound<&InequalKeyType>,
+        data_types: &'a [DataType],
+    ) -> Option<StreamExecutorResult<JoinRow<OwnedRow>>> {
+        if let Some((_, pk_set)) = self.inequality_index.lower_bound(bound) {
+            if let Some(pk) = pk_set.first_key_sorted() {
+                self.get_by_indexed_pk(pk, data_types)
+            } else {
+                panic!("pk set for an index record must have at least one element");
+            }
+        } else {
+            None
+        }
+    }
+
+    pub fn get_first_by_inequality<'a>(
+        &'a self,
+        inequality_key: &InequalKeyType,
+        data_types: &'a [DataType],
+    ) -> Option<StreamExecutorResult<JoinRow<OwnedRow>>> {
+        if let Some(pk_set) = self.inequality_index.get(inequality_key) {
+            if let Some(pk) = pk_set.first_key_sorted() {
+                self.get_by_indexed_pk(pk, data_types)
+            } else {
+                panic!("pk set for an index record must have at least one element");
+            }
+        } else {
+            None
+        }
+    }
+
+    pub fn inequality_index(&self) -> &JoinRowSet<InequalKeyType, JoinRowSet<PkType, ()>> {
+        &self.inequality_index
+    }
 }

 #[cfg(test)]
@@ -795,16 +1027,36 @@ mod tests {
     fn insert_chunk(
         managed_state: &mut JoinEntryState,
         pk_indices: &[usize],
+        col_types: &[DataType],
+        inequality_key_idx: Option<usize>,
         data_chunk: &DataChunk,
     ) {
+        let pk_col_type = pk_indices
+            .iter()
+            .map(|idx| col_types[*idx].clone())
+            .collect_vec();
+        let pk_serializer =
+            OrderedRowSerde::new(pk_col_type, vec![OrderType::ascending(); pk_indices.len()]);
+        let inequality_key_type = inequality_key_idx.map(|idx| col_types[idx].clone());
+        let inequality_key_serializer = inequality_key_type
+            .map(|data_type| OrderedRowSerde::new(vec![data_type], vec![OrderType::ascending()]));
         for row_ref in data_chunk.rows() {
             let row: OwnedRow = row_ref.into_owned_row();
             let value_indices = (0..row.len() - 1).collect_vec();
             let pk = pk_indices.iter().map(|idx| row[*idx].clone()).collect_vec();
             // Pk is only a `i64` here, so encoding method does not matter.
- let pk = OwnedRow::new(pk).project(&value_indices).value_serialize(); + let pk = OwnedRow::new(pk) + .project(&value_indices) + .memcmp_serialize(&pk_serializer); + let inequality_key = inequality_key_idx.map(|idx| { + (&row) + .project(&[idx]) + .memcmp_serialize(inequality_key_serializer.as_ref().unwrap()) + }); let join_row = JoinRow { row, degree: 0 }; - managed_state.insert(pk, join_row.encode()).unwrap(); + managed_state + .insert(pk, join_row.encode(), inequality_key) + .unwrap(); } } @@ -826,7 +1078,7 @@ mod tests { } #[tokio::test] - async fn test_managed_all_or_none_state() { + async fn test_managed_join_state() { let mut managed_state = JoinEntryState::default(); let col_types = vec![DataType::Int64, DataType::Int64]; let pk_indices = [0]; @@ -841,7 +1093,13 @@ mod tests { ); // `Vec` in state - insert_chunk(&mut managed_state, &pk_indices, &data_chunk1); + insert_chunk( + &mut managed_state, + &pk_indices, + &col_types, + None, + &data_chunk1, + ); check(&mut managed_state, &col_types, &col1, &col2); // `BtreeMap` in state @@ -852,7 +1110,76 @@ mod tests { 5 8 4 9", ); - insert_chunk(&mut managed_state, &pk_indices, &data_chunk2); + insert_chunk( + &mut managed_state, + &pk_indices, + &col_types, + None, + &data_chunk2, + ); check(&mut managed_state, &col_types, &col1, &col2); } + + #[tokio::test] + async fn test_managed_join_state_w_inequality_index() { + let mut managed_state = JoinEntryState::default(); + let col_types = vec![DataType::Int64, DataType::Int64]; + let pk_indices = [0]; + let inequality_key_idx = Some(1); + let inequality_key_serializer = + OrderedRowSerde::new(vec![DataType::Int64], vec![OrderType::ascending()]); + + let col1 = [3, 2, 1]; + let col2 = [4, 5, 5]; + let data_chunk1 = DataChunk::from_pretty( + "I I + 3 4 + 2 5 + 1 5", + ); + + // `Vec` in state + insert_chunk( + &mut managed_state, + &pk_indices, + &col_types, + inequality_key_idx, + &data_chunk1, + ); + check(&mut managed_state, &col_types, &col1, &col2); + let bound = OwnedRow::new(vec![Some(ScalarImpl::Int64(5))]) + .memcmp_serialize(&inequality_key_serializer); + let row = managed_state + .upper_bound_by_inequality(Bound::Included(&bound), &col_types) + .unwrap() + .unwrap(); + assert_eq!(row.row[0], Some(ScalarImpl::Int64(1))); + let row = managed_state + .upper_bound_by_inequality(Bound::Excluded(&bound), &col_types) + .unwrap() + .unwrap(); + assert_eq!(row.row[0], Some(ScalarImpl::Int64(3))); + + // `BtreeMap` in state + let col1 = [1, 2, 3, 4, 5]; + let col2 = [5, 5, 4, 4, 8]; + let data_chunk2 = DataChunk::from_pretty( + "I I + 5 8 + 4 4", + ); + insert_chunk( + &mut managed_state, + &pk_indices, + &col_types, + inequality_key_idx, + &data_chunk2, + ); + check(&mut managed_state, &col_types, &col1, &col2); + + let bound = OwnedRow::new(vec![Some(ScalarImpl::Int64(8))]) + .memcmp_serialize(&inequality_key_serializer); + let row = managed_state.lower_bound_by_inequality(Bound::Excluded(&bound), &col_types); + assert!(row.is_none()); + } } diff --git a/src/stream/src/executor/join/join_row_set.rs b/src/stream/src/executor/join/join_row_set.rs index de6f5ce2f0279..b34e163410eec 100644 --- a/src/stream/src/executor/join/join_row_set.rs +++ b/src/stream/src/executor/join/join_row_set.rs @@ -12,10 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
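The inequality index introduced above keys on memcmp-serialized datums: `OrderedRowSerde::memcmp_serialize` yields bytes whose lexicographic order matches the datum order, which is what lets the `JoinRowSet` changes below answer ordered bound queries over encoded keys. A minimal standalone sketch of that order-preserving idea, using an illustrative i64 encoding rather than the real serializer:

fn encode_i64(x: i64) -> [u8; 8] {
    // Flip the sign bit and use big-endian bytes so that byte-wise (memcmp)
    // comparison agrees with numeric order, negatives included.
    ((x as u64) ^ (1 << 63)).to_be_bytes()
}

fn main() {
    let mut values = vec![3i64, -7, 0, 42, -1];
    let mut encoded: Vec<[u8; 8]> = values.iter().map(|&v| encode_i64(v)).collect();
    values.sort();
    encoded.sort();
    // Sorting the encodings gives the same order as sorting the values.
    assert_eq!(
        encoded,
        values.iter().map(|&v| encode_i64(v)).collect::<Vec<_>>()
    );
}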
+use std::borrow::Borrow;
 use std::collections::btree_map::OccupiedError as BTreeMapOccupiedError;
 use std::collections::BTreeMap;
 use std::fmt::Debug;
 use std::mem;
+use std::ops::{Bound, RangeBounds};

 use auto_enums::auto_enum;
 use enum_as_inner::EnumAsInner;
@@ -110,6 +112,13 @@ impl JoinRowSet {
         }
     }

+    pub fn is_empty(&self) -> bool {
+        match self {
+            Self::BTree(inner) => inner.is_empty(),
+            Self::Vec(inner) => inner.is_empty(),
+        }
+    }
+
     #[auto_enum(Iterator)]
     pub fn values_mut(&mut self) -> impl Iterator<Item = &mut V> {
         match self {
@@ -117,4 +126,161 @@ impl JoinRowSet {
             Self::Vec(inner) => inner.iter_mut().map(|(_, v)| v),
         }
     }
+
+    #[auto_enum(Iterator)]
+    pub fn keys(&self) -> impl Iterator<Item = &K> {
+        match self {
+            Self::BTree(inner) => inner.keys(),
+            Self::Vec(inner) => inner.iter().map(|(k, _v)| k),
+        }
+    }
+
+    #[auto_enum(Iterator)]
+    pub fn range<T, R>(&self, range: R) -> impl Iterator<Item = (&K, &V)>
+    where
+        T: Ord + ?Sized,
+        K: Borrow<T> + Ord,
+        R: RangeBounds<T>,
+    {
+        match self {
+            Self::BTree(inner) => inner.range(range),
+            Self::Vec(inner) => inner
+                .iter()
+                .filter(move |(k, _)| range.contains(k.borrow()))
+                .map(|(k, v)| (k, v)),
+        }
+    }
+
+    pub fn lower_bound_key(&self, bound: Bound<&K>) -> Option<&K> {
+        self.lower_bound(bound).map(|(k, _v)| k)
+    }
+
+    pub fn upper_bound_key(&self, bound: Bound<&K>) -> Option<&K> {
+        self.upper_bound(bound).map(|(k, _v)| k)
+    }
+
+    pub fn lower_bound(&self, bound: Bound<&K>) -> Option<(&K, &V)> {
+        match self {
+            Self::BTree(inner) => inner.lower_bound(bound).next(),
+            Self::Vec(inner) => inner
+                .iter()
+                .filter(|(k, _)| (bound, Bound::Unbounded).contains(k))
+                .min_by_key(|(k, _)| k)
+                .map(|(k, v)| (k, v)),
+        }
+    }
+
+    pub fn upper_bound(&self, bound: Bound<&K>) -> Option<(&K, &V)> {
+        match self {
+            Self::BTree(inner) => inner.upper_bound(bound).prev(),
+            Self::Vec(inner) => inner
+                .iter()
+                .filter(|(k, _)| (Bound::Unbounded, bound).contains(k))
+                .max_by_key(|(k, _)| k)
+                .map(|(k, v)| (k, v)),
+        }
+    }
+
+    pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
+        match self {
+            Self::BTree(inner) => inner.get_mut(key),
+            Self::Vec(inner) => inner.iter_mut().find(|(k, _)| k == key).map(|(_, v)| v),
+        }
+    }
+
+    pub fn get(&self, key: &K) -> Option<&V> {
+        match self {
+            Self::BTree(inner) => inner.get(key),
+            Self::Vec(inner) => inner.iter().find(|(k, _)| k == key).map(|(_, v)| v),
+        }
+    }
+
+    /// Returns the smallest key in the map.
+    pub fn first_key_sorted(&self) -> Option<&K> {
+        match self {
+            Self::BTree(inner) => inner.first_key_value().map(|(k, _)| k),
+            Self::Vec(inner) => inner.iter().map(|(k, _)| k).min(),
+        }
+    }
+
+    /// Returns the second smallest key in the map.
+ pub fn second_key_sorted(&self) -> Option<&K> { + match self { + Self::BTree(inner) => inner.iter().nth(1).map(|(k, _)| k), + Self::Vec(inner) => { + let mut res = None; + let mut smallest = None; + for (k, _) in inner { + if let Some(smallest_k) = smallest { + if k < smallest_k { + res = Some(smallest_k); + smallest = Some(k); + } else if let Some(res_k) = res { + if k < res_k { + res = Some(k); + } + } else { + res = Some(k); + } + } else { + smallest = Some(k); + } + } + res + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_join_row_set_bounds() { + let mut join_row_set: JoinRowSet = JoinRowSet::default(); + + // Insert elements + assert!(join_row_set.try_insert(1, 10).is_ok()); + assert!(join_row_set.try_insert(2, 20).is_ok()); + assert!(join_row_set.try_insert(3, 30).is_ok()); + + // Check lower bound + assert_eq!(join_row_set.lower_bound_key(Bound::Included(&2)), Some(&2)); + assert_eq!(join_row_set.lower_bound_key(Bound::Excluded(&2)), Some(&3)); + + // Check upper bound + assert_eq!(join_row_set.upper_bound_key(Bound::Included(&2)), Some(&2)); + assert_eq!(join_row_set.upper_bound_key(Bound::Excluded(&2)), Some(&1)); + } + + #[test] + fn test_join_row_set_first_and_second_key_sorted() { + { + let mut join_row_set: JoinRowSet = JoinRowSet::default(); + + // Insert elements + assert!(join_row_set.try_insert(3, 30).is_ok()); + assert!(join_row_set.try_insert(1, 10).is_ok()); + assert!(join_row_set.try_insert(2, 20).is_ok()); + + // Check first key sorted + assert_eq!(join_row_set.first_key_sorted(), Some(&1)); + + // Check second key sorted + assert_eq!(join_row_set.second_key_sorted(), Some(&2)); + } + { + let mut join_row_set: JoinRowSet = JoinRowSet::default(); + + // Insert elements + assert!(join_row_set.try_insert(1, 10).is_ok()); + assert!(join_row_set.try_insert(2, 20).is_ok()); + + // Check first key sorted + assert_eq!(join_row_set.first_key_sorted(), Some(&1)); + + // Check second key sorted + assert_eq!(join_row_set.second_key_sorted(), Some(&2)); + } + } } diff --git a/src/stream/src/executor/join/mod.rs b/src/stream/src/executor/join/mod.rs index b8bd5ff84d95f..ea53a7992f265 100644 --- a/src/stream/src/executor/join/mod.rs +++ b/src/stream/src/executor/join/mod.rs @@ -12,6 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
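The bound lookups exercised above are what the AS OF matching in this patch builds on: for a `left >= right` condition the match is the row under the greatest indexed right key not exceeding the left value, and `left <= right` mirrors it from below (see the `Ge` test earlier in this diff). A simplified sketch over a plain `BTreeMap` with i64 keys; the real index maps encoded inequality keys to pk sets, so the names and types here are illustrative only:

use std::collections::BTreeMap;
use std::ops::Bound;

fn asof_match_ge(index: &BTreeMap<i64, &'static str>, left: i64) -> Option<&'static str> {
    // Greatest right key <= left, i.e. an upper-bound lookup.
    index
        .range((Bound::Unbounded, Bound::Included(left)))
        .next_back()
        .map(|(_, row)| *row)
}

fn asof_match_le(index: &BTreeMap<i64, &'static str>, left: i64) -> Option<&'static str> {
    // Smallest right key >= left, i.e. a lower-bound lookup.
    index
        .range((Bound::Included(left), Bound::Unbounded))
        .next()
        .map(|(_, row)| *row)
}

fn main() {
    let mut index = BTreeMap::new();
    index.insert(4, "right row a");
    index.insert(5, "right row b");
    assert_eq!(asof_match_ge(&index, 5), Some("right row b"));
    assert_eq!(asof_match_ge(&index, 3), None);
    assert_eq!(asof_match_le(&index, 5), Some("right row b"));
    assert_eq!(asof_match_le(&index, 6), None);
}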
+use risingwave_expr::bail; +use risingwave_pb::plan_common::{AsOfJoinDesc, AsOfJoinInequalityType}; + +use crate::error::StreamResult; + pub mod builder; pub mod hash_join; pub mod join_row_set; @@ -35,6 +40,15 @@ pub mod JoinType { pub const RightAnti: JoinTypePrimitive = 7; } +pub type AsOfJoinTypePrimitive = u8; + +#[allow(non_snake_case, non_upper_case_globals)] +pub mod AsOfJoinType { + use super::AsOfJoinTypePrimitive; + pub const Inner: AsOfJoinTypePrimitive = 0; + pub const LeftOuter: AsOfJoinTypePrimitive = 1; +} + pub type SideTypePrimitive = u8; #[allow(non_snake_case, non_upper_case_globals)] pub mod SideType { @@ -43,6 +57,38 @@ pub mod SideType { pub const Right: SideTypePrimitive = 1; } +pub enum AsOfInequalityType { + Le, + Lt, + Ge, + Gt, +} + +pub struct AsOfDesc { + pub left_idx: usize, + pub right_idx: usize, + pub inequality_type: AsOfInequalityType, +} + +impl AsOfDesc { + pub fn from_protobuf(desc_proto: &AsOfJoinDesc) -> StreamResult { + let typ = match desc_proto.inequality_type() { + AsOfJoinInequalityType::AsOfInequalityTypeLt => AsOfInequalityType::Lt, + AsOfJoinInequalityType::AsOfInequalityTypeLe => AsOfInequalityType::Le, + AsOfJoinInequalityType::AsOfInequalityTypeGt => AsOfInequalityType::Gt, + AsOfJoinInequalityType::AsOfInequalityTypeGe => AsOfInequalityType::Ge, + AsOfJoinInequalityType::AsOfInequalityTypeUnspecified => { + bail!("unspecified AsOf join inequality type") + } + }; + Ok(Self { + left_idx: desc_proto.left_idx as usize, + right_idx: desc_proto.right_idx as usize, + inequality_type: typ, + }) + } +} + pub const fn is_outer_side(join_type: JoinTypePrimitive, side_type: SideTypePrimitive) -> bool { join_type == JoinType::FullOuter || (join_type == JoinType::LeftOuter && side_type == SideType::Left) @@ -106,3 +152,7 @@ pub const fn need_right_degree(join_type: JoinTypePrimitive) -> bool { || join_type == JoinType::RightAnti || join_type == JoinType::RightSemi } + +pub const fn is_as_of_left_outer(join_type: AsOfJoinTypePrimitive) -> bool { + join_type == AsOfJoinType::LeftOuter +} diff --git a/src/stream/src/executor/mod.rs b/src/stream/src/executor/mod.rs index 8b9f7b3f2242b..3d1ca35b6d610 100644 --- a/src/stream/src/executor/mod.rs +++ b/src/stream/src/executor/mod.rs @@ -57,6 +57,7 @@ pub mod monitor; pub mod agg_common; pub mod aggregation; +pub mod asof_join; mod backfill; mod barrier_recv; mod batch_query; @@ -133,7 +134,7 @@ pub use filter::FilterExecutor; pub use hash_agg::HashAggExecutor; pub use hash_join::*; pub use hop_window::HopWindowExecutor; -pub use join::JoinType; +pub use join::{AsOfDesc, AsOfJoinType, JoinType}; pub use lookup::*; pub use lookup_union::LookupUnionExecutor; pub use merge::MergeExecutor; diff --git a/src/stream/src/executor/nested_loop_temporal_join.rs b/src/stream/src/executor/nested_loop_temporal_join.rs index 0888d8981fc8c..55d21b468a777 100644 --- a/src/stream/src/executor/nested_loop_temporal_join.rs +++ b/src/stream/src/executor/nested_loop_temporal_join.rs @@ -98,8 +98,7 @@ async fn phase1_handle_chunk( } impl NestedLoopTemporalJoinExecutor { - #[allow(clippy::too_many_arguments)] - #[expect(dead_code)] + #[expect(clippy::too_many_arguments)] pub fn new( ctx: ActorContextRef, info: ExecutorInfo, diff --git a/src/stream/src/executor/stream_reader.rs b/src/stream/src/executor/stream_reader.rs index 30de0804b0ac0..bd22e47c737ad 100644 --- a/src/stream/src/executor/stream_reader.rs +++ b/src/stream/src/executor/stream_reader.rs @@ -16,7 +16,7 @@ use std::pin::Pin; use std::task::Poll; use either::Either; 
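The `stream_reader.rs` hunk below only moves the `impl FnMut` type alias into a helper module; the polling strategy itself is unchanged. A self-contained sketch of the two strategies it wraps, biased (always prefer the barrier side) versus round robin, using only the `futures` crate:

use futures::executor::block_on;
use futures::stream::{self, select_with_strategy, PollNext};
use futures::StreamExt;

fn main() {
    // Biased: always drain the left-hand stream first.
    let biased = select_with_strategy(
        stream::iter([1, 2, 3]),
        stream::iter([10, 20, 30]),
        |_: &mut PollNext| PollNext::Left,
    );
    assert_eq!(
        block_on(biased.collect::<Vec<_>>()),
        vec![1, 2, 3, 10, 20, 30]
    );

    // Round robin: toggle the preferred side on every poll, as the non-BIASED arm does.
    let round_robin = select_with_strategy(
        stream::iter([1, 2, 3]),
        stream::iter([10, 20, 30]),
        |last: &mut PollNext| last.toggle(),
    );
    let mut items = block_on(round_robin.collect::<Vec<_>>());
    // The two inputs are interleaved; the exact phase depends on the toggle
    // convention, so only the multiset is checked here.
    items.sort();
    assert_eq!(items, vec![1, 2, 3, 10, 20, 30]);
}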
-use futures::stream::{select_with_strategy, BoxStream, PollNext, SelectWithStrategy}; +use futures::stream::BoxStream; use futures::{Stream, StreamExt, TryStreamExt}; use crate::executor::error::StreamExecutorResult; @@ -25,8 +25,34 @@ use crate::executor::Message; type ExecutorMessageStream = BoxStream<'static, StreamExecutorResult>; type StreamReaderData = StreamExecutorResult>; type ReaderArm = BoxStream<'static, StreamReaderData>; -type StreamReaderWithPauseInner = - SelectWithStrategy, ReaderArm, impl FnMut(&mut PollNext) -> PollNext, PollNext>; + +mod stream_reader_with_pause { + use futures::stream::{select_with_strategy, PollNext, SelectWithStrategy}; + + use crate::executor::stream_reader::ReaderArm; + + pub(super) type StreamReaderWithPauseInner = SelectWithStrategy< + ReaderArm, + ReaderArm, + impl FnMut(&mut PollNext) -> PollNext, + PollNext, + >; + + pub(super) fn new_inner( + message_stream: ReaderArm, + data_stream: ReaderArm, + ) -> StreamReaderWithPauseInner { + let strategy = if BIASED { + |_: &mut PollNext| PollNext::Left + } else { + // The poll strategy is not biased: we poll the two streams in a round robin way. + |last: &mut PollNext| last.toggle() + }; + select_with_strategy(message_stream, data_stream, strategy) + } +} + +use stream_reader_with_pause::*; /// [`StreamReaderWithPause`] merges two streams, with one receiving barriers (and maybe other types /// of messages) and the other receiving data only (no barrier). The merged stream can be paused @@ -40,7 +66,7 @@ type StreamReaderWithPauseInner = /// priority over the right-hand one. Otherwise, the two streams will be polled in a round robin /// fashion. pub(super) struct StreamReaderWithPause { - inner: StreamReaderWithPauseInner, + inner: StreamReaderWithPauseInner, /// Whether the source stream is paused. paused: bool, } @@ -54,26 +80,13 @@ impl StreamReaderWithPause { ) -> Self { let message_stream_arm = message_stream.map_ok(Either::Left).boxed(); let data_stream_arm = data_stream.map_ok(Either::Right).boxed(); - let inner = Self::new_inner(message_stream_arm, data_stream_arm); + let inner = new_inner(message_stream_arm, data_stream_arm); Self { inner, paused: false, } } - fn new_inner( - message_stream: ReaderArm, - data_stream: ReaderArm, - ) -> StreamReaderWithPauseInner { - let strategy = if BIASED { - |_: &mut PollNext| PollNext::Left - } else { - // The poll strategy is not biased: we poll the two streams in a round robin way. - |last: &mut PollNext| last.toggle() - }; - select_with_strategy(message_stream, data_stream, strategy) - } - /// Replace the data stream with a new one for given `stream`. Used for split change. pub fn replace_data_stream( &mut self, @@ -87,7 +100,7 @@ impl StreamReaderWithPause { // Note: create a new `SelectWithStrategy` instead of replacing the source stream arm here, // to ensure the internal state of the `SelectWithStrategy` is reset. (#6300) - self.inner = Self::new_inner( + self.inner = new_inner( barrier_receiver_arm, data_stream.map_ok(Either::Right).boxed(), ); diff --git a/src/stream/src/from_proto/asof_join.rs b/src/stream/src/from_proto/asof_join.rs new file mode 100644 index 0000000000000..3d74ac884b4f0 --- /dev/null +++ b/src/stream/src/from_proto/asof_join.rs @@ -0,0 +1,192 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use risingwave_common::hash::{HashKey, HashKeyDispatcher}; +use risingwave_common::types::DataType; +use risingwave_pb::plan_common::AsOfJoinType as JoinTypeProto; +use risingwave_pb::stream_plan::AsOfJoinNode; + +use super::*; +use crate::common::table::state_table::StateTable; +use crate::executor::asof_join::*; +use crate::executor::monitor::StreamingMetrics; +use crate::executor::{ActorContextRef, AsOfDesc, AsOfJoinType, JoinType}; +use crate::task::AtomicU64Ref; + +pub struct AsOfJoinExecutorBuilder; + +impl ExecutorBuilder for AsOfJoinExecutorBuilder { + type Node = AsOfJoinNode; + + async fn new_boxed_executor( + params: ExecutorParams, + node: &Self::Node, + store: impl StateStore, + ) -> StreamResult { + // This assert is to make sure AsOf join can use `JoinChunkBuilder` as Hash join. + assert_eq!(AsOfJoinType::Inner, JoinType::Inner); + assert_eq!(AsOfJoinType::LeftOuter, JoinType::LeftOuter); + let vnodes = Arc::new(params.vnode_bitmap.expect("vnodes not set for AsOf join")); + + let [source_l, source_r]: [_; 2] = params.input.try_into().unwrap(); + + let table_l = node.get_left_table()?; + let degree_table_l = node.get_left_degree_table()?; + + let table_r = node.get_right_table()?; + let degree_table_r = node.get_right_degree_table()?; + + let params_l = JoinParams::new( + node.get_left_key() + .iter() + .map(|key| *key as usize) + .collect_vec(), + node.get_left_deduped_input_pk_indices() + .iter() + .map(|key| *key as usize) + .collect_vec(), + ); + let params_r = JoinParams::new( + node.get_right_key() + .iter() + .map(|key| *key as usize) + .collect_vec(), + node.get_right_deduped_input_pk_indices() + .iter() + .map(|key| *key as usize) + .collect_vec(), + ); + let null_safe = node.get_null_safe().to_vec(); + let output_indices = node + .get_output_indices() + .iter() + .map(|&x| x as usize) + .collect_vec(); + + let join_key_data_types = params_l + .join_key_indices + .iter() + .map(|idx| source_l.schema().fields[*idx].data_type()) + .collect_vec(); + + let state_table_l = + StateTable::from_table_catalog(table_l, store.clone(), Some(vnodes.clone())).await; + let degree_state_table_l = + StateTable::from_table_catalog(degree_table_l, store.clone(), Some(vnodes.clone())) + .await; + + let state_table_r = + StateTable::from_table_catalog(table_r, store.clone(), Some(vnodes.clone())).await; + let degree_state_table_r = + StateTable::from_table_catalog(degree_table_r, store, Some(vnodes)).await; + + let join_type_proto = node.get_join_type()?; + let as_of_desc_proto = node.get_asof_desc()?; + let asof_desc = AsOfDesc::from_protobuf(as_of_desc_proto)?; + + let args = AsOfJoinExecutorDispatcherArgs { + ctx: params.actor_context, + info: params.info.clone(), + source_l, + source_r, + params_l, + params_r, + null_safe, + output_indices, + state_table_l, + degree_state_table_l, + state_table_r, + degree_state_table_r, + lru_manager: params.watermark_epoch, + metrics: params.executor_stats, + join_type_proto, + join_key_data_types, + chunk_size: params.env.config().developer.chunk_size, + high_join_amplification_threshold: params + .env + 
.config() + .developer + .high_join_amplification_threshold, + asof_desc, + }; + + let exec = args.dispatch()?; + Ok((params.info, exec).into()) + } +} + +struct AsOfJoinExecutorDispatcherArgs { + ctx: ActorContextRef, + info: ExecutorInfo, + source_l: Executor, + source_r: Executor, + params_l: JoinParams, + params_r: JoinParams, + null_safe: Vec, + output_indices: Vec, + state_table_l: StateTable, + degree_state_table_l: StateTable, + state_table_r: StateTable, + degree_state_table_r: StateTable, + lru_manager: AtomicU64Ref, + metrics: Arc, + join_type_proto: JoinTypeProto, + join_key_data_types: Vec, + chunk_size: usize, + high_join_amplification_threshold: usize, + asof_desc: AsOfDesc, +} + +impl HashKeyDispatcher for AsOfJoinExecutorDispatcherArgs { + type Output = StreamResult>; + + fn dispatch_impl(self) -> Self::Output { + /// This macro helps to fill the const generic type parameter. + macro_rules! build { + ($join_type:ident) => { + Ok(AsOfJoinExecutor::::new( + self.ctx, + self.info, + self.source_l, + self.source_r, + self.params_l, + self.params_r, + self.null_safe, + self.output_indices, + self.state_table_l, + self.degree_state_table_l, + self.state_table_r, + self.degree_state_table_r, + self.lru_manager, + self.metrics, + self.chunk_size, + self.high_join_amplification_threshold, + self.asof_desc, + ) + .boxed()) + }; + } + match self.join_type_proto { + JoinTypeProto::Unspecified => unreachable!(), + JoinTypeProto::Inner => build!(Inner), + JoinTypeProto::LeftOuter => build!(LeftOuter), + } + } + + fn data_types(&self) -> &[DataType] { + &self.join_key_data_types + } +} diff --git a/src/stream/src/from_proto/mod.rs b/src/stream/src/from_proto/mod.rs index 6f185695eadf7..1f63b6cd5db85 100644 --- a/src/stream/src/from_proto/mod.rs +++ b/src/stream/src/from_proto/mod.rs @@ -16,6 +16,7 @@ mod agg_common; mod append_only_dedup; +mod asof_join; mod barrier_recv; mod batch_query; mod cdc_filter; diff --git a/src/stream/src/lib.rs b/src/stream/src/lib.rs index 876deabc80f98..577b829945620 100644 --- a/src/stream/src/lib.rs +++ b/src/stream/src/lib.rs @@ -17,7 +17,6 @@ #![feature(trait_alias)] #![feature(type_alias_impl_trait)] #![feature(more_qualified_paths)] -#![feature(lint_reasons)] #![feature(let_chains)] #![feature(hash_extract_if)] #![feature(extract_if)] diff --git a/src/stream/src/task/barrier_manager/managed_state.rs b/src/stream/src/task/barrier_manager/managed_state.rs index 43a979af2b568..8f4ab2b49ea2e 100644 --- a/src/stream/src/task/barrier_manager/managed_state.rs +++ b/src/stream/src/task/barrier_manager/managed_state.rs @@ -42,7 +42,7 @@ use super::{BarrierCompleteResult, SubscribeMutationItem}; use crate::error::{StreamError, StreamResult}; use crate::executor::monitor::StreamingMetrics; use crate::executor::{Barrier, Mutation}; -use crate::task::{await_tree_key, ActorId, PartialGraphId, SharedContext, StreamActorManager}; +use crate::task::{ActorId, PartialGraphId, SharedContext, StreamActorManager}; struct IssuedState { pub mutation: Option>, @@ -88,8 +88,59 @@ pub(super) struct BarrierState { inner: ManagedBarrierStateInner, } -type AwaitEpochCompletedFuture = - impl Future)> + 'static; +mod await_epoch_completed_future { + use std::future::Future; + + use futures::future::BoxFuture; + use futures::FutureExt; + use risingwave_hummock_sdk::SyncResult; + use risingwave_pb::stream_service::barrier_complete_response::PbCreateMviewProgress; + + use crate::error::StreamResult; + use crate::executor::Barrier; + use crate::task::{await_tree_key, 
BarrierCompleteResult}; + + pub(super) type AwaitEpochCompletedFuture = + impl Future)> + 'static; + + pub(super) fn instrument_complete_barrier_future( + complete_barrier_future: Option>>, + barrier: Barrier, + barrier_await_tree_reg: Option<&await_tree::Registry>, + create_mview_progress: Vec, + ) -> AwaitEpochCompletedFuture { + let prev_epoch = barrier.epoch.prev; + let future = async move { + if let Some(future) = complete_barrier_future { + let result = future.await; + result.map(Some) + } else { + Ok(None) + } + } + .map(move |result| { + ( + barrier, + result.map(|sync_result| BarrierCompleteResult { + sync_result, + create_mview_progress, + }), + ) + }); + if let Some(reg) = barrier_await_tree_reg { + reg.register( + await_tree_key::BarrierAwait { prev_epoch }, + format!("SyncEpoch({})", prev_epoch), + ) + .instrument(future) + .left_future() + } else { + future.right_future() + } + } +} + +use await_epoch_completed_future::*; fn sync_epoch( state_store: &S, @@ -787,33 +838,12 @@ impl PartialGraphManagedBarrierState { let barrier = barrier_state.barrier.clone(); self.await_epoch_completed_futures.push_back({ - let future = async move { - if let Some(future) = complete_barrier_future { - let result = future.await; - result.map(Some) - } else { - Ok(None) - } - } - .map(move |result| { - ( - barrier, - result.map(|sync_result| BarrierCompleteResult { - sync_result, - create_mview_progress, - }), - ) - }); - if let Some(reg) = &self.barrier_await_tree_reg { - reg.register( - await_tree_key::BarrierAwait { prev_epoch }, - format!("SyncEpoch({})", prev_epoch), - ) - .instrument(future) - .left_future() - } else { - future.right_future() - } + instrument_complete_barrier_future( + complete_barrier_future, + barrier, + self.barrier_await_tree_reg.as_ref(), + create_mview_progress, + ) }); } } diff --git a/src/tests/compaction_test/src/delete_range_runner.rs b/src/tests/compaction_test/src/delete_range_runner.rs index 13df85bf25d97..4fd246208b69a 100644 --- a/src/tests/compaction_test/src/delete_range_runner.rs +++ b/src/tests/compaction_test/src/delete_range_runner.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashSet; use std::future::Future; use std::ops::{Bound, RangeBounds}; use std::pin::{pin, Pin}; @@ -317,6 +318,7 @@ async fn run_compare_result( let mut normal = NormalState::new(hummock, 1, init_epoch).await; let mut delete_range = DeleteRangeState::new(hummock, 2, init_epoch).await; + let table_id_set = HashSet::from_iter([1.into(), 2.into()]); const RANGE_BASE: u64 = 4000; let range_mod = test_range / RANGE_BASE; @@ -381,9 +383,12 @@ async fn run_compare_result( normal.commit(next_epoch).await?; delete_range.commit(next_epoch).await?; // let checkpoint = epoch % 10 == 0; - let ret = hummock.seal_and_sync_epoch(epoch).await.unwrap(); + let ret = hummock + .seal_and_sync_epoch(epoch, table_id_set.clone()) + .await + .unwrap(); meta_client - .commit_epoch(epoch, ret) + .commit_epoch(epoch, ret, false) .await .map_err(|e| format!("{:?}", e))?; if (epoch / test_epoch(1)) % 200 == 0 { diff --git a/src/tests/simulation/src/lib.rs b/src/tests/simulation/src/lib.rs index aa6303b8e2f65..af9cf158a3350 100644 --- a/src/tests/simulation/src/lib.rs +++ b/src/tests/simulation/src/lib.rs @@ -13,7 +13,6 @@ // limitations under the License. 
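The `instrument_complete_barrier_future` helper extracted above keeps the previous behaviour: the barrier-sync future is either registered in the await-tree registry (left arm) or returned as-is (right arm), with `Either` giving both branches a single concrete type. A small self-contained sketch of that `left_future`/`right_future` pattern; the `timed` wrapper is an invented stand-in for the await-tree instrumentation:

use std::future::Future;
use std::time::Instant;

use futures::executor::block_on;
use futures::FutureExt;

// Invented stand-in for instrumentation: wrap a future and report its latency.
async fn timed<F: Future>(fut: F) -> F::Output {
    let start = Instant::now();
    let output = fut.await;
    eprintln!("future took {:?}", start.elapsed());
    output
}

// Both branches must have the same type; `left_future`/`right_future` wrap
// them into the two arms of `futures::future::Either`.
fn maybe_timed<F: Future>(fut: F, instrument: bool) -> impl Future<Output = F::Output> {
    if instrument {
        timed(fut).left_future()
    } else {
        fut.right_future()
    }
}

fn main() {
    assert_eq!(block_on(maybe_timed(async { 40 + 2 }, true)), 42);
    assert_eq!(block_on(maybe_timed(async { 40 + 2 }, false)), 42);
}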
#![feature(trait_alias)] -#![feature(lint_reasons)] #![feature(let_chains)] #![feature(try_blocks)] #![feature(register_tool)] diff --git a/src/tests/simulation/src/slt.rs b/src/tests/simulation/src/slt.rs index 799602a00aa3f..7ac5a7b27d70b 100644 --- a/src/tests/simulation/src/slt.rs +++ b/src/tests/simulation/src/slt.rs @@ -497,8 +497,6 @@ fn hack_kafka_test(path: &Path) -> tempfile::NamedTempFile { let complex_avsc_full_path = std::fs::canonicalize("src/connector/src/test_data/complex-schema.avsc") .expect("failed to get schema path"); - let proto_full_path = std::fs::canonicalize("src/connector/src/test_data/complex-schema") - .expect("failed to get schema path"); let json_schema_full_path = std::fs::canonicalize("src/connector/src/test_data/complex-schema.json") .expect("failed to get schema path"); @@ -513,10 +511,6 @@ fn hack_kafka_test(path: &Path) -> tempfile::NamedTempFile { "/risingwave/avro-complex-schema.avsc", complex_avsc_full_path.to_str().unwrap(), ) - .replace( - "/risingwave/proto-complex-schema", - proto_full_path.to_str().unwrap(), - ) .replace( "/risingwave/json-complex-schema", json_schema_full_path.to_str().unwrap(), diff --git a/src/utils/futures_util/src/lib.rs b/src/utils/futures_util/src/lib.rs index 4d086951dbb5f..115da2e7676f9 100644 --- a/src/utils/futures_util/src/lib.rs +++ b/src/utils/futures_util/src/lib.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![feature(lint_reasons)] - use std::future::Future; use futures::stream::TryStream; diff --git a/src/utils/iter_util/src/lib.rs b/src/utils/iter_util/src/lib.rs index 58758c64a1ce5..92f19a0ee46fc 100644 --- a/src/utils/iter_util/src/lib.rs +++ b/src/utils/iter_util/src/lib.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![feature(lint_reasons)] - pub trait ZipEqFast: ExactSizeIterator + Sized where B::IntoIter: ExactSizeIterator, diff --git a/src/utils/local_stats_alloc/src/lib.rs b/src/utils/local_stats_alloc/src/lib.rs index 3950d0cb4931e..94265768815c2 100644 --- a/src/utils/local_stats_alloc/src/lib.rs +++ b/src/utils/local_stats_alloc/src/lib.rs @@ -13,7 +13,6 @@ // limitations under the License. #![feature(allocator_api)] -#![feature(lint_reasons)] use std::alloc::Allocator; use std::ops::Deref; diff --git a/src/utils/pgwire/src/lib.rs b/src/utils/pgwire/src/lib.rs index 8d1c00541bb95..fae5489e81097 100644 --- a/src/utils/pgwire/src/lib.rs +++ b/src/utils/pgwire/src/lib.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![feature(lint_reasons)] #![feature(trait_alias)] #![feature(iterator_try_collect)] #![feature(trusted_len)]
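The `#![feature(lint_reasons)]` removals in the hunks above work because lint reasons and the `expect` lint level are stable now, so the crates can keep using attributes like the `#[expect(clippy::too_many_arguments)]` seen earlier in this diff without a feature gate. A tiny standalone sketch of the now-stable syntax, with illustrative names:

// No `#![feature(lint_reasons)]` needed on a recent stable toolchain.
#[expect(dead_code, reason = "kept only to demonstrate the expect attribute")]
fn unused_helper() {}

#[allow(clippy::needless_return, reason = "the explicit return reads better here")]
fn answer() -> i32 {
    return 42;
}

fn main() {
    assert_eq!(answer(), 42);
}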