From b1e189c7e44c30787e922e252d7f26bb8d437790 Mon Sep 17 00:00:00 2001 From: Noel Kwan <47273164+kwannoel@users.noreply.github.com> Date: Tue, 31 Oct 2023 11:59:08 +0800 Subject: [PATCH 1/7] chore(ci): add skip-ci for `main-cron` (#13126) --- ci/workflows/main-cron.yml | 39 +++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/ci/workflows/main-cron.yml b/ci/workflows/main-cron.yml index d8e78952c141f..6f58222424e18 100644 --- a/ci/workflows/main-cron.yml +++ b/ci/workflows/main-cron.yml @@ -7,6 +7,7 @@ auto-retry: &auto-retry steps: - label: "build" command: "ci/scripts/build.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-build") key: "build" plugins: - docker-compose#v4.9.0: @@ -18,6 +19,7 @@ steps: - label: "build other components" command: "ci/scripts/build-other.sh" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-build-other") key: "build-other" plugins: - seek-oss/aws-sm#v2.3.1: @@ -34,6 +36,7 @@ steps: - label: "build (deterministic simulation)" command: "ci/scripts/build-simulation.sh" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-build-simulation") key: "build-simulation" plugins: - docker-compose#v4.9.0: @@ -45,6 +48,7 @@ steps: - label: "docslt" command: "ci/scripts/docslt.sh" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-docslt") key: "docslt" plugins: - docker-compose#v4.9.0: @@ -56,6 +60,7 @@ steps: - label: "end-to-end test (release)" command: "ci/scripts/cron-e2e-test.sh -p ci-release -m ci-3streaming-2serving-3fe" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-e2e-test") depends_on: - "build" - "docslt" @@ -70,6 +75,7 @@ steps: - label: "end-to-end test (parallel) 
(release)" command: "ci/scripts/e2e-test-parallel.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-e2e-parallel-tests") depends_on: - "build" - "docslt" @@ -90,6 +96,7 @@ steps: - label: "end-to-end test (parallel, in-memory) (release)" command: "ci/scripts/e2e-test-parallel-in-memory.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-e2e-parallel-in-memory-tests") depends_on: - "build" - "docslt" @@ -104,6 +111,7 @@ steps: - label: "end-to-end source test (release)" command: "ci/scripts/e2e-source-test.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-e2e-source-tests") depends_on: - "build" - "build-other" @@ -118,6 +126,7 @@ steps: - label: "end-to-end sink test (release)" command: "ci/scripts/e2e-sink-test.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-e2e-sink-tests") depends_on: - "build" - "build-other" @@ -132,6 +141,7 @@ steps: - label: "fuzz test" command: "ci/scripts/cron-fuzz-test.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-sqlsmith-fuzzing-tests") depends_on: - "build" - "build-simulation" @@ -149,6 +159,7 @@ steps: # This ensures our `main-cron` workflow will be stable. 
- label: "unit test" command: "ci/scripts/unit-test.sh" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-unit-test") plugins: - ./ci/plugins/swapfile - seek-oss/aws-sm#v2.3.1: @@ -164,6 +175,7 @@ steps: - label: "unit test (deterministic simulation)" command: "MADSIM_TEST_NUM=100 timeout 15m ci/scripts/deterministic-unit-test.sh" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-unit-test-deterministic-simulation") plugins: - docker-compose#v4.9.0: run: rw-build-env @@ -174,6 +186,7 @@ steps: - label: "integration test (deterministic simulation) - scale" command: "TEST_NUM=60 ci/scripts/deterministic-it-test.sh scale::" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-integration-test-deterministic-simulation") depends_on: "build-simulation" plugins: - docker-compose#v4.9.0: @@ -185,6 +198,7 @@ steps: - label: "integration test (deterministic simulation) - recovery" command: "TEST_NUM=60 ci/scripts/deterministic-it-test.sh recovery::" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-integration-test-deterministic-simulation") depends_on: "build-simulation" plugins: - docker-compose#v4.9.0: @@ -196,6 +210,7 @@ steps: - label: "integration test (deterministic simulation) - others" command: "TEST_NUM=30 ci/scripts/deterministic-it-test.sh backfill_tests:: storage:: sink::" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-integration-test-deterministic-simulation") depends_on: "build-simulation" plugins: - docker-compose#v4.9.0: @@ -207,6 +222,7 @@ steps: - label: "end-to-end test (deterministic simulation)" command: "TEST_NUM=64 timeout 55m ci/scripts/deterministic-e2e-test.sh" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || 
build.pull_request.labels includes "ci/run-e2e-test-deterministic-simulation") depends_on: "build-simulation" plugins: - seek-oss/aws-sm#v2.3.1: @@ -224,6 +240,7 @@ steps: - label: "recovery test (deterministic simulation)" command: "TEST_NUM=12 KILL_RATE=1.0 timeout 55m ci/scripts/deterministic-recovery-test.sh" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-recovery-test-deterministic-simulation") depends_on: "build-simulation" plugins: - docker-compose#v4.9.0: @@ -236,6 +253,7 @@ steps: - label: "misc check" command: "ci/scripts/misc-check.sh" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-misc-check") plugins: - docker-compose#v4.9.0: run: rw-build-env @@ -247,6 +265,7 @@ steps: - label: "end-to-end iceberg sink test (release)" command: "ci/scripts/e2e-iceberg-sink-test.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-e2e-iceberg-sink-tests") depends_on: - "build" - "build-other" @@ -261,6 +280,7 @@ steps: - label: "end-to-end iceberg sink v2 test (release)" command: "ci/scripts/e2e-iceberg-sink-v2-test.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-e2e-iceberg-sink-v2-tests") depends_on: - "build" - "build-other" @@ -275,6 +295,7 @@ steps: - label: "e2e java-binding test (release)" command: "ci/scripts/java-binding-test.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-e2e-java-binding-tests") depends_on: - "build" - "build-other" @@ -291,6 +312,7 @@ steps: - label: "S3 source check on AWS (json parser)" command: "ci/scripts/s3-source-test.sh -p ci-release -s run.py" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes 
"ci/run-s3-source-tests") depends_on: build plugins: - seek-oss/aws-sm#v2.3.1: @@ -308,6 +330,7 @@ steps: - label: "S3 source check on AWS (json parser)" command: "ci/scripts/s3-source-test.sh -p ci-release -s json_file.py" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-s3-source-tests") depends_on: build plugins: - seek-oss/aws-sm#v2.3.1: @@ -325,6 +348,7 @@ steps: - label: "S3 source check on AWS (csv parser)" command: "ci/scripts/s3-source-test.sh -p ci-release -s run_csv.py" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-s3-source-tests") depends_on: build plugins: - seek-oss/aws-sm#v2.3.1: @@ -342,6 +366,7 @@ steps: - label: "S3_v2 source check on AWS (json parser)" command: "ci/scripts/s3-source-test.sh -p ci-release -s 'fs_source_v2.py json'" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-s3-source-tests") depends_on: build plugins: - seek-oss/aws-sm#v2.3.1: @@ -359,6 +384,7 @@ steps: - label: "S3_v2 source check on AWS (csv parser)" command: "ci/scripts/s3-source-test.sh -p ci-release -s 'fs_source_v2.py csv_without_header'" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-s3-source-tests") depends_on: build plugins: - seek-oss/aws-sm#v2.3.1: @@ -376,6 +402,7 @@ steps: - label: "S3 source on OpenDAL fs engine" command: "ci/scripts/s3-source-test-for-opendal-fs-engine.sh -p ci-release -s run" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-s3-source-tests") depends_on: build plugins: - seek-oss/aws-sm#v2.3.1: @@ -393,6 +420,7 @@ steps: - label: "pulsar source check" command: "ci/scripts/pulsar-source-test.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes 
"ci/run-pulsar-source-tests") depends_on: - build - build-other @@ -413,6 +441,7 @@ steps: - label: "micro benchmark" command: "ci/scripts/run-micro-benchmarks.sh" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-micro-benchmarks") key: "run-micro-benchmarks" plugins: - docker-compose#v4.9.0: @@ -423,7 +452,7 @@ steps: retry: *auto-retry - label: "upload micro-benchmark" - if: build.branch == "main" || build.pull_request.labels includes "ci/upload-micro-benchmark" + if: build.branch == "main" || (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-micro-benchmarks") command: - "BUILDKITE_BUILD_NUMBER=$BUILDKITE_BUILD_NUMBER ci/scripts/upload-micro-bench-results.sh" depends_on: "run-micro-benchmarks" @@ -444,6 +473,7 @@ steps: # Backwards compatibility tests - label: "Backwards compatibility tests" command: "RW_COMMIT=$BUILDKITE_COMMIT ci/scripts/backwards-compat-test.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-backwards-compat-tests") depends_on: - "build" plugins: @@ -458,6 +488,7 @@ steps: # Sqlsmith differential testing - label: "Sqlsmith Differential Testing" command: "ci/scripts/sqlsmith-differential-test.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-sqlsmith-differential-tests") depends_on: - "build" plugins: @@ -470,6 +501,7 @@ steps: - label: "Backfill tests" command: "ci/scripts/backfill-test.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-backfill-tests") depends_on: - "build" plugins: @@ -483,6 +515,7 @@ steps: - label: "e2e standalone binary test" command: "ci/scripts/e2e-test.sh -p ci-release -m standalone" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || 
build.pull_request.labels includes "ci/run-e2e-standalone-tests") depends_on: - "build" - "build-other" @@ -498,6 +531,7 @@ steps: - label: "end-to-end test for opendal (parallel)" command: "ci/scripts/e2e-test-parallel-for-opendal.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-e2e-parallel-tests-for-opendal") depends_on: - "build" - "docslt" @@ -512,6 +546,7 @@ steps: - label: "end-to-end clickhouse sink test" command: "ci/scripts/e2e-clickhouse-sink-test.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-e2e-clickhouse-sink-tests") depends_on: - "build" - "build-other" @@ -526,6 +561,7 @@ steps: - label: "end-to-end pulsar sink test" command: "ci/scripts/e2e-pulsar-sink-test.sh -p ci-release" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-e2e-pulsar-sink-tests") depends_on: - "build" - "build-other" @@ -540,6 +576,7 @@ steps: - label: "connector node integration test Java {{matrix.java_version}}" command: "ci/scripts/connector-node-integration-test.sh -p ci-release -v {{matrix.java_version}}" + if: (!build.pull_request.labels includes "ci/main-cron/skip-ci" || build.pull_request.labels includes "ci/run-connector-node-integration-tests") depends_on: - "build" - "build-other" From c583e2c6c054764249acf484438c7bf7197765f4 Mon Sep 17 00:00:00 2001 From: xxchan Date: Tue, 31 Oct 2023 12:16:40 +0800 Subject: [PATCH 2/7] chore: cargo +nightly fmt (#13162) --- src/batch/src/executor/project_set.rs | 18 +- src/batch/src/executor/top_n.rs | 4 +- src/common/src/cache.rs | 8 +- src/common/src/types/interval.rs | 8 +- src/common/src/util/column_index_mapping.rs | 6 +- src/compute/tests/cdc_tests.rs | 6 +- src/connector/src/parser/mod.rs | 19 +- src/connector/src/parser/mysql.rs | 4 +- src/connector/src/parser/unified/debezium.rs | 62 +++--- 
src/connector/src/parser/unified/upsert.rs | 4 +- src/connector/src/parser/upsert_parser.rs | 4 +- src/connector/src/sink/encoder/template.rs | 11 +- src/connector/src/sink/log_store.rs | 4 +- src/connector/src/source/external.rs | 4 +- .../src/source/google_pubsub/source/reader.rs | 4 +- .../src/source/kafka/private_link.rs | 8 +- src/ctl/src/cmd_impl/meta/reschedule.rs | 4 +- src/ctl/src/cmd_impl/meta/serving.rs | 4 +- src/ctl/src/cmd_impl/scale/resize.rs | 4 +- src/expr/core/src/expr/build.rs | 4 +- src/expr/core/src/expr/expr_coalesce.rs | 4 +- .../impl/src/aggregate/percentile_cont.rs | 29 +-- .../impl/src/aggregate/percentile_disc.rs | 22 ++- src/expr/impl/src/scalar/to_timestamp.rs | 4 +- src/expr/macro/src/gen.rs | 28 ++- src/expr/macro/src/parse.rs | 4 +- src/frontend/src/binder/expr/column.rs | 6 +- src/frontend/src/binder/expr/function.rs | 19 +- src/frontend/src/binder/relation/join.rs | 4 +- src/frontend/src/binder/relation/mod.rs | 21 +- .../src/binder/relation/table_or_source.rs | 8 +- src/frontend/src/binder/select.rs | 90 ++++++--- src/frontend/src/expr/function_call.rs | 8 +- src/frontend/src/expr/mod.rs | 16 +- src/frontend/src/handler/alter_user.rs | 4 +- src/frontend/src/handler/create_user.rs | 4 +- src/frontend/src/handler/extended_handle.rs | 8 +- .../plan_expr_rewriter/cse_rewriter.rs | 4 +- .../optimizer/plan_node/generic/hop_window.rs | 4 +- .../src/optimizer/plan_node/logical_join.rs | 5 +- .../plan_node/logical_over_window.rs | 185 ++++++++---------- .../src/optimizer/plan_node/logical_source.rs | 4 +- .../optimizer/plan_node/stream_group_topn.rs | 4 +- .../optimizer/plan_node/stream_hash_join.rs | 4 +- .../src/optimizer/rule/except_merge_rule.rs | 4 +- .../optimizer/rule/index_delta_join_rule.rs | 7 +- .../optimizer/rule/intersect_merge_rule.rs | 4 +- .../rule/over_window_to_topn_rule.rs | 4 +- .../src/optimizer/rule/union_merge_rule.rs | 4 +- src/frontend/src/planner/query.rs | 4 +- src/frontend/src/planner/select.rs | 8 +- 
.../src/scheduler/distributed/stage.rs | 5 +- src/frontend/src/scheduler/local.rs | 13 +- src/frontend/src/utils/condition.rs | 4 +- .../src/utils/stream_graph_formatter.rs | 71 +++---- .../picker/space_reclaim_compaction_picker.rs | 8 +- src/meta/src/hummock/manager/mod.rs | 2 +- src/meta/src/manager/catalog/fragment.rs | 13 +- src/meta/src/manager/catalog/mod.rs | 3 +- src/meta/src/rpc/ddl_controller.rs | 8 +- src/meta/src/rpc/election/etcd.rs | 4 +- src/meta/src/stream/stream_manager.rs | 6 +- src/object_store/src/object/mem.rs | 4 +- .../opendal_engine/opendal_object_store.rs | 4 +- src/object_store/src/object/s3.rs | 4 +- .../src/config/provide_expander.rs | 4 +- src/risedevtool/src/preflight_check.rs | 5 +- src/risedevtool/src/task/compactor_service.rs | 4 +- .../src/task/compute_node_service.rs | 4 +- src/risedevtool/src/task/frontend_service.rs | 4 +- src/risedevtool/src/task/meta_node_service.rs | 4 +- src/rpc_client/src/meta_client.rs | 3 +- src/sqlparser/src/tokenizer.rs | 8 +- .../src/hummock/compactor/compactor_runner.rs | 16 +- .../src/hummock/event_handler/uploader.rs | 4 +- .../hummock/iterator/delete_range_iterator.rs | 5 +- src/storage/src/hummock/mod.rs | 10 +- .../src/hummock/sstable/multi_builder.rs | 17 +- src/storage/src/hummock/sstable_store.rs | 4 +- src/storage/src/memory.rs | 4 +- .../src/common/log_store_impl/in_mem.rs | 4 +- .../log_store_impl/kv_log_store/buffer.rs | 8 +- .../log_store_impl/kv_log_store/serde.rs | 4 +- src/stream/src/common/table/state_table.rs | 30 ++- .../executor/backfill/arrangement_backfill.rs | 18 +- .../src/executor/backfill/cdc_backfill.rs | 52 +++-- src/stream/src/executor/hash_agg.rs | 11 +- src/stream/src/executor/hash_join.rs | 3 +- .../src/executor/over_window/general.rs | 4 +- src/stream/src/executor/project_set.rs | 18 +- .../executor/source/state_table_handler.rs | 7 +- src/stream/src/executor/temporal_join.rs | 6 +- src/stream/src/executor/top_n/top_n_cache.rs | 42 ++-- 
src/stream/src/executor/top_n/top_n_state.rs | 8 +- src/stream/src/executor/watermark_filter.rs | 9 +- .../src/executor/wrapper/epoch_check.rs | 4 +- .../src/from_proto/source/trad_source.rs | 17 +- src/stream/src/task/stream_manager.rs | 4 +- src/tests/regress/src/schedule.rs | 12 +- src/tests/sqlsmith/src/runner.rs | 14 +- src/tests/sqlsmith/tests/frontend/mod.rs | 16 +- src/utils/runtime/src/logger.rs | 4 +- src/utils/runtime/src/panic_hook.rs | 4 +- 103 files changed, 793 insertions(+), 465 deletions(-) diff --git a/src/batch/src/executor/project_set.rs b/src/batch/src/executor/project_set.rs index fa3dfac917e8a..1df7f9e246d74 100644 --- a/src/batch/src/executor/project_set.rs +++ b/src/batch/src/executor/project_set.rs @@ -92,11 +92,15 @@ impl ProjectSetExecutor { // for each column for (item, value) in results.iter_mut().zip_eq_fast(&mut row[1..]) { *value = match item { - Either::Left(state) => if let Some((i, value)) = state.peek() && i == row_idx { - valid = true; - value - } else { - None + Either::Left(state) => { + if let Some((i, value)) = state.peek() + && i == row_idx + { + valid = true; + value + } else { + None + } } Either::Right(array) => array.value_at(row_idx), }; @@ -110,7 +114,9 @@ impl ProjectSetExecutor { } // move to the next row for item in &mut results { - if let Either::Left(state) = item && matches!(state.peek(), Some((i, _)) if i == row_idx) { + if let Either::Left(state) = item + && matches!(state.peek(), Some((i, _)) if i == row_idx) + { state.next().await?; } } diff --git a/src/batch/src/executor/top_n.rs b/src/batch/src/executor/top_n.rs index cffbae855de61..b43f1bc07c24d 100644 --- a/src/batch/src/executor/top_n.rs +++ b/src/batch/src/executor/top_n.rs @@ -180,7 +180,9 @@ impl TopNHeap { let mut ties_with_peek = vec![]; // pop all the ties with peek ties_with_peek.push(self.heap.pop().unwrap()); - while let Some(e) = self.heap.peek() && e.encoded_row == peek.encoded_row { + while let Some(e) = self.heap.peek() + && e.encoded_row 
== peek.encoded_row + { ties_with_peek.push(self.heap.pop().unwrap()); } self.heap.push(elem); diff --git a/src/common/src/cache.rs b/src/common/src/cache.rs index 5f80592fed27e..f6af1ec60c0da 100644 --- a/src/common/src/cache.rs +++ b/src/common/src/cache.rs @@ -757,7 +757,9 @@ impl LruCache { shard.release(handle) }; // do not deallocate data with holding mutex. - if let Some((key, value)) = data && let Some(listener) = &self.listener { + if let Some((key, value)) = data + && let Some(listener) = &self.listener + { listener.on_release(key, value); } } @@ -819,7 +821,9 @@ impl LruCache { shard.erase(hash, key) }; // do not deallocate data with holding mutex. - if let Some((key, value)) = data && let Some(listener) = &self.listener { + if let Some((key, value)) = data + && let Some(listener) = &self.listener + { listener.on_release(key, value); } } diff --git a/src/common/src/types/interval.rs b/src/common/src/types/interval.rs index aca4d090bcac2..c921905d8d9f0 100644 --- a/src/common/src/types/interval.rs +++ b/src/common/src/types/interval.rs @@ -1386,7 +1386,9 @@ impl Interval { fn parse_postgres(s: &str) -> Result { use DateTimeField::*; let mut tokens = parse_interval(s)?; - if tokens.len() % 2 != 0 && let Some(TimeStrToken::Num(_)) = tokens.last() { + if tokens.len() % 2 != 0 + && let Some(TimeStrToken::Num(_)) = tokens.last() + { tokens.push(TimeStrToken::TimeUnit(DateTimeField::Second)); } if tokens.len() % 2 != 0 { @@ -1394,7 +1396,9 @@ impl Interval { } let mut token_iter = tokens.into_iter(); let mut result = Interval::from_month_day_usec(0, 0, 0); - while let Some(num) = token_iter.next() && let Some(interval_unit) = token_iter.next() { + while let Some(num) = token_iter.next() + && let Some(interval_unit) = token_iter.next() + { match (num, interval_unit) { (TimeStrToken::Num(num), TimeStrToken::TimeUnit(interval_unit)) => { result = (|| match interval_unit { diff --git a/src/common/src/util/column_index_mapping.rs 
b/src/common/src/util/column_index_mapping.rs index 2c12dc47efb11..212c07df1e285 100644 --- a/src/common/src/util/column_index_mapping.rs +++ b/src/common/src/util/column_index_mapping.rs @@ -67,8 +67,10 @@ impl ColIndexMapping { return false; } for (src, tar) in self.map.iter().enumerate() { - if let Some(tar_value) = tar && src == *tar_value { - continue + if let Some(tar_value) = tar + && src == *tar_value + { + continue; } else { return false; } diff --git a/src/compute/tests/cdc_tests.rs b/src/compute/tests/cdc_tests.rs index 6a50b8410bbd4..fff56a17d9117 100644 --- a/src/compute/tests/cdc_tests.rs +++ b/src/compute/tests/cdc_tests.rs @@ -323,9 +323,11 @@ async fn consume_message_stream(mut stream: BoxedMessageStream) -> StreamResult< println!("[mv] chunk: {:#?}", c); } Message::Barrier(b) => { - if let Some(m) = b.mutation && matches!(*m, Mutation::Stop(_)) { + if let Some(m) = b.mutation + && matches!(*m, Mutation::Stop(_)) + { println!("encounter stop barrier"); - break + break; } } } diff --git a/src/connector/src/parser/mod.rs b/src/connector/src/parser/mod.rs index c7b8bf702e1cc..bdbb110daf7fc 100644 --- a/src/connector/src/parser/mod.rs +++ b/src/connector/src/parser/mod.rs @@ -169,12 +169,19 @@ impl MessageMeta<'_> { SourceColumnType::Offset => Datum::Some(self.offset.into()).into(), // Extract custom meta data per connector. SourceColumnType::Meta if let SourceMeta::Kafka(kafka_meta) = self.meta => { - assert_eq!(desc.name.as_str(), KAFKA_TIMESTAMP_COLUMN_NAME, "unexpected meta column name"); - kafka_meta.timestamp.map(|ts| { - risingwave_common::cast::i64_to_timestamptz(ts) - .unwrap() - .to_scalar_value() - }).into() + assert_eq!( + desc.name.as_str(), + KAFKA_TIMESTAMP_COLUMN_NAME, + "unexpected meta column name" + ); + kafka_meta + .timestamp + .map(|ts| { + risingwave_common::cast::i64_to_timestamptz(ts) + .unwrap() + .to_scalar_value() + }) + .into() } // For other cases, return `None`. 
diff --git a/src/connector/src/parser/mysql.rs b/src/connector/src/parser/mysql.rs index 58be305a69118..0a0f8f52e90b2 100644 --- a/src/connector/src/parser/mysql.rs +++ b/src/connector/src/parser/mysql.rs @@ -143,7 +143,9 @@ mod tests { }); pin_mut!(row_stream); while let Some(row) = row_stream.next().await { - if let Ok(ro) = row && ro.is_some() { + if let Ok(ro) = row + && ro.is_some() + { let owned_row = ro.unwrap(); let d = owned_row.datum_at(2); if let Some(scalar) = d { diff --git a/src/connector/src/parser/unified/debezium.rs b/src/connector/src/parser/unified/debezium.rs index e16df28aebdf2..e392e31e3644d 100644 --- a/src/connector/src/parser/unified/debezium.rs +++ b/src/connector/src/parser/unified/debezium.rs @@ -145,42 +145,36 @@ pub fn extract_bson_id(id_type: &DataType, bson_doc: &serde_json::Value) -> anyh .get("_id") .ok_or_else(|| anyhow::format_err!("Debezuim Mongo requires document has a `_id` field"))?; let id: Datum = match id_type { - DataType::Jsonb => ScalarImpl::Jsonb(id_field.clone().into()).into(), - DataType::Varchar => match id_field { - serde_json::Value::String(s) => Some(ScalarImpl::Utf8(s.clone().into())), - serde_json::Value::Object(obj) if obj.contains_key("$oid") => Some(ScalarImpl::Utf8( - obj["$oid"].as_str().to_owned().unwrap_or_default().into(), - )), - _ => anyhow::bail!( - "Can not convert bson {:?} to {:?}", - id_field, id_type - ), - }, - DataType::Int32 => { - if let serde_json::Value::Object(ref obj) = id_field && obj.contains_key("$numberInt") { - let int_str = obj["$numberInt"].as_str().unwrap_or_default(); - Some(ScalarImpl::Int32(int_str.parse().unwrap_or_default())) - } else { - anyhow::bail!( - "Can not convert bson {:?} to {:?}", - id_field, id_type - ) + DataType::Jsonb => ScalarImpl::Jsonb(id_field.clone().into()).into(), + DataType::Varchar => match id_field { + serde_json::Value::String(s) => Some(ScalarImpl::Utf8(s.clone().into())), + serde_json::Value::Object(obj) if obj.contains_key("$oid") => 
Some(ScalarImpl::Utf8( + obj["$oid"].as_str().to_owned().unwrap_or_default().into(), + )), + _ => anyhow::bail!("Can not convert bson {:?} to {:?}", id_field, id_type), + }, + DataType::Int32 => { + if let serde_json::Value::Object(ref obj) = id_field + && obj.contains_key("$numberInt") + { + let int_str = obj["$numberInt"].as_str().unwrap_or_default(); + Some(ScalarImpl::Int32(int_str.parse().unwrap_or_default())) + } else { + anyhow::bail!("Can not convert bson {:?} to {:?}", id_field, id_type) + } } - } - DataType::Int64 => { - if let serde_json::Value::Object(ref obj) = id_field && obj.contains_key("$numberLong") - { - let int_str = obj["$numberLong"].as_str().unwrap_or_default(); - Some(ScalarImpl::Int64(int_str.parse().unwrap_or_default())) - } else { - anyhow::bail!( - "Can not convert bson {:?} to {:?}", - id_field, id_type - ) + DataType::Int64 => { + if let serde_json::Value::Object(ref obj) = id_field + && obj.contains_key("$numberLong") + { + let int_str = obj["$numberLong"].as_str().unwrap_or_default(); + Some(ScalarImpl::Int64(int_str.parse().unwrap_or_default())) + } else { + anyhow::bail!("Can not convert bson {:?} to {:?}", id_field, id_type) + } } - } - _ => unreachable!("DebeziumMongoJsonParser::new must ensure _id column datatypes."), -}; + _ => unreachable!("DebeziumMongoJsonParser::new must ensure _id column datatypes."), + }; Ok(id) } impl MongoProjection { diff --git a/src/connector/src/parser/unified/upsert.rs b/src/connector/src/parser/unified/upsert.rs index a0be4f050b9c9..2697d4bdf8151 100644 --- a/src/connector/src/parser/unified/upsert.rs +++ b/src/connector/src/parser/unified/upsert.rs @@ -114,7 +114,9 @@ where other => return other, }; - if let Some(key_as_column_name) = &self.key_as_column_name && name == key_as_column_name { + if let Some(key_as_column_name) = &self.key_as_column_name + && name == key_as_column_name + { return self.access(&["key"], Some(type_expected)); } diff --git a/src/connector/src/parser/upsert_parser.rs 
b/src/connector/src/parser/upsert_parser.rs index f9ce0caa7e254..71210b9e4b8f8 100644 --- a/src/connector/src/parser/upsert_parser.rs +++ b/src/connector/src/parser/upsert_parser.rs @@ -102,7 +102,9 @@ impl UpsertParser { row_op = row_op.with_key(self.key_builder.generate_accessor(data).await?); } // Empty payload of kafka is Some(vec![]) - if let Some(data) = payload && !data.is_empty() { + if let Some(data) = payload + && !data.is_empty() + { row_op = row_op.with_value(self.payload_builder.generate_accessor(data).await?); change_event_op = ChangeEventOperation::Upsert; } diff --git a/src/connector/src/sink/encoder/template.rs b/src/connector/src/sink/encoder/template.rs index 97d8271f9e83a..1f70836ab453d 100644 --- a/src/connector/src/sink/encoder/template.rs +++ b/src/connector/src/sink/encoder/template.rs @@ -50,9 +50,14 @@ impl TemplateEncoder { )); } for capture in re.captures_iter(format) { - if let Some(inner_content) = capture.get(1) && !set.contains(inner_content.as_str()){ - return Err(SinkError::Redis(format!("Can't find field({:?}) in key_format or value_format",inner_content.as_str()))) - } + if let Some(inner_content) = capture.get(1) + && !set.contains(inner_content.as_str()) + { + return Err(SinkError::Redis(format!( + "Can't find field({:?}) in key_format or value_format", + inner_content.as_str() + ))); + } } Ok(()) } diff --git a/src/connector/src/sink/log_store.rs b/src/connector/src/sink/log_store.rs index f7d99141139f5..49e28bce2f795 100644 --- a/src/connector/src/sink/log_store.rs +++ b/src/connector/src/sink/log_store.rs @@ -400,7 +400,9 @@ impl<'a, F: TryFuture + Unpin + 'static> DeliveryFutureManagerAddFuture pub async fn await_one_delivery(&mut self) -> Result<(), F::Error> { for (_, item) in &mut self.0.items { - if let DeliveryFutureManagerItem::Chunk {futures, ..} = item && let Some(mut delivery_future) = futures.pop_front() { + if let DeliveryFutureManagerItem::Chunk { futures, .. 
} = item + && let Some(mut delivery_future) = futures.pop_front() + { self.0.future_count -= 1; return poll_fn(|cx| delivery_future.try_poll_unpin(cx)).await; } else { diff --git a/src/connector/src/source/external.rs b/src/connector/src/source/external.rs index 9eff3991a4d4a..953277ba36106 100644 --- a/src/connector/src/source/external.rs +++ b/src/connector/src/source/external.rs @@ -286,7 +286,9 @@ impl ExternalTableReader for MySqlExternalTableReader { impl MySqlExternalTableReader { pub fn new(properties: HashMap, rw_schema: Schema) -> ConnectorResult { - if let Some(field) = rw_schema.fields.last() && field.name.as_str() != OFFSET_COLUMN_NAME { + if let Some(field) = rw_schema.fields.last() + && field.name.as_str() != OFFSET_COLUMN_NAME + { return Err(ConnectorError::Config(anyhow!( "last column of schema must be `_rw_offset`" ))); diff --git a/src/connector/src/source/google_pubsub/source/reader.rs b/src/connector/src/source/google_pubsub/source/reader.rs index dfe95eeb1b808..8241e1657c496 100644 --- a/src/connector/src/source/google_pubsub/source/reader.rs +++ b/src/connector/src/source/google_pubsub/source/reader.rs @@ -96,7 +96,9 @@ impl CommonSplitReader for PubsubSplitReader { yield chunk; // Stop if we've approached the stop_offset - if let Some(stop_offset) = self.stop_offset && latest_offset >= stop_offset { + if let Some(stop_offset) = self.stop_offset + && latest_offset >= stop_offset + { return Ok(()); } } diff --git a/src/connector/src/source/kafka/private_link.rs b/src/connector/src/source/kafka/private_link.rs index 573e14c3e073f..5a6688a4cf8e9 100644 --- a/src/connector/src/source/kafka/private_link.rs +++ b/src/connector/src/source/kafka/private_link.rs @@ -120,7 +120,9 @@ impl PrivateLinkConsumerContext { impl ClientContext for PrivateLinkConsumerContext { /// this func serves as a callback when `poll` is completed. 
fn stats(&self, statistics: Statistics) { - if let Some(metrics) = &self.metrics && let Some(id) = &self.identifier { + if let Some(metrics) = &self.metrics + && let Some(id) = &self.identifier + { metrics.report(id.as_str(), &statistics); } } @@ -160,7 +162,9 @@ impl PrivateLinkProducerContext { impl ClientContext for PrivateLinkProducerContext { fn stats(&self, statistics: Statistics) { - if let Some(metrics) = &self.metrics && let Some(id) = &self.identifier { + if let Some(metrics) = &self.metrics + && let Some(id) = &self.identifier + { metrics.report(id.as_str(), &statistics); } } diff --git a/src/ctl/src/cmd_impl/meta/reschedule.rs b/src/ctl/src/cmd_impl/meta/reschedule.rs index 6d7765e7b1a22..737d5c2e88b07 100644 --- a/src/ctl/src/cmd_impl/meta/reschedule.rs +++ b/src/ctl/src/cmd_impl/meta/reschedule.rs @@ -273,7 +273,9 @@ pub async fn unregister_workers( .ok() .or_else(|| worker_index_by_host.get(&worker).cloned()); - if let Some(worker_id) = worker_id && worker_ids.contains(&worker_id) { + if let Some(worker_id) = worker_id + && worker_ids.contains(&worker_id) + { if !target_worker_ids.insert(worker_id) { println!("Warn: {} and {} are the same worker", worker, worker_id); } diff --git a/src/ctl/src/cmd_impl/meta/serving.rs b/src/ctl/src/cmd_impl/meta/serving.rs index 867317c0915b4..cff3c6f911282 100644 --- a/src/ctl/src/cmd_impl/meta/serving.rs +++ b/src/ctl/src/cmd_impl/meta/serving.rs @@ -82,7 +82,9 @@ pub async fn list_serving_fragment_mappings(context: &CtlContext) -> anyhow::Res ) .into(), ); - if let Some(w) = worker && let Some(addr) = w.host.as_ref() { + if let Some(w) = worker + && let Some(addr) = w.host.as_ref() + { row.add_cell(format!("id: {}; {}:{}", w.id, addr.host, addr.port).into()); } else { row.add_cell("".into()); diff --git a/src/ctl/src/cmd_impl/scale/resize.rs b/src/ctl/src/cmd_impl/scale/resize.rs index 786d0fa4c83b7..59c2280d17873 100644 --- a/src/ctl/src/cmd_impl/scale/resize.rs +++ b/src/ctl/src/cmd_impl/scale/resize.rs @@ 
-393,7 +393,9 @@ pub async fn update_schedulability( .ok() .or_else(|| worker_index_by_host.get(&worker).cloned()); - if let Some(worker_id) = worker_id && worker_ids.contains(&worker_id){ + if let Some(worker_id) = worker_id + && worker_ids.contains(&worker_id) + { if !target_worker_ids.insert(worker_id) { println!("Warn: {} and {} are the same worker", worker, worker_id); } diff --git a/src/expr/core/src/expr/build.rs b/src/expr/core/src/expr/build.rs index 7dffbcd42d66b..f0fd3397c4fa8 100644 --- a/src/expr/core/src/expr/build.rs +++ b/src/expr/core/src/expr/build.rs @@ -349,7 +349,9 @@ pub(crate) fn lexer(input: &str) -> Vec { ':' => Token::Colon, '$' => { let mut number = String::new(); - while let Some(c) = chars.peek() && c.is_ascii_digit() { + while let Some(c) = chars.peek() + && c.is_ascii_digit() + { number.push(chars.next().unwrap()); } let index = number.parse::().expect("Invalid number"); diff --git a/src/expr/core/src/expr/expr_coalesce.rs b/src/expr/core/src/expr/expr_coalesce.rs index 71c7392c7ec37..b7916f414136c 100644 --- a/src/expr/core/src/expr/expr_coalesce.rs +++ b/src/expr/core/src/expr/expr_coalesce.rs @@ -56,7 +56,9 @@ impl Expression for CoalesceExpression { } let mut builder = self.return_type.create_array_builder(len); for (i, sel) in selection.iter().enumerate() { - if init_vis.is_set(i) && let Some(child_idx) = sel { + if init_vis.is_set(i) + && let Some(child_idx) = sel + { builder.append(children_array[*child_idx].value_at(i)); } else { builder.append_null() diff --git a/src/expr/impl/src/aggregate/percentile_cont.rs b/src/expr/impl/src/aggregate/percentile_cont.rs index 46002d1f596f7..eeaa257627cd7 100644 --- a/src/expr/impl/src/aggregate/percentile_cont.rs +++ b/src/expr/impl/src/aggregate/percentile_cont.rs @@ -118,19 +118,22 @@ impl AggregateFunction for PercentileCont { async fn get_result(&self, state: &AggregateState) -> Result { let state = &state.downcast_ref::().0; - Ok(if let Some(fraction) = self.fraction && 
!state.is_empty() { - let rn = fraction * (state.len() - 1) as f64; - let crn = f64::ceil(rn); - let frn = f64::floor(rn); - let result = if crn == frn { - state[crn as usize] + Ok( + if let Some(fraction) = self.fraction + && !state.is_empty() + { + let rn = fraction * (state.len() - 1) as f64; + let crn = f64::ceil(rn); + let frn = f64::floor(rn); + let result = if crn == frn { + state[crn as usize] + } else { + (crn - rn) * state[frn as usize] + (rn - frn) * state[crn as usize] + }; + Some(result.into()) } else { - (crn - rn) * state[frn as usize] - + (rn - frn) * state[crn as usize] - }; - Some(result.into()) - } else { - None - }) + None + }, + ) } } diff --git a/src/expr/impl/src/aggregate/percentile_disc.rs b/src/expr/impl/src/aggregate/percentile_disc.rs index c9143dcf8e640..80ebbfd24e544 100644 --- a/src/expr/impl/src/aggregate/percentile_disc.rs +++ b/src/expr/impl/src/aggregate/percentile_disc.rs @@ -143,15 +143,19 @@ impl AggregateFunction for PercentileDisc { async fn get_result(&self, state: &AggregateState) -> Result { let state = &state.downcast_ref::().0; - Ok(if let Some(fractions) = self.fractions && !state.is_empty() { - let idx = if fractions == 0.0 { - 0 + Ok( + if let Some(fractions) = self.fractions + && !state.is_empty() + { + let idx = if fractions == 0.0 { + 0 + } else { + f64::ceil(fractions * state.len() as f64) as usize - 1 + }; + Some(state[idx].clone()) } else { - f64::ceil(fractions * state.len() as f64) as usize - 1 - }; - Some(state[idx].clone()) - } else { - None - }) + None + }, + ) } } diff --git a/src/expr/impl/src/scalar/to_timestamp.rs b/src/expr/impl/src/scalar/to_timestamp.rs index bc93720373c74..e4ef9edc235eb 100644 --- a/src/expr/impl/src/scalar/to_timestamp.rs +++ b/src/expr/impl/src/scalar/to_timestamp.rs @@ -108,7 +108,9 @@ fn build_dummy(_return_type: DataType, _children: Vec) -> Resul )] pub fn to_date(s: &str, tmpl: &ChronoPattern) -> Result { let mut parsed = parse(s, tmpl)?; - if let Some(year) = &mut parsed.year 
&& *year < 0 { + if let Some(year) = &mut parsed.year + && *year < 0 + { *year += 1; } Ok(parsed.to_naive_date()?.into()) diff --git a/src/expr/macro/src/gen.rs b/src/expr/macro/src/gen.rs index 454d2a3169137..5056d5dc24d11 100644 --- a/src/expr/macro/src/gen.rs +++ b/src/expr/macro/src/gen.rs @@ -609,14 +609,18 @@ impl FunctionAttr { .collect_vec(); let downcast_state = if custom_state.is_some() { quote! { let mut state: &mut #state_type = state0.downcast_mut(); } - } else if let Some(s) = &self.state && s == "ref" { + } else if let Some(s) = &self.state + && s == "ref" + { quote! { let mut state: Option<#state_type> = state0.as_datum_mut().as_ref().map(|x| x.as_scalar_ref_impl().try_into().unwrap()); } } else { quote! { let mut state: Option<#state_type> = state0.as_datum_mut().take().map(|s| s.try_into().unwrap()); } }; let restore_state = if custom_state.is_some() { quote! {} - } else if let Some(s) = &self.state && s == "ref" { + } else if let Some(s) = &self.state + && s == "ref" + { quote! { *state0.as_datum_mut() = state.map(|x| x.to_owned_scalar().into()); } } else { quote! { *state0.as_datum_mut() = state.map(|s| s.into()); } @@ -694,10 +698,14 @@ impl FunctionAttr { let first_state = if self.init_state.is_some() { // for count, the state will never be None quote! { unreachable!() } - } else if let Some(s) = &self.state && s == "ref" { + } else if let Some(s) = &self.state + && s == "ref" + { // for min/max/first/last, the state is the first value quote! { Some(v0) } - } else if let AggregateFnOrImpl::Impl(impl_) = user_fn && impl_.create_state.is_some() { + } else if let AggregateFnOrImpl::Impl(impl_) = user_fn + && impl_.create_state.is_some() + { // use user-defined create_state function quote! {{ let state = self.function.create_state(); @@ -727,7 +735,9 @@ impl FunctionAttr { }; let get_result = if custom_state.is_some() { quote! 
{ Ok(Some(state.downcast_ref::<#state_type>().into())) } - } else if let AggregateFnOrImpl::Impl(impl_) = user_fn && impl_.finalize.is_some() { + } else if let AggregateFnOrImpl::Impl(impl_) = user_fn + && impl_.finalize.is_some() + { quote! { let state = match state.as_datum() { Some(s) => s.as_scalar_ref_impl().try_into().unwrap(), @@ -1109,8 +1119,12 @@ fn data_type(ty: &str) -> TokenStream2 { /// output_types("struct") -> ["varchar", "jsonb"] /// ``` fn output_types(ty: &str) -> Vec<&str> { - if let Some(s) = ty.strip_prefix("struct<") && let Some(args) = s.strip_suffix('>') { - args.split(',').map(|s| s.split_whitespace().nth(1).unwrap()).collect() + if let Some(s) = ty.strip_prefix("struct<") + && let Some(args) = s.strip_suffix('>') + { + args.split(',') + .map(|s| s.split_whitespace().nth(1).unwrap()) + .collect() } else { vec![ty] } diff --git a/src/expr/macro/src/parse.rs b/src/expr/macro/src/parse.rs index 8e2e8c6d0b2f1..fc9e4d45437e2 100644 --- a/src/expr/macro/src/parse.rs +++ b/src/expr/macro/src/parse.rs @@ -314,7 +314,9 @@ fn strip_iterator(ty: &syn::Type) -> Option<&syn::Type> { return None; }; for arg in &angle_bracketed.args { - if let syn::GenericArgument::AssocType(b) = arg && b.ident == "Item" { + if let syn::GenericArgument::AssocType(b) = arg + && b.ident == "Item" + { return Some(&b.ty); } } diff --git a/src/frontend/src/binder/expr/column.rs b/src/frontend/src/binder/expr/column.rs index 41ca86919defe..4c1a426950b63 100644 --- a/src/frontend/src/binder/expr/column.rs +++ b/src/frontend/src/binder/expr/column.rs @@ -148,8 +148,10 @@ impl Binder { // FIXME: The type of `CTID` should be `tid`. // FIXME: The `CTID` column should be unique, so literal may break something. // FIXME: At least we should add a notice here. 
- if let ErrorCode::ItemNotFound(_) = err && column_name == "ctid" { - return Ok(Literal::new(Some("".into()), DataType::Varchar).into()) + if let ErrorCode::ItemNotFound(_) = err + && column_name == "ctid" + { + return Ok(Literal::new(Some("".into()), DataType::Varchar).into()); } Err(err.into()) } diff --git a/src/frontend/src/binder/expr/function.rs b/src/frontend/src/binder/expr/function.rs index a32e07f72ce51..145b11581be90 100644 --- a/src/frontend/src/binder/expr/function.rs +++ b/src/frontend/src/binder/expr/function.rs @@ -93,7 +93,9 @@ impl Binder { }; // agg calls - if f.over.is_none() && let Ok(kind) = function_name.parse() { + if f.over.is_none() + && let Ok(kind) = function_name.parse() + { return self.bind_agg(f, kind); } @@ -154,11 +156,12 @@ impl Binder { // user defined function // TODO: resolve schema name https://github.com/risingwavelabs/risingwave/issues/12422 - if let Ok(schema) = self.first_valid_schema() && - let Some(func) = schema.get_function_by_name_args( + if let Ok(schema) = self.first_valid_schema() + && let Some(func) = schema.get_function_by_name_args( &function_name, &inputs.iter().map(|arg| arg.return_type()).collect_vec(), - ) { + ) + { use crate::catalog::function_catalog::FunctionKind::*; match &func.kind { Scalar { .. 
} => return Ok(UserDefinedFunction::new(func.clone(), inputs).into()), @@ -360,8 +363,12 @@ impl Binder { // check signature and do implicit cast match (kind, direct_args.as_mut_slice(), args.as_mut_slice()) { (AggKind::PercentileCont | AggKind::PercentileDisc, [fraction], [arg]) => { - if fraction.cast_implicit_mut(DataType::Float64).is_ok() && let Ok(casted) = fraction.fold_const() { - if let Some(ref casted) = casted && !(0.0..=1.0).contains(&casted.as_float64().0) { + if fraction.cast_implicit_mut(DataType::Float64).is_ok() + && let Ok(casted) = fraction.fold_const() + { + if let Some(ref casted) = casted + && !(0.0..=1.0).contains(&casted.as_float64().0) + { return Err(ErrorCode::InvalidInputSyntax(format!( "direct arg in `{}` must between 0.0 and 1.0", kind diff --git a/src/frontend/src/binder/relation/join.rs b/src/frontend/src/binder/relation/join.rs index eb4ce96f9ab3f..10c7cd2b646e2 100644 --- a/src/frontend/src/binder/relation/join.rs +++ b/src/frontend/src/binder/relation/join.rs @@ -193,7 +193,9 @@ impl Binder { // TODO: is it ok to ignore quote style? // If we have a `USING` constraint, we only bind the columns appearing in the // constraint. 
- if let Some(cols) = &using_columns && !cols.contains(&column) { + if let Some(cols) = &using_columns + && !cols.contains(&column) + { continue; } let indices_l = match old_context.get_unqualified_indices(&column.real_value()) diff --git a/src/frontend/src/binder/relation/mod.rs b/src/frontend/src/binder/relation/mod.rs index a6a1a8d2b02f2..856f221ec9855 100644 --- a/src/frontend/src/binder/relation/mod.rs +++ b/src/frontend/src/binder/relation/mod.rs @@ -193,11 +193,13 @@ impl Binder { let schema_name = identifiers.pop().map(|ident| ident.real_value()); let database_name = identifiers.pop().map(|ident| ident.real_value()); - if let Some(database_name) = database_name && database_name != db_name { + if let Some(database_name) = database_name + && database_name != db_name + { return Err(ResolveQualifiedNameError::new( formatted_name, - ResolveQualifiedNameErrorKind::NotCurrentDatabase) - ); + ResolveQualifiedNameErrorKind::NotCurrentDatabase, + )); } Ok((schema_name, name)) @@ -330,7 +332,9 @@ impl Binder { for_system_time_as_of_proctime: bool, ) -> Result { let (schema_name, table_name) = Self::resolve_schema_qualified_name(&self.db_name, name)?; - if schema_name.is_none() && let Some(item) = self.context.cte_to_relation.get(&table_name) { + if schema_name.is_none() + && let Some(item) = self.context.cte_to_relation.get(&table_name) + { // Handles CTE let (share_id, query, mut original_alias) = item.deref().clone(); @@ -341,9 +345,7 @@ impl Binder { original_alias.columns = original_alias .columns .into_iter() - .zip_longest( - from_alias.columns - ) + .zip_longest(from_alias.columns) .map(EitherOrBoth::into_right) .collect(); } @@ -360,7 +362,10 @@ impl Binder { )?; // Share the CTE. 
- let input_relation = Relation::Subquery(Box::new(BoundSubquery { query, lateral: false })); + let input_relation = Relation::Subquery(Box::new(BoundSubquery { + query, + lateral: false, + })); let share_relation = Relation::Share(Box::new(BoundShare { share_id, input: input_relation, diff --git a/src/frontend/src/binder/relation/table_or_source.rs b/src/frontend/src/binder/relation/table_or_source.rs index b05b5db42b300..cd2d2ef45efab 100644 --- a/src/frontend/src/binder/relation/table_or_source.rs +++ b/src/frontend/src/binder/relation/table_or_source.rs @@ -145,9 +145,11 @@ impl Binder { let user_name = &self.auth_context.user_name; for path in self.search_path.path() { - if is_system_schema(path) && - let Ok(sys_table_catalog) = - self.catalog.get_sys_table_by_name(&self.db_name, path, table_name) { + if is_system_schema(path) + && let Ok(sys_table_catalog) = + self.catalog + .get_sys_table_by_name(&self.db_name, path, table_name) + { return Ok(resolve_sys_table_relation(sys_table_catalog)); } else { let schema_name = if path == USER_NAME_WILD_CARD { diff --git a/src/frontend/src/binder/select.rs b/src/frontend/src/binder/select.rs index ac1a53e75f63f..ceb7d55312f46 100644 --- a/src/frontend/src/binder/select.rs +++ b/src/frontend/src/binder/select.rs @@ -227,21 +227,50 @@ impl Binder { self.context.clause = Some(Clause::GroupBy); // Only support one grouping item in group by clause - let group_by = if select.group_by.len() == 1 && let Expr::GroupingSets(grouping_sets) = &select.group_by[0] { - GroupBy::GroupingSets(self.bind_grouping_items_expr_in_select(grouping_sets.clone(), &out_name_to_index, &select_items)?) - } else if select.group_by.len() == 1 && let Expr::Rollup(rollup) = &select.group_by[0] { - GroupBy::Rollup(self.bind_grouping_items_expr_in_select(rollup.clone(), &out_name_to_index, &select_items)?) 
- } else if select.group_by.len() == 1 && let Expr::Cube(cube) = &select.group_by[0] { - GroupBy::Cube(self.bind_grouping_items_expr_in_select(cube.clone(), &out_name_to_index, &select_items)?) + let group_by = if select.group_by.len() == 1 + && let Expr::GroupingSets(grouping_sets) = &select.group_by[0] + { + GroupBy::GroupingSets(self.bind_grouping_items_expr_in_select( + grouping_sets.clone(), + &out_name_to_index, + &select_items, + )?) + } else if select.group_by.len() == 1 + && let Expr::Rollup(rollup) = &select.group_by[0] + { + GroupBy::Rollup(self.bind_grouping_items_expr_in_select( + rollup.clone(), + &out_name_to_index, + &select_items, + )?) + } else if select.group_by.len() == 1 + && let Expr::Cube(cube) = &select.group_by[0] + { + GroupBy::Cube(self.bind_grouping_items_expr_in_select( + cube.clone(), + &out_name_to_index, + &select_items, + )?) } else { - if select.group_by.iter().any(|expr| matches!(expr, Expr::GroupingSets(_)) || matches!(expr, Expr::Rollup(_)) || matches!(expr, Expr::Cube(_))) { - return Err(ErrorCode::BindError("Only support one grouping item in group by clause".to_string()).into()); + if select.group_by.iter().any(|expr| { + matches!(expr, Expr::GroupingSets(_)) + || matches!(expr, Expr::Rollup(_)) + || matches!(expr, Expr::Cube(_)) + }) { + return Err(ErrorCode::BindError( + "Only support one grouping item in group by clause".to_string(), + ) + .into()); } - GroupBy::GroupKey(select - .group_by - .into_iter() - .map(|expr| self.bind_group_by_expr_in_select(expr, &out_name_to_index, &select_items)) - .try_collect()?) 
+ GroupBy::GroupKey( + select + .group_by + .into_iter() + .map(|expr| { + self.bind_group_by_expr_in_select(expr, &out_name_to_index, &select_items) + }) + .try_collect()?, + ) }; self.context.clause = None; @@ -795,7 +824,9 @@ impl Binder { let mut bound_exprs = vec![]; for expr in exprs { let expr_impl = match expr { - Expr::Identifier(name) if let Some(index) = name_to_index.get(&name.real_value()) => { + Expr::Identifier(name) + if let Some(index) = name_to_index.get(&name.real_value()) => + { match *index { usize::MAX => { return Err(ErrorCode::BindError(format!( @@ -809,24 +840,21 @@ impl Binder { } } } - Expr::Value(Value::Number(number)) => { - match number.parse::() { - Ok(index) if 1 <= index && index <= select_items.len() => { - let idx_from_0 = index - 1; - InputRef::new(idx_from_0, select_items[idx_from_0].return_type()).into() - } - _ => { - return Err(ErrorCode::InvalidInputSyntax(format!( - "Invalid ordinal number in DISTINCT ON: {}", - number - )) - .into()) - } + Expr::Value(Value::Number(number)) => match number.parse::() { + Ok(index) if 1 <= index && index <= select_items.len() => { + let idx_from_0 = index - 1; + InputRef::new(idx_from_0, select_items[idx_from_0].return_type()) + .into() } - } - expr => { - self.bind_expr(expr)? - } + _ => { + return Err(ErrorCode::InvalidInputSyntax(format!( + "Invalid ordinal number in DISTINCT ON: {}", + number + )) + .into()) + } + }, + expr => self.bind_expr(expr)?, }; bound_exprs.push(expr_impl); } diff --git a/src/frontend/src/expr/function_call.rs b/src/frontend/src/expr/function_call.rs index f5e618892fc5e..7f2c84c6cce78 100644 --- a/src/frontend/src/expr/function_call.rs +++ b/src/frontend/src/expr/function_call.rs @@ -111,12 +111,16 @@ impl FunctionCall { target: DataType, allows: CastContext, ) -> Result<(), CastError> { - if let ExprImpl::Parameter(expr) = child && !expr.has_infer() { + if let ExprImpl::Parameter(expr) = child + && !expr.has_infer() + { // Always Ok below. 
Safe to mutate `expr` (from `child`). expr.cast_infer_type(target); return Ok(()); } - if let ExprImpl::FunctionCall(func) = child && func.func_type == ExprType::Row { + if let ExprImpl::FunctionCall(func) = child + && func.func_type == ExprType::Row + { // Row function will have empty fields in Datatype::Struct at this point. Therefore, // we will need to take some special care to generate the cast types. For normal struct // types, they will be handled in `cast_ok`. diff --git a/src/frontend/src/expr/mod.rs b/src/frontend/src/expr/mod.rs index 6eec2983f5c91..27e781a88690b 100644 --- a/src/frontend/src/expr/mod.rs +++ b/src/frontend/src/expr/mod.rs @@ -822,13 +822,19 @@ impl ExprImpl { match expr_type { ExprType::Add | ExprType::Subtract => { let (_, lhs, rhs) = function_call.clone().decompose_as_binary(); - if let ExprImpl::InputRef(input_ref) = &lhs && rhs.is_const() { + if let ExprImpl::InputRef(input_ref) = &lhs + && rhs.is_const() + { // Currently we will return `None` for non-literal because the result of the expression might be '1 day'. However, there will definitely exist false positives such as '1 second + 1 second'. // We will treat the expression as an input offset when rhs is `null`. 
- if rhs.return_type() == DataType::Interval && rhs.as_literal().map_or(true, |literal| literal.get_data().as_ref().map_or(false, |scalar| { - let interval = scalar.as_interval(); - interval.months() != 0 || interval.days() != 0 - })) { + if rhs.return_type() == DataType::Interval + && rhs.as_literal().map_or(true, |literal| { + literal.get_data().as_ref().map_or(false, |scalar| { + let interval = scalar.as_interval(); + interval.months() != 0 || interval.days() != 0 + }) + }) + { None } else { Some((input_ref.index(), Some((expr_type, rhs)))) diff --git a/src/frontend/src/handler/alter_user.rs b/src/frontend/src/handler/alter_user.rs index 0d83c3ae867d5..b72d8d32ae8be 100644 --- a/src/frontend/src/handler/alter_user.rs +++ b/src/frontend/src/handler/alter_user.rs @@ -102,7 +102,9 @@ fn alter_prost_user_info( } UserOption::Password(opt) => { // TODO: Behaviour of PostgreSQL: Notice when password is empty string. - if let Some(password) = opt && !password.0.is_empty() { + if let Some(password) = opt + && !password.0.is_empty() + { user_info.auth_info = encrypted_password(&user_info.name, &password.0); } else { user_info.auth_info = None; diff --git a/src/frontend/src/handler/create_user.rs b/src/frontend/src/handler/create_user.rs index 8659e1b647c33..0bac084db2c80 100644 --- a/src/frontend/src/handler/create_user.rs +++ b/src/frontend/src/handler/create_user.rs @@ -85,7 +85,9 @@ fn make_prost_user_info( } UserOption::Password(opt) => { // TODO: Behaviour of PostgreSQL: Notice when password is empty string. 
- if let Some(password) = opt && !password.0.is_empty() { + if let Some(password) = opt + && !password.0.is_empty() + { user_info.auth_info = encrypted_password(&user_info.name, &password.0); } } diff --git a/src/frontend/src/handler/extended_handle.rs b/src/frontend/src/handler/extended_handle.rs index bcd65782d59a2..1c0f0d36f0cbf 100644 --- a/src/frontend/src/handler/extended_handle.rs +++ b/src/frontend/src/handler/extended_handle.rs @@ -100,7 +100,9 @@ pub fn handle_parse( Ok(PrepareStatement::PureStatement(statement)) } Statement::CreateTable { query, .. } => { - if let Some(query) = query && have_parameter_in_query(query) { + if let Some(query) = query + && have_parameter_in_query(query) + { Err(ErrorCode::NotImplemented( "CREATE TABLE AS SELECT with parameters".to_string(), None.into(), @@ -111,7 +113,9 @@ pub fn handle_parse( } } Statement::CreateSink { stmt } => { - if let CreateSink::AsQuery(query) = &stmt.sink_from && have_parameter_in_query(query) { + if let CreateSink::AsQuery(query) = &stmt.sink_from + && have_parameter_in_query(query) + { Err(ErrorCode::NotImplemented( "CREATE SINK AS SELECT with parameters".to_string(), None.into(), diff --git a/src/frontend/src/optimizer/plan_expr_rewriter/cse_rewriter.rs b/src/frontend/src/optimizer/plan_expr_rewriter/cse_rewriter.rs index d027bf9273696..bc59b0c89a8fa 100644 --- a/src/frontend/src/optimizer/plan_expr_rewriter/cse_rewriter.rs +++ b/src/frontend/src/optimizer/plan_expr_rewriter/cse_rewriter.rs @@ -36,7 +36,9 @@ impl CseRewriter { impl ExprRewriter for CseRewriter { fn rewrite_function_call(&mut self, func_call: FunctionCall) -> ExprImpl { - if let Some(count) = self.expr_counter.counter.get(&func_call) && *count > 1 { + if let Some(count) = self.expr_counter.counter.get(&func_call) + && *count > 1 + { if let Some(expr) = self.cse_mapping.get(&func_call) { let expr: ExprImpl = ExprImpl::InputRef(expr.clone().into()); return expr; diff --git a/src/frontend/src/optimizer/plan_node/generic/hop_window.rs 
b/src/frontend/src/optimizer/plan_node/generic/hop_window.rs index 9bd0dec4b70cc..36095041b6380 100644 --- a/src/frontend/src/optimizer/plan_node/generic/hop_window.rs +++ b/src/frontend/src/optimizer/plan_node/generic/hop_window.rs @@ -106,7 +106,9 @@ impl GenericPlanNode for HopWindow { internal2output.try_map(self.internal_window_end_col_idx()), ) }; - if let Some(start_idx) = start_idx_in_output && let Some(end_idx) = end_idx_in_output { + if let Some(start_idx) = start_idx_in_output + && let Some(end_idx) = end_idx_in_output + { fd_set.add_functional_dependency_by_column_indices(&[start_idx], &[end_idx]); fd_set.add_functional_dependency_by_column_indices(&[end_idx], &[start_idx]); } diff --git a/src/frontend/src/optimizer/plan_node/logical_join.rs b/src/frontend/src/optimizer/plan_node/logical_join.rs index c594ededa40cf..35a15a5d64792 100644 --- a/src/frontend/src/optimizer/plan_node/logical_join.rs +++ b/src/frontend/src/optimizer/plan_node/logical_join.rs @@ -977,7 +977,10 @@ impl LogicalJoin { // Use primary table. let mut result_plan = self.to_stream_temporal_join(predicate.clone(), ctx); // Return directly if this temporal join can match the pk of its right table. 
- if let Ok(temporal_join) = &result_plan && temporal_join.eq_join_predicate().eq_indexes().len() == logical_scan.primary_key().len() { + if let Ok(temporal_join) = &result_plan + && temporal_join.eq_join_predicate().eq_indexes().len() + == logical_scan.primary_key().len() + { return result_plan; } let indexes = logical_scan.indexes(); diff --git a/src/frontend/src/optimizer/plan_node/logical_over_window.rs b/src/frontend/src/optimizer/plan_node/logical_over_window.rs index a78a145ab1997..6193c072563c6 100644 --- a/src/frontend/src/optimizer/plan_node/logical_over_window.rs +++ b/src/frontend/src/optimizer/plan_node/logical_over_window.rs @@ -115,45 +115,44 @@ impl<'a> LogicalOverWindowBuilder<'a> { match agg_kind { AggKind::Avg => { assert_eq!(args.len(), 1); - let left_ref = ExprImpl::from(self.push_window_func(WindowFunction::new( + let left_ref = ExprImpl::from(self.push_window_func(WindowFunction::new( WindowFuncKind::Aggregate(AggKind::Sum), partition_by.clone(), order_by.clone(), args.clone(), frame.clone(), - )?)).cast_explicit(return_type)?; + )?)) + .cast_explicit(return_type)?; let right_ref = ExprImpl::from(self.push_window_func(WindowFunction::new( - WindowFuncKind::Aggregate(AggKind::Count), - partition_by, - order_by, - args, - frame, - )?)); - - let new_expr = ExprImpl::from( - FunctionCall::new(ExprType::Divide, vec![left_ref, right_ref])?, - ); + WindowFuncKind::Aggregate(AggKind::Count), + partition_by, + order_by, + args, + frame, + )?)); + + let new_expr = ExprImpl::from(FunctionCall::new( + ExprType::Divide, + vec![left_ref, right_ref], + )?); Ok(new_expr) } - AggKind::StddevPop - | AggKind::StddevSamp - | AggKind::VarPop - | AggKind::VarSamp => { + AggKind::StddevPop | AggKind::StddevSamp | AggKind::VarPop | AggKind::VarSamp => { let input = args.first().unwrap(); - let squared_input_expr = ExprImpl::from( - FunctionCall::new( - ExprType::Multiply, - vec![input.clone(), input.clone()], - )?, - ); - - let sum_of_squares_expr = 
ExprImpl::from(self.push_window_func(WindowFunction::new( - WindowFuncKind::Aggregate(AggKind::Sum), - partition_by.clone(), - order_by.clone(), - vec![squared_input_expr], - frame.clone(), - )?)).cast_explicit(return_type.clone())?; + let squared_input_expr = ExprImpl::from(FunctionCall::new( + ExprType::Multiply, + vec![input.clone(), input.clone()], + )?); + + let sum_of_squares_expr = + ExprImpl::from(self.push_window_func(WindowFunction::new( + WindowFuncKind::Aggregate(AggKind::Sum), + partition_by.clone(), + order_by.clone(), + vec![squared_input_expr], + frame.clone(), + )?)) + .cast_explicit(return_type.clone())?; let sum_expr = ExprImpl::from(self.push_window_func(WindowFunction::new( WindowFuncKind::Aggregate(AggKind::Sum), @@ -161,7 +160,8 @@ impl<'a> LogicalOverWindowBuilder<'a> { order_by.clone(), args.clone(), frame.clone(), - )?)).cast_explicit(return_type.clone())?; + )?)) + .cast_explicit(return_type.clone())?; let count_expr = ExprImpl::from(self.push_window_func(WindowFunction::new( WindowFuncKind::Aggregate(AggKind::Count), @@ -171,32 +171,26 @@ impl<'a> LogicalOverWindowBuilder<'a> { frame, )?)); - let square_of_sum_expr = ExprImpl::from( - FunctionCall::new( - ExprType::Multiply, - vec![sum_expr.clone(), sum_expr], - )?, - ); - - let numerator_expr = ExprImpl::from( - FunctionCall::new( - ExprType::Subtract, - vec![ - sum_of_squares_expr, - ExprImpl::from( - FunctionCall::new( - ExprType::Divide, - vec![square_of_sum_expr, count_expr.clone()], - )?, - ), - ], - )?, - ); + let square_of_sum_expr = ExprImpl::from(FunctionCall::new( + ExprType::Multiply, + vec![sum_expr.clone(), sum_expr], + )?); + + let numerator_expr = ExprImpl::from(FunctionCall::new( + ExprType::Subtract, + vec![ + sum_of_squares_expr, + ExprImpl::from(FunctionCall::new( + ExprType::Divide, + vec![square_of_sum_expr, count_expr.clone()], + )?), + ], + )?); let denominator_expr = match agg_kind { AggKind::StddevPop | AggKind::VarPop => count_expr.clone(), - 
AggKind::StddevSamp | AggKind::VarSamp => ExprImpl::from( - FunctionCall::new( + AggKind::StddevSamp | AggKind::VarSamp => { + ExprImpl::from(FunctionCall::new( ExprType::Subtract, vec![ count_expr.clone(), @@ -205,17 +199,15 @@ impl<'a> LogicalOverWindowBuilder<'a> { DataType::Int64, )), ], - )?, - ), + )?) + } _ => unreachable!(), }; - let mut target_expr = ExprImpl::from( - FunctionCall::new( - ExprType::Divide, - vec![numerator_expr, denominator_expr], - )?, - ); + let mut target_expr = ExprImpl::from(FunctionCall::new( + ExprType::Divide, + vec![numerator_expr, denominator_expr], + )?); if matches!(agg_kind, AggKind::StddevPop | AggKind::StddevSamp) { target_expr = ExprImpl::from( @@ -224,31 +216,24 @@ impl<'a> LogicalOverWindowBuilder<'a> { } match agg_kind { - AggKind::VarPop | AggKind::StddevPop => { - Ok(target_expr) - } + AggKind::VarPop | AggKind::StddevPop => Ok(target_expr), AggKind::StddevSamp | AggKind::VarSamp => { - let less_than_expr = ExprImpl::from( - FunctionCall::new( - ExprType::LessThanOrEqual, - vec![ - count_expr, - ExprImpl::from(Literal::new( - Datum::from(ScalarImpl::Int64(1)), - DataType::Int64, - )), - ], - )?, - ); - let null_expr = - ExprImpl::from(Literal::new(None, return_type)); - - let case_expr = ExprImpl::from( - FunctionCall::new( - ExprType::Case, - vec![less_than_expr, null_expr, target_expr], - )?, - ); + let less_than_expr = ExprImpl::from(FunctionCall::new( + ExprType::LessThanOrEqual, + vec![ + count_expr, + ExprImpl::from(Literal::new( + Datum::from(ScalarImpl::Int64(1)), + DataType::Int64, + )), + ], + )?); + let null_expr = ExprImpl::from(Literal::new(None, return_type)); + + let case_expr = ExprImpl::from(FunctionCall::new( + ExprType::Case, + vec![less_than_expr, null_expr, target_expr], + )?); Ok(case_expr) } _ => unreachable!(), @@ -307,21 +292,19 @@ impl<'a> OverWindowProjectBuilder<'a> { window_function: &WindowFunction, ) -> std::result::Result<(), ErrorCode> { if let WindowFuncKind::Aggregate(agg_kind) = 
window_function.kind - && matches!( - agg_kind, - AggKind::StddevPop - | AggKind::StddevSamp - | AggKind::VarPop - | AggKind::VarSamp - ) - { - let input = window_function.args.iter().exactly_one().unwrap(); - let squared_input_expr = ExprImpl::from( - FunctionCall::new(ExprType::Multiply, vec![input.clone(), input.clone()]) - .unwrap(), - ); - self.builder.add_expr(&squared_input_expr).map_err(|err| ErrorCode::NotImplemented(format!("{err} inside args"), None.into()))?; - } + && matches!( + agg_kind, + AggKind::StddevPop | AggKind::StddevSamp | AggKind::VarPop | AggKind::VarSamp + ) + { + let input = window_function.args.iter().exactly_one().unwrap(); + let squared_input_expr = ExprImpl::from( + FunctionCall::new(ExprType::Multiply, vec![input.clone(), input.clone()]).unwrap(), + ); + self.builder.add_expr(&squared_input_expr).map_err(|err| { + ErrorCode::NotImplemented(format!("{err} inside args"), None.into()) + })?; + } for arg in &window_function.args { self.builder.add_expr(arg).map_err(|err| { ErrorCode::NotImplemented(format!("{err} inside args"), None.into()) diff --git a/src/frontend/src/optimizer/plan_node/logical_source.rs b/src/frontend/src/optimizer/plan_node/logical_source.rs index cac051957b0a5..542178a830b73 100644 --- a/src/frontend/src/optimizer/plan_node/logical_source.rs +++ b/src/frontend/src/optimizer/plan_node/logical_source.rs @@ -571,7 +571,9 @@ impl ToStream for LogicalSource { } assert!(!(self.core.gen_row_id && self.core.for_table)); - if let Some(row_id_index) = self.core.row_id_index && self.core.gen_row_id { + if let Some(row_id_index) = self.core.row_id_index + && self.core.gen_row_id + { plan = StreamRowIdGen::new(plan, row_id_index).into(); } Ok(plan) diff --git a/src/frontend/src/optimizer/plan_node/stream_group_topn.rs b/src/frontend/src/optimizer/plan_node/stream_group_topn.rs index d0c3077f83286..ca208f94df05a 100644 --- a/src/frontend/src/optimizer/plan_node/stream_group_topn.rs +++ 
b/src/frontend/src/optimizer/plan_node/stream_group_topn.rs @@ -57,7 +57,9 @@ impl StreamGroupTopN { let mut stream_key = core .stream_key() .expect("logical node should have stream key here"); - if let Some(vnode_col_idx) = vnode_col_idx && stream_key.len() > 1 { + if let Some(vnode_col_idx) = vnode_col_idx + && stream_key.len() > 1 + { // The output stream key of `GroupTopN` is a union of group key and input stream key, // while vnode is calculated from a subset of input stream key. So we can safely remove // the vnode column from output stream key. While at meanwhile we cannot leave the stream key diff --git a/src/frontend/src/optimizer/plan_node/stream_hash_join.rs b/src/frontend/src/optimizer/plan_node/stream_hash_join.rs index 36aff15d96055..1dc35bb909905 100644 --- a/src/frontend/src/optimizer/plan_node/stream_hash_join.rs +++ b/src/frontend/src/optimizer/plan_node/stream_hash_join.rs @@ -161,7 +161,9 @@ impl StreamHashJoin { ) }; let mut is_valuable_inequality = do_state_cleaning; - if let Some(internal) = internal && !watermark_columns.contains(internal) { + if let Some(internal) = internal + && !watermark_columns.contains(internal) + { watermark_columns.insert(internal); is_valuable_inequality = true; } diff --git a/src/frontend/src/optimizer/rule/except_merge_rule.rs b/src/frontend/src/optimizer/rule/except_merge_rule.rs index 0979aaf262f06..77b4c827d07c3 100644 --- a/src/frontend/src/optimizer/rule/except_merge_rule.rs +++ b/src/frontend/src/optimizer/rule/except_merge_rule.rs @@ -26,7 +26,9 @@ impl Rule for ExceptMergeRule { let top_except_inputs = top_except.inputs(); let (left_most_input, remain_vec) = top_except_inputs.split_at(1); - if let Some(bottom_except) = left_most_input[0].as_logical_except() && bottom_except.all() == top_all { + if let Some(bottom_except) = left_most_input[0].as_logical_except() + && bottom_except.all() == top_all + { let mut new_inputs = vec![]; new_inputs.extend(bottom_except.inputs()); 
new_inputs.extend(remain_vec.iter().cloned()); diff --git a/src/frontend/src/optimizer/rule/index_delta_join_rule.rs b/src/frontend/src/optimizer/rule/index_delta_join_rule.rs index 30435d635568b..5ae42877f6454 100644 --- a/src/frontend/src/optimizer/rule/index_delta_join_rule.rs +++ b/src/frontend/src/optimizer/rule/index_delta_join_rule.rs @@ -122,11 +122,8 @@ impl Rule for IndexDeltaJoinRule { if chain_type != table_scan.chain_type() { Some( - StreamTableScan::new_with_chain_type( - table_scan.core().clone(), - chain_type, - ) - .into(), + StreamTableScan::new_with_chain_type(table_scan.core().clone(), chain_type) + .into(), ) } else { Some(table_scan.clone().into()) diff --git a/src/frontend/src/optimizer/rule/intersect_merge_rule.rs b/src/frontend/src/optimizer/rule/intersect_merge_rule.rs index c21ea85e6baf5..3110b9e3881b6 100644 --- a/src/frontend/src/optimizer/rule/intersect_merge_rule.rs +++ b/src/frontend/src/optimizer/rule/intersect_merge_rule.rs @@ -24,7 +24,9 @@ impl Rule for IntersectMergeRule { let mut new_inputs = vec![]; let mut has_merge = false; for input in top_intersect.inputs() { - if let Some(bottom_intersect) = input.as_logical_intersect() && bottom_intersect.all() == top_all { + if let Some(bottom_intersect) = input.as_logical_intersect() + && bottom_intersect.all() == top_all + { new_inputs.extend(bottom_intersect.inputs()); has_merge = true; } else { diff --git a/src/frontend/src/optimizer/rule/over_window_to_topn_rule.rs b/src/frontend/src/optimizer/rule/over_window_to_topn_rule.rs index 93637d3ba8193..496a51d6d9f3d 100644 --- a/src/frontend/src/optimizer/rule/over_window_to_topn_rule.rs +++ b/src/frontend/src/optimizer/rule/over_window_to_topn_rule.rs @@ -161,7 +161,9 @@ fn handle_rank_preds(rank_preds: &[ExprImpl], window_func_pos: usize) -> Option< assert_eq!(input_ref.index, window_func_pos); let v = v.cast_implicit(DataType::Int64).ok()?.fold_const().ok()??; let v = *v.as_int64(); - if let Some(eq) = eq && eq != v { + if let 
Some(eq) = eq + && eq != v + { tracing::warn!( "Failed to optimize rank predicate with conflicting equal conditions." ); diff --git a/src/frontend/src/optimizer/rule/union_merge_rule.rs b/src/frontend/src/optimizer/rule/union_merge_rule.rs index 169c9b72c530f..cd7199c2a5050 100644 --- a/src/frontend/src/optimizer/rule/union_merge_rule.rs +++ b/src/frontend/src/optimizer/rule/union_merge_rule.rs @@ -24,7 +24,9 @@ impl Rule for UnionMergeRule { let mut new_inputs = vec![]; let mut has_merge = false; for input in top_union.inputs() { - if let Some(bottom_union) = input.as_logical_union() && bottom_union.all() == top_all { + if let Some(bottom_union) = input.as_logical_union() + && bottom_union.all() == top_all + { new_inputs.extend(bottom_union.inputs()); has_merge = true; } else { diff --git a/src/frontend/src/planner/query.rs b/src/frontend/src/planner/query.rs index a5f9651c4141e..d00ce93598a1e 100644 --- a/src/frontend/src/planner/query.rs +++ b/src/frontend/src/planner/query.rs @@ -56,7 +56,9 @@ impl Planner { } let mut out_fields = FixedBitSet::with_capacity(plan.schema().len()); out_fields.insert_range(..plan.schema().len() - extra_order_exprs_len); - if let Some(field) = plan.schema().fields.get(0) && field.name == "projected_row_id" { + if let Some(field) = plan.schema().fields.get(0) + && field.name == "projected_row_id" + { // Do not output projected_row_id hidden column. 
out_fields.set(0, false); } diff --git a/src/frontend/src/planner/select.rs b/src/frontend/src/planner/select.rs index 96b32680309df..5266ce55710e3 100644 --- a/src/frontend/src/planner/select.rs +++ b/src/frontend/src/planner/select.rs @@ -67,7 +67,9 @@ impl Planner { exprs.iter().map(|expr| (expr.clone(), false)).collect(); let mut uncovered_distinct_on_exprs_cnt = distinct_on_exprs.len(); let mut order_iter = order.iter().map(|o| &select_items[o.column_index]); - while uncovered_distinct_on_exprs_cnt > 0 && let Some(order_expr) = order_iter.next() { + while uncovered_distinct_on_exprs_cnt > 0 + && let Some(order_expr) = order_iter.next() + { match distinct_on_exprs.get_mut(order_expr) { Some(has_been_covered) => { if !*has_been_covered { @@ -179,7 +181,9 @@ impl Planner { if let BoundDistinct::Distinct = distinct { let fields = root.schema().fields(); - let group_key = if let Some(field) = fields.get(0) && field.name == "projected_row_id" { + let group_key = if let Some(field) = fields.get(0) + && field.name == "projected_row_id" + { // Do not group by projected_row_id hidden column. (1..fields.len()).collect() } else { diff --git a/src/frontend/src/scheduler/distributed/stage.rs b/src/frontend/src/scheduler/distributed/stage.rs index 7c3030370d56f..cee3db2986cf5 100644 --- a/src/frontend/src/scheduler/distributed/stage.rs +++ b/src/frontend/src/scheduler/distributed/stage.rs @@ -344,7 +344,10 @@ impl StageRunner { // the task. // We schedule the task to the worker node that owns the data partition. 
let parallel_unit_ids = vnode_bitmaps.keys().cloned().collect_vec(); - let workers = self.worker_node_manager.manager.get_workers_by_parallel_unit_ids(¶llel_unit_ids)?; + let workers = self + .worker_node_manager + .manager + .get_workers_by_parallel_unit_ids(¶llel_unit_ids)?; for (i, (parallel_unit_id, worker)) in parallel_unit_ids .into_iter() .zip_eq_fast(workers.into_iter()) diff --git a/src/frontend/src/scheduler/local.rs b/src/frontend/src/scheduler/local.rs index 28cfa25b70bf1..d3d558ef4eff2 100644 --- a/src/frontend/src/scheduler/local.rs +++ b/src/frontend/src/scheduler/local.rs @@ -282,7 +282,10 @@ impl LocalQueryExecution { // `exchange_source`. let (parallel_unit_ids, vnode_bitmaps): (Vec<_>, Vec<_>) = vnode_bitmaps.clone().into_iter().unzip(); - let workers = self.worker_node_manager.manager.get_workers_by_parallel_unit_ids(¶llel_unit_ids)?; + let workers = self + .worker_node_manager + .manager + .get_workers_by_parallel_unit_ids(¶llel_unit_ids)?; for (idx, (worker_node, partition)) in (workers.into_iter().zip_eq_fast(vnode_bitmaps.into_iter())).enumerate() { @@ -355,8 +358,12 @@ impl LocalQueryExecution { sources.push(exchange_source); } } else { - let second_stage_plan_node = - self.convert_plan_node(&second_stage.root, &mut None, None, next_executor_id)?; + let second_stage_plan_node = self.convert_plan_node( + &second_stage.root, + &mut None, + None, + next_executor_id, + )?; let second_stage_plan_fragment = PlanFragment { root: Some(second_stage_plan_node), exchange_info: Some(ExchangeInfo { diff --git a/src/frontend/src/utils/condition.rs b/src/frontend/src/utils/condition.rs index 5305f9f1f356a..332dda739bb3a 100644 --- a/src/frontend/src/utils/condition.rs +++ b/src/frontend/src/utils/condition.rs @@ -889,7 +889,9 @@ impl Condition { } // remove all constant boolean `true` res.retain(|expr| { - if let Some(v) = try_get_bool_constant(expr) && v { + if let Some(v) = try_get_bool_constant(expr) + && v + { false } else { true diff --git 
a/src/frontend/src/utils/stream_graph_formatter.rs b/src/frontend/src/utils/stream_graph_formatter.rs index 2e9e6d1bb01ec..500167f9380de 100644 --- a/src/frontend/src/utils/stream_graph_formatter.rs +++ b/src/frontend/src/utils/stream_graph_formatter.rs @@ -261,57 +261,58 @@ impl StreamGraphFormatter { self.pretty_add_table(node.get_state_table().unwrap()), )); } - stream_node::NodeBody::Chain(node) => { - fields.push(( - "state table", - self.pretty_add_table(node.get_state_table().unwrap()), - )) - } - stream_node::NodeBody::Sort(node) => { + stream_node::NodeBody::Chain(node) => fields.push(( + "state table", + self.pretty_add_table(node.get_state_table().unwrap()), + )), + stream_node::NodeBody::Sort(node) => { fields.push(( "state table", self.pretty_add_table(node.get_state_table().unwrap()), )); } stream_node::NodeBody::WatermarkFilter(node) => { - let vec = node.tables.iter().map(|tb| self.pretty_add_table(tb) ).collect_vec(); - fields.push(("state tables", Pretty::Array(vec) - )); + let vec = node + .tables + .iter() + .map(|tb| self.pretty_add_table(tb)) + .collect_vec(); + fields.push(("state tables", Pretty::Array(vec))); } stream_node::NodeBody::EowcOverWindow(node) => { fields.push(( - "state table", - self.pretty_add_table(node.get_state_table().unwrap()), + "state table", + self.pretty_add_table(node.get_state_table().unwrap()), )); } - stream_node::NodeBody::OverWindow(node) =>{ + stream_node::NodeBody::OverWindow(node) => { fields.push(( "state table", self.pretty_add_table(node.get_state_table().unwrap()), )); } - stream_node::NodeBody::Project(_) | - stream_node::NodeBody::Filter(_) | - stream_node::NodeBody::StatelessSimpleAgg(_) | - stream_node::NodeBody::HopWindow(_) | - stream_node::NodeBody::Merge(_) | - stream_node::NodeBody::Exchange(_) | - stream_node::NodeBody::BatchPlan(_) | - stream_node::NodeBody::Lookup(_) | - stream_node::NodeBody::LookupUnion(_) | - stream_node::NodeBody::Union(_) | - stream_node::NodeBody::DeltaIndexJoin(_) | - 
stream_node::NodeBody::Sink(_) | - stream_node::NodeBody::Expand(_) | - stream_node::NodeBody::ProjectSet(_) | - stream_node::NodeBody::Dml(_) | - stream_node::NodeBody::RowIdGen(_) | - stream_node::NodeBody::TemporalJoin(_) | - stream_node::NodeBody::BarrierRecv(_) | - stream_node::NodeBody::Values(_) | - stream_node::NodeBody::Source(_) | - stream_node::NodeBody::StreamFsFetch(_) | - stream_node::NodeBody::NoOp(_) => {} + stream_node::NodeBody::Project(_) + | stream_node::NodeBody::Filter(_) + | stream_node::NodeBody::StatelessSimpleAgg(_) + | stream_node::NodeBody::HopWindow(_) + | stream_node::NodeBody::Merge(_) + | stream_node::NodeBody::Exchange(_) + | stream_node::NodeBody::BatchPlan(_) + | stream_node::NodeBody::Lookup(_) + | stream_node::NodeBody::LookupUnion(_) + | stream_node::NodeBody::Union(_) + | stream_node::NodeBody::DeltaIndexJoin(_) + | stream_node::NodeBody::Sink(_) + | stream_node::NodeBody::Expand(_) + | stream_node::NodeBody::ProjectSet(_) + | stream_node::NodeBody::Dml(_) + | stream_node::NodeBody::RowIdGen(_) + | stream_node::NodeBody::TemporalJoin(_) + | stream_node::NodeBody::BarrierRecv(_) + | stream_node::NodeBody::Values(_) + | stream_node::NodeBody::Source(_) + | stream_node::NodeBody::StreamFsFetch(_) + | stream_node::NodeBody::NoOp(_) => {} }; if self.verbose { diff --git a/src/meta/src/hummock/compaction/picker/space_reclaim_compaction_picker.rs b/src/meta/src/hummock/compaction/picker/space_reclaim_compaction_picker.rs index 7dc7a4688e644..64b2e24c756d5 100644 --- a/src/meta/src/hummock/compaction/picker/space_reclaim_compaction_picker.rs +++ b/src/meta/src/hummock/compaction/picker/space_reclaim_compaction_picker.rs @@ -63,12 +63,16 @@ impl SpaceReclaimCompactionPicker { ) -> Option { assert!(!levels.levels.is_empty()); let mut select_input_ssts = vec![]; - if let Some(l0) = levels.l0.as_ref() && state.last_level == 0 { + if let Some(l0) = levels.l0.as_ref() + && state.last_level == 0 + { // only pick trivial reclaim sstables 
because this kind of task could be optimized and do not need send to compactor. for level in &l0.sub_levels { for sst in &level.table_infos { let exist_count = self.exist_table_count(sst); - if exist_count == sst.table_ids.len() || level_handlers[0].is_pending_compact( &sst.sst_id) { + if exist_count == sst.table_ids.len() + || level_handlers[0].is_pending_compact(&sst.sst_id) + { if !select_input_ssts.is_empty() { break; } diff --git a/src/meta/src/hummock/manager/mod.rs b/src/meta/src/hummock/manager/mod.rs index 1b3a284e9ccc9..678374b8c571a 100644 --- a/src/meta/src/hummock/manager/mod.rs +++ b/src/meta/src/hummock/manager/mod.rs @@ -353,7 +353,7 @@ impl HummockManager { && !env.opts.do_not_config_object_storage_lifecycle { let is_bucket_expiration_configured = s3.inner().configure_bucket_lifecycle().await; - if is_bucket_expiration_configured{ + if is_bucket_expiration_configured { return Err(ObjectError::internal("Cluster cannot start with object expiration configured for bucket because RisingWave data will be lost when object expiration kicks in. 
Please disable object expiration and restart the cluster.") .into()); diff --git a/src/meta/src/manager/catalog/fragment.rs b/src/meta/src/manager/catalog/fragment.rs index 8b26b8afa11d9..c3445da8cc57a 100644 --- a/src/meta/src/manager/catalog/fragment.rs +++ b/src/meta/src/manager/catalog/fragment.rs @@ -78,7 +78,9 @@ impl FragmentManagerCore { .filter(|tf| tf.state() != State::Initial) .flat_map(|table_fragments| { table_fragments.fragments.values().filter_map(|fragment| { - if let Some(id_filter) = id_filter.as_ref() && !id_filter.contains(&fragment.fragment_id) { + if let Some(id_filter) = id_filter.as_ref() + && !id_filter.contains(&fragment.fragment_id) + { return None; } let parallelism = match fragment.vnode_mapping.as_ref() { @@ -687,7 +689,9 @@ impl FragmentManager { .with_context(|| format!("table_fragment not exist: id={}", table_id))?; for status in table_fragment.actor_status.values_mut() { - if let Some(pu) = &status.parallel_unit && migration_plan.parallel_unit_plan.contains_key(&pu.id) { + if let Some(pu) = &status.parallel_unit + && migration_plan.parallel_unit_plan.contains_key(&pu.id) + { status.parallel_unit = Some(migration_plan.parallel_unit_plan[&pu.id].clone()); } } @@ -717,8 +721,9 @@ impl FragmentManager { .values() .filter(|tf| { for status in tf.actor_status.values() { - if let Some(pu) = &status.parallel_unit && - migration_plan.parallel_unit_plan.contains_key(&pu.id) { + if let Some(pu) = &status.parallel_unit + && migration_plan.parallel_unit_plan.contains_key(&pu.id) + { return true; } } diff --git a/src/meta/src/manager/catalog/mod.rs b/src/meta/src/manager/catalog/mod.rs index d2007dcab45d6..781dd244c1c57 100644 --- a/src/meta/src/manager/catalog/mod.rs +++ b/src/meta/src/manager/catalog/mod.rs @@ -742,7 +742,8 @@ impl CatalogManager { fn assert_table_creating(tables: &BTreeMap, table: &Table) { if let Some(t) = tables.get(&table.id) && let Ok(StreamJobStatus::Creating) = t.get_stream_job_status() - {} else { + { + } else { 
panic!("Table must be in creating procedure: {table:#?}") } } diff --git a/src/meta/src/rpc/ddl_controller.rs b/src/meta/src/rpc/ddl_controller.rs index 8f6e7c0be6915..c08281a2f59ed 100644 --- a/src/meta/src/rpc/ddl_controller.rs +++ b/src/meta/src/rpc/ddl_controller.rs @@ -402,11 +402,15 @@ impl DdlController { async fn delete_vpc_endpoint(&self, connection: &Connection) -> MetaResult<()> { // delete AWS vpc endpoint if let Some(connection::Info::PrivateLinkService(svc)) = &connection.info - && svc.get_provider()? == PbPrivateLinkProvider::Aws { + && svc.get_provider()? == PbPrivateLinkProvider::Aws + { if let Some(aws_cli) = self.aws_client.as_ref() { aws_cli.delete_vpc_endpoint(&svc.endpoint_id).await?; } else { - warn!("AWS client is not initialized, skip deleting vpc endpoint {}", svc.endpoint_id); + warn!( + "AWS client is not initialized, skip deleting vpc endpoint {}", + svc.endpoint_id + ); } } Ok(()) diff --git a/src/meta/src/rpc/election/etcd.rs b/src/meta/src/rpc/election/etcd.rs index f30d8253cb95d..dbcd62abfdbb7 100644 --- a/src/meta/src/rpc/election/etcd.rs +++ b/src/meta/src/rpc/election/etcd.rs @@ -94,7 +94,9 @@ impl ElectionClient for EtcdElectionClient { let (mut keeper, mut resp_stream) = self.client.keep_alive(lease_id).await?; let _resp = keeper.keep_alive().await?; let resp = resp_stream.message().await?; - if let Some(resp) = resp && resp.ttl() <= 0 { + if let Some(resp) = resp + && resp.ttl() <= 0 + { tracing::info!("lease {} expired or revoked, re-granting", lease_id); if restored_leader { tracing::info!("restored leader lease {} lost", lease_id); diff --git a/src/meta/src/stream/stream_manager.rs b/src/meta/src/stream/stream_manager.rs index 77a784c64ac09..003252582d6c5 100644 --- a/src/meta/src/stream/stream_manager.rs +++ b/src/meta/src/stream/stream_manager.rs @@ -126,7 +126,11 @@ impl CreatingStreamingJobInfo { && let Some(shutdown_tx) = job.shutdown_tx.take() { let (tx, rx) = oneshot::channel(); - if 
shutdown_tx.send(CreatingState::Canceling { finish_tx: tx }).await.is_ok() { + if shutdown_tx + .send(CreatingState::Canceling { finish_tx: tx }) + .await + .is_ok() + { receivers.insert(job_id, rx); } else { tracing::warn!("failed to send canceling state"); diff --git a/src/object_store/src/object/mem.rs b/src/object_store/src/object/mem.rs index 02ee6c744d33a..0cd266abb293d 100644 --- a/src/object_store/src/object/mem.rs +++ b/src/object_store/src/object/mem.rs @@ -237,7 +237,9 @@ impl InMemObjectStore { .map(|(_, obj)| obj) .ok_or_else(|| Error::not_found(format!("no object at path '{}'", path)))?; - if let Some(end) = range.end() && end > obj.len() { + if let Some(end) = range.end() + && end > obj.len() + { return Err(Error::other("bad block offset and size").into()); } diff --git a/src/object_store/src/object/opendal_engine/opendal_object_store.rs b/src/object_store/src/object/opendal_engine/opendal_object_store.rs index ff682946b0651..204d9cac25753 100644 --- a/src/object_store/src/object/opendal_engine/opendal_object_store.rs +++ b/src/object_store/src/object/opendal_engine/opendal_object_store.rs @@ -86,7 +86,9 @@ impl ObjectStore for OpendalObjectStore { .await? 
}; - if let Some(len) = range.len() && len != data.len() { + if let Some(len) = range.len() + && len != data.len() + { return Err(ObjectError::internal(format!( "mismatched size: expected {}, found {} when reading {} at {:?}", len, diff --git a/src/object_store/src/object/s3.rs b/src/object_store/src/object/s3.rs index 89f9aa5a053d5..6dce56ed4c759 100644 --- a/src/object_store/src/object/s3.rs +++ b/src/object_store/src/object/s3.rs @@ -386,7 +386,9 @@ impl ObjectStore for S3ObjectStore { ) .await?; - if let Some(len) = range.len() && len != val.len() { + if let Some(len) = range.len() + && len != val.len() + { return Err(ObjectError::internal(format!( "mismatched size: expected {}, found {} when reading {} at {:?}", len, diff --git a/src/risedevtool/src/config/provide_expander.rs b/src/risedevtool/src/config/provide_expander.rs index 9948c81b0336c..2860ffd6c9850 100644 --- a/src/risedevtool/src/config/provide_expander.rs +++ b/src/risedevtool/src/config/provide_expander.rs @@ -70,7 +70,9 @@ impl ProvideExpander { .into_hash() .ok_or_else(|| anyhow!("expect a hashmap"))?; let map = map.into_iter().map(|(k, v)| { - if let Some(k) = k.as_str() && k.starts_with("provide-") { + if let Some(k) = k.as_str() + && k.starts_with("provide-") + { let array = v .as_vec() .ok_or_else(|| anyhow!("expect an array of provide-"))?; diff --git a/src/risedevtool/src/preflight_check.rs b/src/risedevtool/src/preflight_check.rs index 47fb8235495fc..e29e3f4709c93 100644 --- a/src/risedevtool/src/preflight_check.rs +++ b/src/risedevtool/src/preflight_check.rs @@ -26,7 +26,10 @@ fn preflight_check_proxy() -> Result<()> { || env::var("all_proxy").is_ok() || env::var("ALL_PROXY").is_ok() { - if let Ok(x) = env::var("no_proxy") && x.contains("127.0.0.1") && x.contains("::1") { + if let Ok(x) = env::var("no_proxy") + && x.contains("127.0.0.1") + && x.contains("::1") + { println!( "[{}] {} - You are using proxies for all RisingWave components. 
Please make sure that `no_proxy` is set for all worker nodes within the cluster.", style("risedev-preflight-check").bold(), diff --git a/src/risedevtool/src/task/compactor_service.rs b/src/risedevtool/src/task/compactor_service.rs index adecc007b8207..c7dcbb8c1179c 100644 --- a/src/risedevtool/src/task/compactor_service.rs +++ b/src/risedevtool/src/task/compactor_service.rs @@ -34,7 +34,9 @@ impl CompactorService { fn compactor(&self) -> Result { let prefix_bin = env::var("PREFIX_BIN")?; - if let Ok(x) = env::var("ENABLE_ALL_IN_ONE") && x == "true" { + if let Ok(x) = env::var("ENABLE_ALL_IN_ONE") + && x == "true" + { Ok(Command::new( Path::new(&prefix_bin).join("risingwave").join("compactor"), )) diff --git a/src/risedevtool/src/task/compute_node_service.rs b/src/risedevtool/src/task/compute_node_service.rs index ced6bec115f6a..20d01f33f53d1 100644 --- a/src/risedevtool/src/task/compute_node_service.rs +++ b/src/risedevtool/src/task/compute_node_service.rs @@ -34,7 +34,9 @@ impl ComputeNodeService { fn compute_node(&self) -> Result { let prefix_bin = env::var("PREFIX_BIN")?; - if let Ok(x) = env::var("ENABLE_ALL_IN_ONE") && x == "true" { + if let Ok(x) = env::var("ENABLE_ALL_IN_ONE") + && x == "true" + { Ok(Command::new( Path::new(&prefix_bin) .join("risingwave") diff --git a/src/risedevtool/src/task/frontend_service.rs b/src/risedevtool/src/task/frontend_service.rs index cf0213028e465..b19167abbfdd1 100644 --- a/src/risedevtool/src/task/frontend_service.rs +++ b/src/risedevtool/src/task/frontend_service.rs @@ -35,7 +35,9 @@ impl FrontendService { fn frontend(&self) -> Result { let prefix_bin = env::var("PREFIX_BIN")?; - if let Ok(x) = env::var("ENABLE_ALL_IN_ONE") && x == "true" { + if let Ok(x) = env::var("ENABLE_ALL_IN_ONE") + && x == "true" + { Ok(Command::new( Path::new(&prefix_bin) .join("risingwave") diff --git a/src/risedevtool/src/task/meta_node_service.rs b/src/risedevtool/src/task/meta_node_service.rs index 2494a9eceaf16..df48b59c2f1f5 100644 --- 
a/src/risedevtool/src/task/meta_node_service.rs +++ b/src/risedevtool/src/task/meta_node_service.rs @@ -35,7 +35,9 @@ impl MetaNodeService { fn meta_node(&self) -> Result { let prefix_bin = env::var("PREFIX_BIN")?; - if let Ok(x) = env::var("ENABLE_ALL_IN_ONE") && x == "true" { + if let Ok(x) = env::var("ENABLE_ALL_IN_ONE") + && x == "true" + { Ok(Command::new( Path::new(&prefix_bin).join("risingwave").join("meta-node"), )) diff --git a/src/rpc_client/src/meta_client.rs b/src/rpc_client/src/meta_client.rs index b8603fbe46e62..b58a13027261b 100644 --- a/src/rpc_client/src/meta_client.rs +++ b/src/rpc_client/src/meta_client.rs @@ -247,7 +247,8 @@ impl MetaClient { }) .await?; if let Some(status) = &add_worker_resp.status - && status.code() == risingwave_pb::common::status::Code::UnknownWorker { + && status.code() == risingwave_pb::common::status::Code::UnknownWorker + { tracing::error!("invalid worker: {}", status.message); std::process::exit(1); } diff --git a/src/sqlparser/src/tokenizer.rs b/src/sqlparser/src/tokenizer.rs index 4fafde820f414..1f3b99314d143 100644 --- a/src/sqlparser/src/tokenizer.rs +++ b/src/sqlparser/src/tokenizer.rs @@ -1019,7 +1019,9 @@ impl<'a> Tokenizer<'a> { ) -> Result<(), String> { let mut unicode_seq: String = String::with_capacity(len); for _ in 0..len { - if let Some(c) = chars.peek() && c.is_ascii_hexdigit() { + if let Some(c) = chars.peek() + && c.is_ascii_hexdigit() + { unicode_seq.push(chars.next().unwrap()); } else { break; @@ -1063,7 +1065,9 @@ impl<'a> Tokenizer<'a> { let mut unicode_seq: String = String::with_capacity(3); unicode_seq.push(digit); for _ in 0..2 { - if let Some(c) = chars.peek() && matches!(*c, '0'..='7') { + if let Some(c) = chars.peek() + && matches!(*c, '0'..='7') + { unicode_seq.push(chars.next().unwrap()); } else { break; diff --git a/src/storage/src/hummock/compactor/compactor_runner.rs b/src/storage/src/hummock/compactor/compactor_runner.rs index 1925acbce7534..e46965b16e5fc 100644 --- 
a/src/storage/src/hummock/compactor/compactor_runner.rs +++ b/src/storage/src/hummock/compactor/compactor_runner.rs @@ -701,7 +701,9 @@ where while iter.is_valid() { progress_key_num += 1; - if let Some(task_progress) = task_progress.as_ref() && progress_key_num >= PROGRESS_KEY_INTERVAL { + if let Some(task_progress) = task_progress.as_ref() + && progress_key_num >= PROGRESS_KEY_INTERVAL + { task_progress.inc_progress_key(progress_key_num); progress_key_num = 0; } @@ -751,7 +753,9 @@ where } del_iter.next(); progress_key_num += 1; - if let Some(task_progress) = task_progress.as_ref() && progress_key_num >= PROGRESS_KEY_INTERVAL { + if let Some(task_progress) = task_progress.as_ref() + && progress_key_num >= PROGRESS_KEY_INTERVAL + { task_progress.inc_progress_key(progress_key_num); progress_key_num = 0; } @@ -858,14 +862,18 @@ where .await?; del_iter.next(); progress_key_num += 1; - if let Some(task_progress) = task_progress.as_ref() && progress_key_num >= PROGRESS_KEY_INTERVAL { + if let Some(task_progress) = task_progress.as_ref() + && progress_key_num >= PROGRESS_KEY_INTERVAL + { task_progress.inc_progress_key(progress_key_num); progress_key_num = 0; } } } - if let Some(task_progress) = task_progress.as_ref() && progress_key_num > 0 { + if let Some(task_progress) = task_progress.as_ref() + && progress_key_num > 0 + { // Avoid losing the progress_key_num in the last Interval task_progress.inc_progress_key(progress_key_num); } diff --git a/src/storage/src/hummock/event_handler/uploader.rs b/src/storage/src/hummock/event_handler/uploader.rs index a07da55fb7046..3b9e9dc587fe1 100644 --- a/src/storage/src/hummock/event_handler/uploader.rs +++ b/src/storage/src/hummock/event_handler/uploader.rs @@ -1333,7 +1333,9 @@ mod tests { assert_eq!(epoch, uploader.max_sealed_epoch); // check sealed data has two imms let imms_by_epoch = uploader.sealed_data.imms_by_epoch(); - if let Some((e, imms)) = imms_by_epoch.last_key_value() && *e == epoch{ + if let Some((e, imms)) = 
imms_by_epoch.last_key_value() + && *e == epoch + { assert_eq!(2, imms.len()); } diff --git a/src/storage/src/hummock/iterator/delete_range_iterator.rs b/src/storage/src/hummock/iterator/delete_range_iterator.rs index 7936fb994a92a..4d943ce63d3fc 100644 --- a/src/storage/src/hummock/iterator/delete_range_iterator.rs +++ b/src/storage/src/hummock/iterator/delete_range_iterator.rs @@ -291,7 +291,10 @@ impl DeleteRangeIterator for ForwardMergeRangeIterator { async { self.tmp_buffer .push(self.heap.pop().expect("no inner iter")); - while let Some(node) = self.heap.peek() && node.is_valid() && node.next_extended_user_key() == self.tmp_buffer[0].next_extended_user_key() { + while let Some(node) = self.heap.peek() + && node.is_valid() + && node.next_extended_user_key() == self.tmp_buffer[0].next_extended_user_key() + { self.tmp_buffer.push(self.heap.pop().unwrap()); } for node in &self.tmp_buffer { diff --git a/src/storage/src/hummock/mod.rs b/src/storage/src/hummock/mod.rs index 60553b5aa09a3..11af5e7deaea2 100644 --- a/src/storage/src/hummock/mod.rs +++ b/src/storage/src/hummock/mod.rs @@ -77,7 +77,15 @@ pub async fn get_from_sstable_info( // Bloom filter key is the distribution key, which is no need to be the prefix of pk, and do not // contain `TablePrefix` and `VnodePrefix`. 
if let Some(hash) = dist_key_hash - && !hit_sstable_bloom_filter(sstable.value(), &(Bound::Included(full_key.user_key), Bound::Included(full_key.user_key)), hash, local_stats) + && !hit_sstable_bloom_filter( + sstable.value(), + &( + Bound::Included(full_key.user_key), + Bound::Included(full_key.user_key), + ), + hash, + local_stats, + ) { if !read_options.ignore_range_tombstone { let delete_epoch = get_min_delete_range_epoch_from_sstable( diff --git a/src/storage/src/hummock/sstable/multi_builder.rs b/src/storage/src/hummock/sstable/multi_builder.rs index 9bee67e78ca68..4baabb4fdafe6 100644 --- a/src/storage/src/hummock/sstable/multi_builder.rs +++ b/src/storage/src/hummock/sstable/multi_builder.rs @@ -212,9 +212,17 @@ where } } } - if need_seal_current && let Some(event) = builder.last_range_tombstone() && event.new_epoch != HummockEpoch::MAX { + if need_seal_current + && let Some(event) = builder.last_range_tombstone() + && event.new_epoch != HummockEpoch::MAX + { last_range_tombstone_epoch = event.new_epoch; - if event.event_key.left_user_key.as_ref().eq(&full_key.user_key) { + if event + .event_key + .left_user_key + .as_ref() + .eq(&full_key.user_key) + { // If the last range tombstone equals the new key, we can not create new file because we must keep the new key in origin file. need_seal_current = false; } else { @@ -295,7 +303,10 @@ where /// Add kv pair to sstable. 
pub async fn add_monotonic_delete(&mut self, event: MonotonicDeleteEvent) -> HummockResult<()> { - if let Some(builder) = self.current_builder.as_mut() && builder.reach_capacity() && event.new_epoch != HummockEpoch::MAX { + if let Some(builder) = self.current_builder.as_mut() + && builder.reach_capacity() + && event.new_epoch != HummockEpoch::MAX + { if builder.last_range_tombstone_epoch() != HummockEpoch::MAX { builder.add_monotonic_delete(MonotonicDeleteEvent { event_key: event.event_key.clone(), diff --git a/src/storage/src/hummock/sstable_store.rs b/src/storage/src/hummock/sstable_store.rs index 73d6110cacd29..25ac82636c77d 100644 --- a/src/storage/src/hummock/sstable_store.rs +++ b/src/storage/src/hummock/sstable_store.rs @@ -821,7 +821,9 @@ impl SstableWriter for StreamingUploadWriter { self.sstable_store.insert_meta_cache(self.object_id, meta); // Add block cache. - if let CachePolicy::Fill(fill_high_priority_cache) = self.policy && !self.blocks.is_empty() { + if let CachePolicy::Fill(fill_high_priority_cache) = self.policy + && !self.blocks.is_empty() + { for (block_idx, block) in self.blocks.into_iter().enumerate() { self.sstable_store.block_cache.insert( self.object_id, diff --git a/src/storage/src/memory.rs b/src/storage/src/memory.rs index ac52b65e5488f..c454ad94339dc 100644 --- a/src/storage/src/memory.rs +++ b/src/storage/src/memory.rs @@ -524,7 +524,9 @@ impl RangeKvStateStore { } last_user_key = Some(key.user_key.clone()); } - if let Some(limit) = limit && data.len() >= limit { + if let Some(limit) = limit + && data.len() >= limit + { break; } } diff --git a/src/stream/src/common/log_store_impl/in_mem.rs b/src/stream/src/common/log_store_impl/in_mem.rs index 35040be82c93b..8ff3e05d0bea8 100644 --- a/src/stream/src/common/log_store_impl/in_mem.rs +++ b/src/stream/src/common/log_store_impl/in_mem.rs @@ -222,7 +222,9 @@ impl LogReader for BoundedInMemLogStoreReader { next_epoch, } = &self.epoch_progress { - if let TruncateOffset::Barrier {epoch} = 
offset && epoch == *sealed_epoch { + if let TruncateOffset::Barrier { epoch } = offset + && epoch == *sealed_epoch + { let sealed_epoch = *sealed_epoch; self.epoch_progress = Consuming(*next_epoch); self.truncated_epoch_tx diff --git a/src/stream/src/common/log_store_impl/kv_log_store/buffer.rs b/src/stream/src/common/log_store_impl/kv_log_store/buffer.rs index ed1c495c81d75..d0773b008c16a 100644 --- a/src/stream/src/common/log_store_impl/kv_log_store/buffer.rs +++ b/src/stream/src/common/log_store_impl/kv_log_store/buffer.rs @@ -250,7 +250,9 @@ impl LogStoreBufferSender { pub(crate) fn pop_truncation(&self, curr_epoch: u64) -> Option { let mut inner = self.buffer.inner(); let mut ret = None; - while let Some((epoch, _)) = inner.truncation_list.front() && *epoch < curr_epoch { + while let Some((epoch, _)) = inner.truncation_list.front() + && *epoch < curr_epoch + { ret = inner.truncation_list.pop_front(); } ret @@ -380,7 +382,9 @@ impl LogStoreBufferReceiver { } } if let Some((epoch, seq_id)) = latest_offset { - if let Some((prev_epoch, ref mut prev_seq_id)) = inner.truncation_list.back_mut() && *prev_epoch == epoch { + if let Some((prev_epoch, ref mut prev_seq_id)) = inner.truncation_list.back_mut() + && *prev_epoch == epoch + { *prev_seq_id = seq_id; } else { inner.truncation_list.push_back((epoch, seq_id)); diff --git a/src/stream/src/common/log_store_impl/kv_log_store/serde.rs b/src/stream/src/common/log_store_impl/kv_log_store/serde.rs index d3102aa936fad..ba69209887b67 100644 --- a/src/stream/src/common/log_store_impl/kv_log_store/serde.rs +++ b/src/stream/src/common/log_store_impl/kv_log_store/serde.rs @@ -586,7 +586,9 @@ impl LogStoreRowOpStream { .pop() .expect("have check non-empty"); self.row_streams.push(stream.into_future()); - while let Some((stream_epoch, _)) = self.not_started_streams.last() && *stream_epoch == epoch { + while let Some((stream_epoch, _)) = self.not_started_streams.last() + && *stream_epoch == epoch + { let (_, stream) = 
self.not_started_streams.pop().expect("should not be empty"); self.row_streams.push(stream.into_future()); } diff --git a/src/stream/src/common/table/state_table.rs b/src/stream/src/common/table/state_table.rs index 37e788a3e7abd..f0ef04ab7ec91 100644 --- a/src/stream/src/common/table/state_table.rs +++ b/src/stream/src/common/table/state_table.rs @@ -852,14 +852,14 @@ where match op { Op::Insert | Op::UpdateInsert => { if USE_WATERMARK_CACHE && let Some(ref pk) = key { - self.watermark_cache.insert(pk); - } + self.watermark_cache.insert(pk); + } self.insert_inner(TableKey(key_bytes), value); } Op::Delete | Op::UpdateDelete => { if USE_WATERMARK_CACHE && let Some(ref pk) = key { - self.watermark_cache.delete(pk); - } + self.watermark_cache.delete(pk); + } self.delete_inner(TableKey(key_bytes), value); } } @@ -870,14 +870,14 @@ where match op { Op::Insert | Op::UpdateInsert => { if USE_WATERMARK_CACHE && let Some(ref pk) = key { - self.watermark_cache.insert(pk); - } + self.watermark_cache.insert(pk); + } self.insert_inner(TableKey(key_bytes), value); } Op::Delete | Op::UpdateDelete => { if USE_WATERMARK_CACHE && let Some(ref pk) = key { - self.watermark_cache.delete(pk); - } + self.watermark_cache.delete(pk); + } self.delete_inner(TableKey(key_bytes), value); } } @@ -1026,11 +1026,21 @@ where }); // Compute Delete Ranges - if should_clean_watermark && let Some(watermark_suffix) = watermark_suffix && let Some(first_byte) = watermark_suffix.first() { + if should_clean_watermark + && let Some(watermark_suffix) = watermark_suffix + && let Some(first_byte) = watermark_suffix.first() + { trace!(table_id = %self.table_id, watermark = ?watermark_suffix, vnodes = ?{ self.vnodes.iter_vnodes().collect_vec() }, "delete range"); - if prefix_serializer.as_ref().unwrap().get_order_types().first().unwrap().is_ascending() { + if prefix_serializer + .as_ref() + .unwrap() + .get_order_types() + .first() + .unwrap() + .is_ascending() + { // We either serialize null into `0u8`, data 
into `(1u8 || scalar)`, or serialize null // into `1u8`, data into `(0u8 || scalar)`. We do not want to delete null // here, so `range_begin_suffix` cannot be `vec![]` when null is represented as `0u8`. diff --git a/src/stream/src/executor/backfill/arrangement_backfill.rs b/src/stream/src/executor/backfill/arrangement_backfill.rs index ae5e8696de6c3..0bd6e47841584 100644 --- a/src/stream/src/executor/backfill/arrangement_backfill.rs +++ b/src/stream/src/executor/backfill/arrangement_backfill.rs @@ -450,7 +450,9 @@ where while let Some(Ok(msg)) = upstream.next().await { if let Some(msg) = mapping_message(msg, &self.output_indices) { // If not finished then we need to update state, otherwise no need. - if let Message::Barrier(barrier) = &msg && !is_completely_finished { + if let Message::Barrier(barrier) = &msg + && !is_completely_finished + { // If snapshot was empty, we do not need to backfill, // but we still need to persist the finished state. // We currently persist it on the second barrier here rather than first. @@ -458,9 +460,13 @@ where // since it expects to have been initialized in previous epoch // (there's no epoch before the first epoch). 
if is_snapshot_empty { - let finished_state = construct_initial_finished_state(pk_in_output_indices.len()); + let finished_state = + construct_initial_finished_state(pk_in_output_indices.len()); for vnode in upstream_table.vnodes().iter_vnodes() { - backfill_state.update_progress(vnode, BackfillProgressPerVnode::InProgress(finished_state.clone())); + backfill_state.update_progress( + vnode, + BackfillProgressPerVnode::InProgress(finished_state.clone()), + ); } } @@ -471,9 +477,11 @@ where &backfill_state, &mut committed_progress, &mut temporary_state, - ).await?; + ) + .await?; - self.progress.finish(barrier.epoch.curr, total_snapshot_processed_rows); + self.progress + .finish(barrier.epoch.curr, total_snapshot_processed_rows); yield msg; break; } diff --git a/src/stream/src/executor/backfill/cdc_backfill.rs b/src/stream/src/executor/backfill/cdc_backfill.rs index c17aad1d2d62d..e3121e1bd0a7b 100644 --- a/src/stream/src/executor/backfill/cdc_backfill.rs +++ b/src/stream/src/executor/backfill/cdc_backfill.rs @@ -509,35 +509,31 @@ impl CdcBackfillExecutor { .await?; if let Some(SplitImpl::MysqlCdc(split)) = cdc_split.as_mut() - && let Some(s) = split.mysql_split.as_mut() { - let start_offset = - last_binlog_offset.as_ref().map(|cdc_offset| { - let source_offset = - if let CdcOffset::MySql(o) = cdc_offset - { - DebeziumSourceOffset { - file: Some(o.filename.clone()), - pos: Some(o.position), - ..Default::default() - } - } else { - DebeziumSourceOffset::default() - }; - - let mut server = "RW_CDC_".to_string(); - server.push_str( - upstream_table_id.to_string().as_str(), - ); - DebeziumOffset { - source_partition: hashmap! { - "server".to_string() => server - }, - source_offset, - // upstream heartbeat event would not emit to the cdc backfill executor, - // since we don't parse heartbeat event in the source parser. 
- is_heartbeat: false, + && let Some(s) = split.mysql_split.as_mut() + { + let start_offset = last_binlog_offset.as_ref().map(|cdc_offset| { + let source_offset = if let CdcOffset::MySql(o) = cdc_offset { + DebeziumSourceOffset { + file: Some(o.filename.clone()), + pos: Some(o.position), + ..Default::default() } - }); + } else { + DebeziumSourceOffset::default() + }; + + let mut server = "RW_CDC_".to_string(); + server.push_str(upstream_table_id.to_string().as_str()); + DebeziumOffset { + source_partition: hashmap! { + "server".to_string() => server + }, + source_offset, + // upstream heartbeat event would not emit to the cdc backfill executor, + // since we don't parse heartbeat event in the source parser. + is_heartbeat: false, + } + }); // persist the last binlog offset into split state s.inner.start_offset = start_offset.map(|o| { diff --git a/src/stream/src/executor/hash_agg.rs b/src/stream/src/executor/hash_agg.rs index 8d02cc328fa43..2a321dcc2b64c 100644 --- a/src/stream/src/executor/hash_agg.rs +++ b/src/stream/src/executor/hash_agg.rs @@ -384,7 +384,9 @@ impl HashAggExecutor { .zip_eq_fast(&mut this.storages) .zip_eq_fast(call_visibilities.iter()) { - if let AggStateStorage::MaterializedInput { table, mapping } = storage && !call.distinct { + if let AggStateStorage::MaterializedInput { table, mapping } = storage + && !call.distinct + { let chunk = chunk.project_with_vis(mapping.upstream_columns(), visibility.clone()); table.write_chunk(chunk); } @@ -413,8 +415,11 @@ impl HashAggExecutor { .zip_eq_fast(&mut this.storages) .zip_eq_fast(visibilities.iter()) { - if let AggStateStorage::MaterializedInput { table, mapping } = storage && call.distinct { - let chunk = chunk.project_with_vis(mapping.upstream_columns(), visibility.clone()); + if let AggStateStorage::MaterializedInput { table, mapping } = storage + && call.distinct + { + let chunk = + chunk.project_with_vis(mapping.upstream_columns(), visibility.clone()); table.write_chunk(chunk); } } diff --git 
a/src/stream/src/executor/hash_join.rs b/src/stream/src/executor/hash_join.rs index 75414fe24a379..93f1734a3ec0e 100644 --- a/src/stream/src/executor/hash_join.rs +++ b/src/stream/src/executor/hash_join.rs @@ -912,7 +912,8 @@ impl HashJoinExecutor input_watermark.val = value.unwrap(), diff --git a/src/stream/src/executor/over_window/general.rs b/src/stream/src/executor/over_window/general.rs index 9e66835b54b05..c9717f9defe61 100644 --- a/src/stream/src/executor/over_window/general.rs +++ b/src/stream/src/executor/over_window/general.rs @@ -389,7 +389,9 @@ impl OverWindowExecutor { } // Update recently accessed range for later shrinking cache. - if !this.cache_policy.is_full() && let Some(accessed_range) = accessed_range { + if !this.cache_policy.is_full() + && let Some(accessed_range) = accessed_range + { match vars.recently_accessed_ranges.entry(part_key) { btree_map::Entry::Vacant(vacant) => { vacant.insert(accessed_range); diff --git a/src/stream/src/executor/project_set.rs b/src/stream/src/executor/project_set.rs index ff3214db88eaa..e1000122af247 100644 --- a/src/stream/src/executor/project_set.rs +++ b/src/stream/src/executor/project_set.rs @@ -189,11 +189,15 @@ impl Inner { // for each column for (item, value) in results.iter_mut().zip_eq_fast(&mut row[1..]) { *value = match item { - Either::Left(state) => if let Some((i, value)) = state.peek() && i == row_idx { - valid = true; - value - } else { - None + Either::Left(state) => { + if let Some((i, value)) = state.peek() + && i == row_idx + { + valid = true; + value + } else { + None + } } Either::Right(array) => array.value_at(row_idx), }; @@ -211,7 +215,9 @@ impl Inner { } // move to the next row for item in &mut results { - if let Either::Left(state) = item && matches!(state.peek(), Some((i, _)) if i == row_idx) { + if let Either::Left(state) = item + && matches!(state.peek(), Some((i, _)) if i == row_idx) + { state.next().await?; } } diff --git a/src/stream/src/executor/source/state_table_handler.rs 
b/src/stream/src/executor/source/state_table_handler.rs index d742e72a4c7a9..1705350426968 100644 --- a/src/stream/src/executor/source/state_table_handler.rs +++ b/src/stream/src/executor/source/state_table_handler.rs @@ -226,10 +226,13 @@ impl SourceStateTableHandler { Some(row) => match row.datum_at(1) { Some(ScalarRefImpl::Jsonb(jsonb_ref)) => { let mut split_impl = SplitImpl::restore_from_json(jsonb_ref.to_owned_scalar())?; - if let SplitImpl::MysqlCdc(ref mut split) = split_impl && let Some(mysql_split) = split.mysql_split.as_mut() { + if let SplitImpl::MysqlCdc(ref mut split) = split_impl + && let Some(mysql_split) = split.mysql_split.as_mut() + { // if the snapshot_done is not set, we should check whether the backfill is finished if !mysql_split.inner.snapshot_done { - mysql_split.inner.snapshot_done = self.recover_cdc_snapshot_state(split_id).await?; + mysql_split.inner.snapshot_done = + self.recover_cdc_snapshot_state(split_id).await?; } } Some(split_impl) diff --git a/src/stream/src/executor/temporal_join.rs b/src/stream/src/executor/temporal_join.rs index 82c1e56649672..ddfcfd6b8e041 100644 --- a/src/stream/src/executor/temporal_join.rs +++ b/src/stream/src/executor/temporal_join.rs @@ -444,7 +444,8 @@ impl TemporalJoinExecutor }; if key.null_bitmap().is_subset(&null_matched) && let join_entry = self.right_table.lookup(&key, epoch).await? 
- && !join_entry.is_empty() { + && !join_entry.is_empty() + { for right_row in join_entry.cached.values() { // check join condition let ok = if let Some(ref mut cond) = self.condition { @@ -458,7 +459,8 @@ impl TemporalJoinExecutor }; if ok { - if let Some(chunk) = builder.append_row(op, left_row, right_row) { + if let Some(chunk) = builder.append_row(op, left_row, right_row) + { yield Message::Chunk(chunk); } } diff --git a/src/stream/src/executor/top_n/top_n_cache.rs b/src/stream/src/executor/top_n/top_n_cache.rs index b8275eba52b16..a1b7e26e8ae3a 100644 --- a/src/stream/src/executor/top_n/top_n_cache.rs +++ b/src/stream/src/executor/top_n/top_n_cache.rs @@ -232,7 +232,9 @@ impl TopNCache { return; } // For direct insert, we need to check if the key is smaller than the largest key - if let Some(high_last) = self.high.last_key_value() && cache_key <= *high_last.0 { + if let Some(high_last) = self.high.last_key_value() + && cache_key <= *high_last.0 + { debug_assert!(cache_key != *high_last.0, "cache_key should be unique"); self.high.insert(cache_key, row); } @@ -260,15 +262,16 @@ impl TopNCacheTrait for TopNCache { self.low.insert(cache_key, (&row).into()); return; } - let elem_to_compare_with_middle = - if let Some(low_last) = self.low.last_entry() && cache_key <= *low_last.key() { - // Take the last element of `cache.low` and insert input row to it. - let low_last = low_last.remove_entry(); - self.low.insert(cache_key, (&row).into()); - low_last - } else { - (cache_key, (&row).into()) - }; + let elem_to_compare_with_middle = if let Some(low_last) = self.low.last_entry() + && cache_key <= *low_last.key() + { + // Take the last element of `cache.low` and insert input row to it. 
+ let low_last = low_last.remove_entry(); + self.low.insert(cache_key, (&row).into()); + low_last + } else { + (cache_key, (&row).into()) + }; if !self.is_middle_cache_full() { self.middle.insert( @@ -586,15 +589,16 @@ impl AppendOnlyTopNCacheTrait for TopNCache { return Ok(()); } - let elem_to_insert_into_middle = - if let Some(low_last) = self.low.last_entry() && &cache_key <= low_last.key() { - // Take the last element of `cache.low` and insert input row to it. - let low_last = low_last.remove_entry(); - self.low.insert(cache_key, row_ref.into()); - low_last - } else { - (cache_key, row_ref.into()) - }; + let elem_to_insert_into_middle = if let Some(low_last) = self.low.last_entry() + && &cache_key <= low_last.key() + { + // Take the last element of `cache.low` and insert input row to it. + let low_last = low_last.remove_entry(); + self.low.insert(cache_key, row_ref.into()); + low_last + } else { + (cache_key, row_ref.into()) + }; if !self.is_middle_cache_full() { self.middle.insert( diff --git a/src/stream/src/executor/top_n/top_n_state.rs b/src/stream/src/executor/top_n/top_n_state.rs index 841e7f5bb50d7..7214eb91064bc 100644 --- a/src/stream/src/executor/top_n/top_n_state.rs +++ b/src/stream/src/executor/top_n/top_n_state.rs @@ -136,7 +136,9 @@ impl ManagedTopNState { while let Some(item) = state_table_iter.next().await { // Note(bugen): should first compare with start key before constructing TopNStateRow. 
let topn_row = self.get_topn_row(item?.into_owned_row(), group_key.len()); - if let Some(start_key) = start_key.as_ref() && &topn_row.cache_key <= start_key { + if let Some(start_key) = start_key.as_ref() + && &topn_row.cache_key <= start_key + { continue; } // let row= &topn_row.row; @@ -225,7 +227,9 @@ impl ManagedTopNState { topn_cache.high_capacity > 0, "topn cache high_capacity should always > 0" ); - while !topn_cache.is_high_cache_full() && let Some(item) = state_table_iter.next().await { + while !topn_cache.is_high_cache_full() + && let Some(item) = state_table_iter.next().await + { let topn_row = self.get_topn_row(item?.into_owned_row(), group_key.len()); topn_cache .high diff --git a/src/stream/src/executor/watermark_filter.rs b/src/stream/src/executor/watermark_filter.rs index 5e5454cecff93..c8899553ac46a 100644 --- a/src/stream/src/executor/watermark_filter.rs +++ b/src/stream/src/executor/watermark_filter.rs @@ -217,7 +217,9 @@ impl WatermarkFilterExecutor { if watermark.col_idx == event_time_col_idx { tracing::warn!("WatermarkFilterExecutor received a watermark on the event it is filtering."); let watermark = watermark.val; - if let Some(cur_watermark) = current_watermark.clone() && cur_watermark.default_cmp(&watermark).is_lt() { + if let Some(cur_watermark) = current_watermark.clone() + && cur_watermark.default_cmp(&watermark).is_lt() + { current_watermark = Some(watermark.clone()); idle_input = false; yield Message::Watermark(Watermark::new( @@ -267,7 +269,10 @@ impl WatermarkFilterExecutor { let global_max_watermark = Self::get_global_max_watermark(&table).await?; - current_watermark = if let Some(global_max_watermark) = global_max_watermark.clone() && let Some(watermark) = current_watermark.clone(){ + current_watermark = if let Some(global_max_watermark) = + global_max_watermark.clone() + && let Some(watermark) = current_watermark.clone() + { Some(cmp::max_by( watermark, global_max_watermark, diff --git 
a/src/stream/src/executor/wrapper/epoch_check.rs b/src/stream/src/executor/wrapper/epoch_check.rs index 3b2975d08366b..fb3a24b3dda2b 100644 --- a/src/stream/src/executor/wrapper/epoch_check.rs +++ b/src/stream/src/executor/wrapper/epoch_check.rs @@ -47,7 +47,9 @@ pub async fn epoch_check(info: Arc, input: impl MessageStream) { ); } - if let Some(last_epoch) = last_epoch && !b.is_with_stop_mutation() { + if let Some(last_epoch) = last_epoch + && !b.is_with_stop_mutation() + { assert_eq!( b.epoch.prev, last_epoch, diff --git a/src/stream/src/from_proto/source/trad_source.rs b/src/stream/src/from_proto/source/trad_source.rs index b87ce5ff39dc7..b105837d9b093 100644 --- a/src/stream/src/from_proto/source/trad_source.rs +++ b/src/stream/src/from_proto/source/trad_source.rs @@ -160,8 +160,11 @@ impl ExecutorBuilder for SourceExecutorBuilder { ); let table_type = ExternalTableType::from_properties(&source.properties); - if table_type.can_backfill() && let Some(table_desc) = source_info.upstream_table.clone() { - let upstream_table_name = SchemaTableName::from_properties(&source.properties); + if table_type.can_backfill() + && let Some(table_desc) = source_info.upstream_table.clone() + { + let upstream_table_name = + SchemaTableName::from_properties(&source.properties); let table_pk_indices = table_desc .pk .iter() @@ -173,7 +176,8 @@ impl ExecutorBuilder for SourceExecutorBuilder { .map(|desc| OrderType::from_protobuf(desc.get_order_type().unwrap())) .collect_vec(); - let table_reader = table_type.create_table_reader(source.properties.clone(), schema.clone())?; + let table_reader = table_type + .create_table_reader(source.properties.clone(), schema.clone())?; let external_table = ExternalStorageTable::new( TableId::new(source.source_id), upstream_table_name, @@ -188,18 +192,19 @@ impl ExecutorBuilder for SourceExecutorBuilder { let source_state_handler = SourceStateTableHandler::from_table_catalog( source.state_table.as_ref().unwrap(), store.clone(), - ).await; + ) + 
.await; let cdc_backfill = CdcBackfillExecutor::new( params.actor_context.clone(), external_table, Box::new(source_exec), - (0..source.columns.len()).collect_vec(), // eliminate the last column (_rw_offset) + (0..source.columns.len()).collect_vec(), /* eliminate the last column (_rw_offset) */ None, schema.clone(), params.pk_indices, params.executor_stats, source_state_handler, - source_ctrl_opts.chunk_size + source_ctrl_opts.chunk_size, ); cdc_backfill.boxed() } else { diff --git a/src/stream/src/task/stream_manager.rs b/src/stream/src/task/stream_manager.rs index f54eb9921f77c..4386413029ae1 100644 --- a/src/stream/src/task/stream_manager.rs +++ b/src/stream/src/task/stream_manager.rs @@ -821,7 +821,9 @@ impl LocalStreamManagerCore { let mut actor_infos = self.context.actor_infos.write(); for actor in new_actor_infos { let ret = actor_infos.insert(actor.get_actor_id(), actor.clone()); - if let Some(prev_actor) = ret && actor != &prev_actor { + if let Some(prev_actor) = ret + && actor != &prev_actor + { bail!( "actor info mismatch when broadcasting {}", actor.get_actor_id() diff --git a/src/tests/regress/src/schedule.rs b/src/tests/regress/src/schedule.rs index 357cab473d135..c33cf5e91ed5a 100644 --- a/src/tests/regress/src/schedule.rs +++ b/src/tests/regress/src/schedule.rs @@ -329,12 +329,16 @@ impl TestCase { // Find the matching output line, and collect lines before the next matching line. 
let mut expected_output = vec![]; - while let Some(line) = expected_lines.next() && line != original_input_line { + while let Some(line) = expected_lines.next() + && line != original_input_line + { expected_output.push(line); } let mut actual_output = vec![]; - while let Some(line) = actual_lines.next() && line != input_line { + while let Some(line) = actual_lines.next() + && line != input_line + { actual_output.push(line); } @@ -371,7 +375,9 @@ fn compare_output(query: &[&str], expected: &[String], actual: &[String]) -> boo eq }; - if let Some(l) = query.last() && l.starts_with(PREFIX_IGNORE) { + if let Some(l) = query.last() + && l.starts_with(PREFIX_IGNORE) + { return true; } if !expected.is_empty() diff --git a/src/tests/sqlsmith/src/runner.rs b/src/tests/sqlsmith/src/runner.rs index 5efc793cdd95c..cebd50f7e008e 100644 --- a/src/tests/sqlsmith/src/runner.rs +++ b/src/tests/sqlsmith/src/runner.rs @@ -473,7 +473,9 @@ fn validate_response( Ok(rows) => Ok((0, rows)), Err(e) => { // Permit runtime errors conservatively. 
- if let Some(e) = e.as_db_error() && is_permissible_error(&e.to_string()) { + if let Some(e) = e.as_db_error() + && is_permissible_error(&e.to_string()) + { tracing::info!("[SKIPPED ERROR]: {:#?}", e); return Ok((1, vec![])); } @@ -509,16 +511,20 @@ async fn run_query_inner( ), }; if let Err(e) = &response - && let Some(e) = e.as_db_error() { + && let Some(e) = e.as_db_error() + { if is_recovery_in_progress_error(&e.to_string()) { let tries = 5; let interval = 1; - for _ in 0..tries { // retry 5 times + for _ in 0..tries { + // retry 5 times sleep(Duration::from_secs(interval)).await; let query_task = client.simple_query(query); let response = timeout(Duration::from_secs(timeout_duration), query_task).await; match response { - Ok(Ok(r)) => { return Ok((0, r)); } + Ok(Ok(r)) => { + return Ok((0, r)); + } Err(_) => bail!( "[UNEXPECTED ERROR] Query timeout after {timeout_duration}s:\n{:?}", query diff --git a/src/tests/sqlsmith/tests/frontend/mod.rs b/src/tests/sqlsmith/tests/frontend/mod.rs index a0ab1d59cf58e..8f681ab38a956 100644 --- a/src/tests/sqlsmith/tests/frontend/mod.rs +++ b/src/tests/sqlsmith/tests/frontend/mod.rs @@ -149,7 +149,9 @@ async fn test_stream_query( setup_sql: &str, ) -> Result<()> { let mut rng; - if let Ok(x) = env::var("RW_RANDOM_SEED_SQLSMITH") && x == "true" { + if let Ok(x) = env::var("RW_RANDOM_SEED_SQLSMITH") + && x == "true" + { rng = SmallRng::from_entropy(); } else { rng = SmallRng::seed_from_u64(seed); @@ -205,7 +207,9 @@ fn test_batch_query( setup_sql: &str, ) -> Result<()> { let mut rng; - if let Ok(x) = env::var("RW_RANDOM_SEED_SQLSMITH") && x == "true" { + if let Ok(x) = env::var("RW_RANDOM_SEED_SQLSMITH") + && x == "true" + { rng = SmallRng::from_entropy(); } else { rng = SmallRng::seed_from_u64(seed); @@ -248,7 +252,9 @@ async fn setup_sqlsmith_with_seed_inner(seed: u64) -> Result { let session = frontend.session_ref(); let mut rng; - if let Ok(x) = env::var("RW_RANDOM_SEED_SQLSMITH") && x == "true" { + if let Ok(x) = 
env::var("RW_RANDOM_SEED_SQLSMITH") + && x == "true" + { rng = SmallRng::from_entropy(); } else { rng = SmallRng::seed_from_u64(seed); @@ -266,7 +272,9 @@ async fn setup_sqlsmith_with_seed_inner(seed: u64) -> Result { /// Otherwise no error: skip status: false. fn validate_result(result: Result) -> Result { if let Err(e) = result { - if let Some(s) = e.message() && is_permissible_error(s) { + if let Some(s) = e.message() + && is_permissible_error(s) + { return Ok(true); } else { return Err(e); diff --git a/src/utils/runtime/src/logger.rs b/src/utils/runtime/src/logger.rs index 916dd93d7a32b..c4fe90c294081 100644 --- a/src/utils/runtime/src/logger.rs +++ b/src/utils/runtime/src/logger.rs @@ -188,7 +188,9 @@ pub fn init_risingwave_logger(settings: LoggerSettings) { } // Overrides from env var. - if let Ok(rust_log) = std::env::var(EnvFilter::DEFAULT_ENV) && !rust_log.is_empty() { + if let Ok(rust_log) = std::env::var(EnvFilter::DEFAULT_ENV) + && !rust_log.is_empty() + { let rust_log_targets: Targets = rust_log.parse().expect("failed to parse `RUST_LOG`"); if let Some(default_level) = rust_log_targets.default_level() { filter = filter.with_default(default_level); diff --git a/src/utils/runtime/src/panic_hook.rs b/src/utils/runtime/src/panic_hook.rs index 848e7df8509c7..0a43fac48f191 100644 --- a/src/utils/runtime/src/panic_hook.rs +++ b/src/utils/runtime/src/panic_hook.rs @@ -15,7 +15,9 @@ /// Set panic hook to abort the process if we're not catching unwind, without losing the information /// of stack trace and await-tree. 
pub fn set_panic_hook() { - if let Ok(limit) = rlimit::Resource::CORE.get_soft() && limit > 0 { + if let Ok(limit) = rlimit::Resource::CORE.get_soft() + && limit > 0 + { tracing::info!(limit, "coredump on panic is likely to be enabled"); }; From 9616cbf88e7f488849004b7e3f14eb7880e87287 Mon Sep 17 00:00:00 2001 From: Dylan Date: Tue, 31 Oct 2023 13:53:43 +0800 Subject: [PATCH 3/7] build(release): release risingwave all-in-one (#13133) Co-authored-by: Jianwei Huang <1223644280@qq.com> --- ci/scripts/release.sh | 13 ++++++++++--- src/jni_core/src/jvm_runtime.rs | 9 +++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/ci/scripts/release.sh b/ci/scripts/release.sh index 08e5794f173cd..eb05e4aa63871 100755 --- a/ci/scripts/release.sh +++ b/ci/scripts/release.sh @@ -66,9 +66,15 @@ fi echo "--- Build connector node" cd ${REPO_ROOT}/java && mvn -B package -Dmaven.test.skip=true -Dno-build-rust -cd ${REPO_ROOT} && mv ${REPO_ROOT}/java/connector-node/assembly/target/risingwave-connector-1.0.0.tar.gz risingwave-connector-"${BUILDKITE_TAG}".tar.gz if [[ -n "${BUILDKITE_TAG}" ]]; then + echo "--- Collect all release assets" + cd ${REPO_ROOT} && mkdir release-assets && cd release-assets + cp -r ${REPO_ROOT}/target/release/* . 
+ mv ${REPO_ROOT}/java/connector-node/assembly/target/risingwave-connector-1.0.0.tar.gz risingwave-connector-"${BUILDKITE_TAG}".tar.gz + tar -zxvf risingwave-connector-"${BUILDKITE_TAG}".tar.gz libs + ls -l + echo "--- Install gh cli" yum install -y dnf dnf install -y 'dnf-command(config-manager)' @@ -90,8 +96,9 @@ if [[ -n "${BUILDKITE_TAG}" ]]; then tar -czvf risectl-"${BUILDKITE_TAG}"-x86_64-unknown-linux.tar.gz risectl gh release upload "${BUILDKITE_TAG}" risectl-"${BUILDKITE_TAG}"-x86_64-unknown-linux.tar.gz - echo "--- Release build and upload risingwave connector node jar asset" - gh release upload "${BUILDKITE_TAG}" risingwave-connector-"${BUILDKITE_TAG}".tar.gz + echo "--- Release upload risingwave-all-in-one asset" + tar -czvf risingwave-"${BUILDKITE_TAG}"-x86_64-unknown-linux-all-in-one.tar.gz risingwave libs + gh release upload "${BUILDKITE_TAG}" risingwave-"${BUILDKITE_TAG}"-x86_64-unknown-linux-all-in-one.tar.gz fi diff --git a/src/jni_core/src/jvm_runtime.rs b/src/jni_core/src/jvm_runtime.rs index bd1f068b6eaee..9ba8a2a533683 100644 --- a/src/jni_core/src/jvm_runtime.rs +++ b/src/jni_core/src/jvm_runtime.rs @@ -48,12 +48,13 @@ impl JavaVmWrapper { let libs_path = if let Ok(libs_path) = std::env::var("CONNECTOR_LIBS_PATH") { libs_path } else { - return Err(ErrorCode::InternalError( - "environment variable CONNECTOR_LIBS_PATH is not specified".to_string(), - ) - .into()); + tracing::warn!("environment variable CONNECTOR_LIBS_PATH is not specified, so use default path `./libs` instead"); + let path = std::env::current_exe()?.parent().unwrap().join("./libs"); + path.to_str().unwrap().into() }; + tracing::info!("libs_path = {}", libs_path); + let dir = Path::new(&libs_path); if !dir.is_dir() { From f18e73e369ca2c863eebb10cb3f0b37833a5cd93 Mon Sep 17 00:00:00 2001 From: Richard Chien Date: Tue, 31 Oct 2023 14:07:08 +0800 Subject: [PATCH 4/7] perf(over window): incremental aggregation (new) (#13038) Signed-off-by: Richard Chien --- Cargo.lock | 1 + 
src/batch/src/executor/sort_over_window.rs | 5 +- src/expr/core/Cargo.toml | 1 + src/expr/core/src/window_function/call.rs | 3 +- .../src/window_function/state/aggregate.rs | 153 ++++++--- .../core/src/window_function/state/buffer.rs | 297 +++++++++++++----- .../core/src/window_function/state/mod.rs | 8 +- .../src/window_function/state/row_number.rs | 74 +++-- src/expr/core/src/window_function/states.rs | 45 ++- src/stream/src/executor/over_window/eowc.rs | 8 +- .../src/executor/over_window/general.rs | 5 +- .../integration_tests/eowc_over_window.rs | 29 +- 12 files changed, 447 insertions(+), 182 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 477cbf02bc3c0..1da55d7e68e4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7487,6 +7487,7 @@ dependencies = [ "downcast-rs", "easy-ext", "either", + "enum-as-inner", "expect-test", "futures-async-stream", "futures-util", diff --git a/src/batch/src/executor/sort_over_window.rs b/src/batch/src/executor/sort_over_window.rs index c8b6c7ef9388c..21bfc8aa6b177 100644 --- a/src/batch/src/executor/sort_over_window.rs +++ b/src/batch/src/executor/sort_over_window.rs @@ -191,12 +191,11 @@ impl SortOverWindowExecutor { } } for row in rows.drain(..) 
{ - if let Some(chunk) = - chunk_builder.append_one_row(row.chain(OwnedRow::new(states.curr_output()?))) + if let Some(chunk) = chunk_builder + .append_one_row(row.chain(OwnedRow::new(states.slide_no_evict_hint()?))) { yield chunk; } - states.just_slide_forward(); } } } diff --git a/src/expr/core/Cargo.toml b/src/expr/core/Cargo.toml index ada9a3639525c..ab8dd697e220d 100644 --- a/src/expr/core/Cargo.toml +++ b/src/expr/core/Cargo.toml @@ -31,6 +31,7 @@ ctor = "0.2" downcast-rs = "1.2" easy-ext = "1" either = "1" +enum-as-inner = "0.6" futures-async-stream = { workspace = true } futures-util = "0.3" itertools = "0.11" diff --git a/src/expr/core/src/window_function/call.rs b/src/expr/core/src/window_function/call.rs index a74beb672fd4f..ac7c1a2b78dd7 100644 --- a/src/expr/core/src/window_function/call.rs +++ b/src/expr/core/src/window_function/call.rs @@ -15,6 +15,7 @@ use std::cmp::Ordering; use std::fmt::Display; +use enum_as_inner::EnumAsInner; use risingwave_common::bail; use risingwave_common::types::DataType; use risingwave_pb::expr::window_frame::{PbBound, PbExclusion}; @@ -267,7 +268,7 @@ impl FrameBound { } } -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Default)] +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Default, EnumAsInner)] pub enum FrameExclusion { CurrentRow, // Group, diff --git a/src/expr/core/src/window_function/state/aggregate.rs b/src/expr/core/src/window_function/state/aggregate.rs index 7deee85693ef2..38958b50b8c38 100644 --- a/src/expr/core/src/window_function/state/aggregate.rs +++ b/src/expr/core/src/window_function/state/aggregate.rs @@ -15,7 +15,7 @@ use std::collections::BTreeSet; use futures_util::FutureExt; -use risingwave_common::array::{DataChunk, StreamChunk}; +use risingwave_common::array::{DataChunk, Op, StreamChunk}; use risingwave_common::estimate_size::{EstimateSize, KvSize}; use risingwave_common::types::{DataType, Datum}; use risingwave_common::util::iter_util::ZipEqFast; @@ -24,12 +24,16 @@ use 
smallvec::SmallVec; use super::buffer::WindowBuffer; use super::{StateEvictHint, StateKey, StatePos, WindowState}; -use crate::aggregate::{build_append_only, AggArgs, AggCall, BoxedAggregateFunction}; +use crate::aggregate::{ + AggArgs, AggCall, AggregateFunction, AggregateState as AggImplState, BoxedAggregateFunction, +}; +use crate::sig::FUNCTION_REGISTRY; use crate::window_function::{WindowFuncCall, WindowFuncKind}; use crate::Result; pub struct AggregateState { - agg_call: AggCall, + agg_func: BoxedAggregateFunction, + agg_impl: AggImpl, arg_data_types: Vec, buffer: WindowBuffer>, buffer_heap_size: KvSize, @@ -58,13 +62,54 @@ impl AggregateState { distinct: false, direct_args: vec![], }; + let agg_func_sig = FUNCTION_REGISTRY + .get_aggregate( + agg_kind, + &arg_data_types, + &call.return_type, + false, // means prefer retractable version + ) + .expect("the agg func must exist"); + let agg_func = agg_func_sig.build_aggregate(&agg_call)?; + let (agg_impl, enable_delta) = + if !agg_func_sig.append_only && call.frame.exclusion.is_no_others() { + let init_state = agg_func.create_state(); + (AggImpl::Incremental(init_state), true) + } else { + (AggImpl::Full, false) + }; Ok(Self { - agg_call, + agg_func, + agg_impl, arg_data_types, - buffer: WindowBuffer::new(call.frame.clone()), + buffer: WindowBuffer::new(call.frame.clone(), enable_delta), buffer_heap_size: KvSize::new(), }) } + + fn slide_inner(&mut self) -> StateEvictHint { + let removed_keys: BTreeSet<_> = self + .buffer + .slide() + .map(|(k, v)| { + v.iter().for_each(|arg| { + self.buffer_heap_size.sub_val(arg); + }); + self.buffer_heap_size.sub_val(&k); + k + }) + .collect(); + if removed_keys.is_empty() { + StateEvictHint::CannotEvict( + self.buffer + .smallest_key() + .expect("sliding without removing, must have some entry in the buffer") + .clone(), + ) + } else { + StateEvictHint::CanEvict(removed_keys) + } + } } impl WindowState for AggregateState { @@ -84,36 +129,35 @@ impl WindowState for 
AggregateState { } } - fn curr_output(&self) -> Result { + fn slide(&mut self) -> Result<(Datum, StateEvictHint)> { let wrapper = AggregatorWrapper { - agg: build_append_only(&self.agg_call)?, + agg_func: self.agg_func.as_ref(), arg_data_types: &self.arg_data_types, }; - wrapper.aggregate(self.buffer.curr_window_values().map(SmallVec::as_slice)) + let output = match self.agg_impl { + AggImpl::Full => wrapper.aggregate(self.buffer.curr_window_values()), + AggImpl::Incremental(ref mut state) => { + wrapper.update(state, self.buffer.consume_curr_window_values_delta()) + } + }?; + let evict_hint = self.slide_inner(); + Ok((output, evict_hint)) } - fn slide_forward(&mut self) -> StateEvictHint { - let removed_keys: BTreeSet<_> = self - .buffer - .slide() - .map(|(k, v)| { - v.iter().for_each(|arg| { - self.buffer_heap_size.sub_val(arg); - }); - self.buffer_heap_size.sub_val(&k); - k - }) - .collect(); - if removed_keys.is_empty() { - StateEvictHint::CannotEvict( - self.buffer - .smallest_key() - .expect("sliding without removing, must have some entry in the buffer") - .clone(), - ) - } else { - StateEvictHint::CanEvict(removed_keys) - } + fn slide_no_output(&mut self) -> Result { + match self.agg_impl { + AggImpl::Full => {} + AggImpl::Incremental(ref mut state) => { + // for incremental agg, we need to update the state even if the caller doesn't need + // the output + let wrapper = AggregatorWrapper { + agg_func: self.agg_func.as_ref(), + arg_data_types: &self.arg_data_types, + }; + wrapper.update(state, self.buffer.consume_curr_window_values_delta())?; + } + }; + Ok(self.slide_inner()) } } @@ -125,41 +169,62 @@ impl EstimateSize for AggregateState { } } +enum AggImpl { + Incremental(AggImplState), + Full, +} + struct AggregatorWrapper<'a> { - agg: BoxedAggregateFunction, + agg_func: &'a dyn AggregateFunction, arg_data_types: &'a [DataType], } impl AggregatorWrapper<'_> { - fn aggregate<'a>(&'a self, values: impl Iterator) -> Result { - // TODO(rc): switch to a better 
general version of aggregator implementation + fn aggregate(&self, values: impl IntoIterator) -> Result + where + V: AsRef<[Datum]>, + { + let mut state = self.agg_func.create_state(); + self.update( + &mut state, + values.into_iter().map(|args| (Op::Insert, args)), + ) + } + fn update( + &self, + state: &mut AggImplState, + delta: impl IntoIterator, + ) -> Result + where + V: AsRef<[Datum]>, + { let mut args_builders = self .arg_data_types .iter() .map(|data_type| data_type.create_array_builder(0 /* bad! */)) .collect::>(); - let mut n_values = 0; - for value in values { - n_values += 1; - for (builder, datum) in args_builders.iter_mut().zip_eq_fast(value.iter()) { + let mut ops = Vec::new(); + let mut n_rows = 0; + for (op, value) in delta { + n_rows += 1; + ops.push(op); + for (builder, datum) in args_builders.iter_mut().zip_eq_fast(value.as_ref()) { builder.append(datum); } } - let columns = args_builders .into_iter() .map(|builder| builder.finish().into()) .collect::>(); - let chunk = StreamChunk::from(DataChunk::new(columns, n_values)); + let chunk = StreamChunk::from_parts(ops, DataChunk::new(columns, n_rows)); - let mut state = self.agg.create_state(); - self.agg - .update(&mut state, &chunk) + self.agg_func + .update(state, &chunk) .now_or_never() .expect("we don't support UDAF currently, so the function should return immediately")?; - self.agg - .get_result(&state) + self.agg_func + .get_result(state) .now_or_never() .expect("we don't support UDAF currently, so the function should return immediately") } diff --git a/src/expr/core/src/window_function/state/buffer.rs b/src/expr/core/src/window_function/state/buffer.rs index a375c7bfec225..fa684b9049459 100644 --- a/src/expr/core/src/window_function/state/buffer.rs +++ b/src/expr/core/src/window_function/state/buffer.rs @@ -15,7 +15,8 @@ use std::collections::VecDeque; use std::ops::Range; -use either::Either; +use risingwave_common::array::Op; +use smallvec::{smallvec, SmallVec}; use 
crate::window_function::{Frame, FrameBounds, FrameExclusion}; @@ -26,12 +27,13 @@ struct Entry { // TODO(rc): May be a good idea to extract this into a separate crate. /// A common sliding window buffer. -pub struct WindowBuffer { +pub struct WindowBuffer { frame: Frame, buffer: VecDeque>, curr_idx: usize, left_idx: usize, // inclusive, note this can be > `curr_idx` right_excl_idx: usize, // exclusive, note this can be <= `curr_idx` + curr_delta: Option>, } /// Note: A window frame can be pure preceding, pure following, or acrossing the _current row_. @@ -41,15 +43,24 @@ pub struct CurrWindow<'a, K> { pub following_saturated: bool, } -impl WindowBuffer { - pub fn new(frame: Frame) -> Self { +impl WindowBuffer { + pub fn new(frame: Frame, enable_delta: bool) -> Self { assert!(frame.bounds.is_valid()); + if enable_delta { + // TODO(rc): currently only support `FrameExclusion::NoOthers` for delta + assert!(frame.exclusion.is_no_others()); + } Self { frame, buffer: Default::default(), curr_idx: 0, left_idx: 0, right_excl_idx: 0, + curr_delta: if enable_delta { + Some(Default::default()) + } else { + None + }, } } @@ -64,7 +75,10 @@ impl WindowBuffer { } else { // FIXME(rc): Clippy rule `clippy::nonminimal_bool` is misreporting that // the following can be simplified. - // assert!(self.curr_idx >= self.left_idx); + #[allow(clippy::nonminimal_bool)] + { + assert!(self.curr_idx >= self.left_idx); + } self.curr_idx - self.left_idx >= start_off.unsigned_abs() } } else { @@ -84,9 +98,12 @@ impl WindowBuffer { true // pure preceding frame, always following-saturated } else { // FIXME(rc): Ditto. 
- // assert!(self.right_excl_idx > 0); - // assert!(self.right_excl_idx > self.curr_idx); - // assert!(self.right_excl_idx <= self.buffer.len()); + #[allow(clippy::nonminimal_bool)] + { + assert!(self.right_excl_idx > 0); + assert!(self.right_excl_idx > self.curr_idx); + assert!(self.right_excl_idx <= self.buffer.len()); + } self.right_excl_idx - 1 - self.curr_idx >= end_off as usize } } else { @@ -110,28 +127,43 @@ impl WindowBuffer { } } + fn curr_window_outer(&self) -> Range { + self.left_idx..self.right_excl_idx + } + + fn curr_window_exclusion(&self) -> Range { + // TODO(rc): should intersect with `curr_window_outer` to be more accurate + match self.frame.exclusion { + FrameExclusion::CurrentRow => self.curr_idx..self.curr_idx + 1, + FrameExclusion::NoOthers => self.curr_idx..self.curr_idx, + } + } + + fn curr_window_ranges(&self) -> (Range, Range) { + let selection = self.curr_window_outer(); + let exclusion = self.curr_window_exclusion(); + range_except(selection, exclusion) + } + /// Iterate over values in the current window. pub fn curr_window_values(&self) -> impl Iterator { assert!(self.left_idx <= self.right_excl_idx); assert!(self.right_excl_idx <= self.buffer.len()); - let selection = self.left_idx..self.right_excl_idx; - if selection.is_empty() { - return Either::Left(std::iter::empty()); - } + let (left, right) = self.curr_window_ranges(); + self.buffer + .range(left) + .chain(self.buffer.range(right)) + .map(|Entry { value, .. }| value) + } - let exclusion = match self.frame.exclusion { - FrameExclusion::CurrentRow => self.curr_idx..self.curr_idx + 1, - FrameExclusion::NoOthers => self.curr_idx..self.curr_idx, - }; - let (left, right) = range_except(selection, exclusion); - - Either::Right( - self.buffer - .range(left) - .chain(self.buffer.range(right)) - .map(|Entry { value, .. }| value), - ) + /// Consume the delta of values comparing the current window to the previous window. 
+ /// The delta is not guaranteed to be sorted, especially when frame exclusion is not `NoOthers`. + pub fn consume_curr_window_values_delta(&mut self) -> impl Iterator + '_ { + self.curr_delta + .as_mut() + .expect("delta mode should be enabled") + .drain(..) } fn recalculate_left_right(&mut self) { @@ -175,10 +207,29 @@ impl WindowBuffer { } } + fn maintain_delta(&mut self, old_outer: Range, new_outer: Range) { + debug_assert!(self.frame.exclusion.is_no_others()); + + let (outer_removed, outer_added) = range_diff(old_outer.clone(), new_outer.clone()); + let delta = self.curr_delta.as_mut().unwrap(); + for idx in outer_removed.iter().cloned().flatten() { + delta.push((Op::Delete, self.buffer[idx].value.clone())); + } + for idx in outer_added.iter().cloned().flatten() { + delta.push((Op::Insert, self.buffer[idx].value.clone())); + } + } + /// Append a key-value pair to the buffer. pub fn append(&mut self, key: K, value: V) { + let old_outer = self.curr_window_outer(); + self.buffer.push_back(Entry { key, value }); - self.recalculate_left_right() + self.recalculate_left_right(); + + if self.curr_delta.is_some() { + self.maintain_delta(old_outer, self.curr_window_outer()); + } } /// Get the smallest key that is still kept in the buffer. @@ -190,8 +241,15 @@ impl WindowBuffer { /// Slide the current window forward. /// Returns the keys that are removed from the buffer. pub fn slide(&mut self) -> impl Iterator + '_ { + let old_outer = self.curr_window_outer(); + self.curr_idx += 1; self.recalculate_left_right(); + + if self.curr_delta.is_some() { + self.maintain_delta(old_outer, self.curr_window_outer()); + } + let min_needed_idx = std::cmp::min(self.left_idx, self.curr_idx); self.curr_idx -= min_needed_idx; self.left_idx -= min_needed_idx; @@ -205,7 +263,12 @@ impl WindowBuffer { /// Calculate range (A - B), the result might be the union of two ranges when B is totally included /// in the A. 
fn range_except(a: Range, b: Range) -> (Range, Range) { - if a.end <= b.start || b.end <= a.start { + #[allow(clippy::if_same_then_else)] // for better readability + if a.is_empty() { + (0..0, 0..0) + } else if b.is_empty() { + (a, 0..0) + } else if a.end <= b.start || b.end <= a.start { // a: [ ) // b: [ ) // or @@ -233,31 +296,129 @@ fn range_except(a: Range, b: Range) -> (Range, Range } } +/// Calculate the difference of two ranges A and B, return (removed ranges, added ranges). +/// Note this is quite different from [`range_except`]. +#[allow(clippy::type_complexity)] // looks complex but it's not +fn range_diff( + a: Range, + b: Range, +) -> (SmallVec<[Range; 2]>, SmallVec<[Range; 2]>) { + if a.start == b.start { + match a.end.cmp(&b.end) { + std::cmp::Ordering::Equal => { + // a: [ ) + // b: [ ) + (smallvec![], smallvec![]) + } + std::cmp::Ordering::Less => { + // a: [ ) + // b: [ ) + (smallvec![], smallvec![a.end..b.end]) + } + std::cmp::Ordering::Greater => { + // a: [ ) + // b: [ ) + (smallvec![b.end..a.end], smallvec![]) + } + } + } else if a.end == b.end { + debug_assert!(a.start != b.start); + if a.start < b.start { + // a: [ ) + // b: [ ) + (smallvec![a.start..b.start], smallvec![]) + } else { + // a: [ ) + // b: [ ) + (smallvec![], smallvec![b.start..a.start]) + } + } else { + debug_assert!(a.start != b.start && a.end != b.end); + if a.end <= b.start || b.end <= a.start { + // a: [ ) + // b: [ [ ) + // or + // a: [ ) + // b: [ ) ) + (smallvec![a], smallvec![b]) + } else if b.start < a.start && a.end < b.end { + // a: [ ) + // b: [ ) + (smallvec![], smallvec![b.start..a.start, a.end..b.end]) + } else if a.start < b.start && b.end < a.end { + // a: [ ) + // b: [ ) + (smallvec![a.start..b.start, b.end..a.end], smallvec![]) + } else if a.end < b.end { + // a: [ ) + // b: [ ) + (smallvec![a.start..b.start], smallvec![a.end..b.end]) + } else { + // a: [ ) + // b: [ ) + (smallvec![b.end..a.end], smallvec![b.start..a.start]) + } + } +} + #[cfg(test)] mod 
tests { + use std::collections::HashSet; + use itertools::Itertools; use super::*; use crate::window_function::{Frame, FrameBound}; + #[test] + fn test_range_diff() { + fn test( + a: Range, + b: Range, + expected_removed: impl IntoIterator, + expected_added: impl IntoIterator, + ) { + let (removed, added) = range_diff(a, b); + let removed_set = removed.into_iter().flatten().collect::>(); + let added_set = added.into_iter().flatten().collect::>(); + let expected_removed_set = expected_removed.into_iter().collect::>(); + let expected_added_set = expected_added.into_iter().collect::>(); + assert_eq!(removed_set, expected_removed_set); + assert_eq!(added_set, expected_added_set); + } + + test(0..0, 0..0, [], []); + test(0..1, 0..1, [], []); + test(0..1, 0..2, [], [1]); + test(0..2, 0..1, [1], []); + test(0..2, 1..2, [0], []); + test(1..2, 0..2, [], [0]); + test(0..1, 1..2, [0], [1]); + test(0..1, 2..3, [0], [2]); + test(1..2, 0..1, [1], [0]); + test(2..3, 0..1, [2], [0]); + test(0..3, 1..2, [0, 2], []); + test(1..2, 0..3, [], [0, 2]); + test(0..3, 2..4, [0, 1], [3]); + test(2..4, 0..3, [3], [0, 1]); + } + #[test] fn test_rows_frame_unbounded_preceding_to_current_row() { - let mut buffer = WindowBuffer::new(Frame::rows( - FrameBound::UnboundedPreceding, - FrameBound::CurrentRow, - )); + let mut buffer = WindowBuffer::new( + Frame::rows(FrameBound::UnboundedPreceding, FrameBound::CurrentRow), + true, + ); let window = buffer.curr_window(); assert!(window.key.is_none()); assert!(!window.preceding_saturated); assert!(!window.following_saturated); - buffer.append(1, "hello"); let window = buffer.curr_window(); assert_eq!(window.key, Some(&1)); assert!(!window.preceding_saturated); // unbounded preceding is never saturated assert!(window.following_saturated); - buffer.append(2, "world"); let window = buffer.curr_window(); assert_eq!(window.key, Some(&1)); @@ -267,7 +428,6 @@ mod tests { buffer.curr_window_values().cloned().collect_vec(), vec!["hello"] ); - let removed_keys = 
buffer.slide().map(|(k, _)| k).collect_vec(); assert!(removed_keys.is_empty()); // unbouded preceding, nothing can ever be removed let window = buffer.curr_window(); @@ -279,16 +439,15 @@ mod tests { #[test] fn test_rows_frame_preceding_to_current_row() { - let mut buffer = WindowBuffer::new(Frame::rows( - FrameBound::Preceding(1), - FrameBound::CurrentRow, - )); + let mut buffer = WindowBuffer::new( + Frame::rows(FrameBound::Preceding(1), FrameBound::CurrentRow), + true, + ); let window = buffer.curr_window(); assert!(window.key.is_none()); assert!(!window.preceding_saturated); assert!(!window.following_saturated); - buffer.append(1, "hello"); let window = buffer.curr_window(); assert_eq!(window.key, Some(&1)); @@ -298,13 +457,11 @@ mod tests { buffer.curr_window_values().cloned().collect_vec(), vec!["hello"] ); - buffer.append(2, "world"); let window = buffer.curr_window(); assert_eq!(window.key, Some(&1)); assert!(!window.preceding_saturated); assert!(window.following_saturated); - let removed_keys = buffer.slide().map(|(k, _)| k).collect_vec(); assert!(removed_keys.is_empty()); let window = buffer.curr_window(); @@ -312,7 +469,6 @@ mod tests { assert!(window.preceding_saturated); assert!(window.following_saturated); assert_eq!(buffer.smallest_key(), Some(&1)); - buffer.append(3, "!"); let window = buffer.curr_window(); assert_eq!(window.key, Some(&2)); @@ -322,10 +478,10 @@ mod tests { #[test] fn test_rows_frame_preceding_to_preceding() { - let mut buffer = WindowBuffer::new(Frame::rows( - FrameBound::Preceding(2), - FrameBound::Preceding(1), - )); + let mut buffer = WindowBuffer::new( + Frame::rows(FrameBound::Preceding(2), FrameBound::Preceding(1)), + true, + ); buffer.append(1, "RisingWave"); let window = buffer.curr_window(); @@ -333,11 +489,9 @@ mod tests { assert!(!window.preceding_saturated); assert!(window.following_saturated); assert!(buffer.curr_window_values().collect_vec().is_empty()); - let removed_keys = buffer.slide().map(|(k, _)| 
k).collect_vec(); assert!(removed_keys.is_empty()); assert_eq!(buffer.smallest_key(), Some(&1)); - buffer.append(2, "is the best"); let window = buffer.curr_window(); assert_eq!(window.key, Some(&2)); @@ -347,11 +501,9 @@ mod tests { buffer.curr_window_values().cloned().collect_vec(), vec!["RisingWave"] ); - let removed_keys = buffer.slide().map(|(k, _)| k).collect_vec(); assert!(removed_keys.is_empty()); assert_eq!(buffer.smallest_key(), Some(&1)); - buffer.append(3, "streaming platform"); let window = buffer.curr_window(); assert_eq!(window.key, Some(&3)); @@ -361,7 +513,6 @@ mod tests { buffer.curr_window_values().cloned().collect_vec(), vec!["RisingWave", "is the best"] ); - let removed_keys = buffer.slide().map(|(k, _)| k).collect_vec(); assert_eq!(removed_keys, vec![1]); assert_eq!(buffer.smallest_key(), Some(&2)); @@ -369,10 +520,10 @@ mod tests { #[test] fn test_rows_frame_current_row_to_unbounded_following() { - let mut buffer = WindowBuffer::new(Frame::rows( - FrameBound::CurrentRow, - FrameBound::UnboundedFollowing, - )); + let mut buffer = WindowBuffer::new( + Frame::rows(FrameBound::CurrentRow, FrameBound::UnboundedFollowing), + true, + ); buffer.append(1, "RisingWave"); let window = buffer.curr_window(); @@ -383,7 +534,6 @@ mod tests { buffer.curr_window_values().cloned().collect_vec(), vec!["RisingWave"] ); - buffer.append(2, "is the best"); let window = buffer.curr_window(); assert_eq!(window.key, Some(&1)); @@ -393,7 +543,6 @@ mod tests { buffer.curr_window_values().cloned().collect_vec(), vec!["RisingWave", "is the best"] ); - let removed_keys = buffer.slide().map(|(k, _)| k).collect_vec(); assert_eq!(removed_keys, vec![1]); assert_eq!(buffer.smallest_key(), Some(&2)); @@ -409,10 +558,10 @@ mod tests { #[test] fn test_rows_frame_current_row_to_following() { - let mut buffer = WindowBuffer::new(Frame::rows( - FrameBound::CurrentRow, - FrameBound::Following(1), - )); + let mut buffer = WindowBuffer::new( + Frame::rows(FrameBound::CurrentRow, 
FrameBound::Following(1)), + true, + ); buffer.append(1, "RisingWave"); let window = buffer.curr_window(); @@ -423,7 +572,6 @@ mod tests { buffer.curr_window_values().cloned().collect_vec(), vec!["RisingWave"] ); - buffer.append(2, "is the best"); let window = buffer.curr_window(); assert_eq!(window.key, Some(&1)); @@ -433,7 +581,6 @@ mod tests { buffer.curr_window_values().cloned().collect_vec(), vec!["RisingWave", "is the best"] ); - buffer.append(3, "streaming platform"); let window = buffer.curr_window(); assert_eq!(window.key, Some(&1)); @@ -443,7 +590,6 @@ mod tests { buffer.curr_window_values().cloned().collect_vec(), vec!["RisingWave", "is the best"] ); - let removed_keys = buffer.slide().map(|(k, _)| k).collect_vec(); assert_eq!(removed_keys, vec![1]); let window = buffer.curr_window(); @@ -458,10 +604,10 @@ mod tests { #[test] fn test_rows_frame_following_to_following() { - let mut buffer = WindowBuffer::new(Frame::rows( - FrameBound::Following(1), - FrameBound::Following(2), - )); + let mut buffer = WindowBuffer::new( + Frame::rows(FrameBound::Following(1), FrameBound::Following(2)), + true, + ); buffer.append(1, "RisingWave"); let window = buffer.curr_window(); @@ -469,7 +615,6 @@ mod tests { assert!(window.preceding_saturated); assert!(!window.following_saturated); assert!(buffer.curr_window_values().collect_vec().is_empty()); - buffer.append(2, "is the best"); let window = buffer.curr_window(); assert_eq!(window.key, Some(&1)); @@ -479,7 +624,6 @@ mod tests { buffer.curr_window_values().cloned().collect_vec(), vec!["is the best"] ); - buffer.append(3, "streaming platform"); let window = buffer.curr_window(); assert_eq!(window.key, Some(&1)); @@ -489,7 +633,6 @@ mod tests { buffer.curr_window_values().cloned().collect_vec(), vec!["is the best", "streaming platform"] ); - let removed_keys = buffer.slide().map(|(k, _)| k).collect_vec(); assert_eq!(removed_keys, vec![1]); let window = buffer.curr_window(); @@ -504,11 +647,14 @@ mod tests { #[test] fn 
test_rows_frame_exclude_current_row() { - let mut buffer = WindowBuffer::new(Frame::rows_with_exclusion( - FrameBound::UnboundedPreceding, - FrameBound::CurrentRow, - FrameExclusion::CurrentRow, - )); + let mut buffer = WindowBuffer::new( + Frame::rows_with_exclusion( + FrameBound::UnboundedPreceding, + FrameBound::CurrentRow, + FrameExclusion::CurrentRow, + ), + false, + ); buffer.append(1, "hello"); assert!(buffer @@ -516,7 +662,6 @@ mod tests { .cloned() .collect_vec() .is_empty()); - buffer.append(2, "world"); let _ = buffer.slide(); assert_eq!( diff --git a/src/expr/core/src/window_function/state/mod.rs b/src/expr/core/src/window_function/state/mod.rs index 971fb97f66cdc..927f5aaf6e0c0 100644 --- a/src/expr/core/src/window_function/state/mod.rs +++ b/src/expr/core/src/window_function/state/mod.rs @@ -101,11 +101,11 @@ pub trait WindowState: EstimateSize { /// Get the current window frame position. fn curr_window(&self) -> StatePos<'_>; - /// Get the window function result of current window frame. - fn curr_output(&self) -> Result; + /// Slide the window frame forward and collect the output and evict hint. Similar to `Iterator::next`. + fn slide(&mut self) -> Result<(Datum, StateEvictHint)>; - /// Slide the window frame forward. - fn slide_forward(&mut self) -> StateEvictHint; + /// Slide the window frame forward and collect the evict hint. Don't calculate the output if possible. 
+ fn slide_no_output(&mut self) -> Result; } pub fn create_window_state(call: &WindowFuncCall) -> Result> { diff --git a/src/expr/core/src/window_function/state/row_number.rs b/src/expr/core/src/window_function/state/row_number.rs index fd485292c9382..6a2759d69308c 100644 --- a/src/expr/core/src/window_function/state/row_number.rs +++ b/src/expr/core/src/window_function/state/row_number.rs @@ -36,6 +36,19 @@ impl RowNumberState { curr_row_number: 1, } } + + fn slide_inner(&mut self) -> StateEvictHint { + self.curr_row_number += 1; + self.buffer + .pop_front() + .expect("should not slide forward when the current window is not ready"); + // can't evict any state key in EOWC mode, because we can't recover from previous output now + StateEvictHint::CannotEvict( + self.first_key + .clone() + .expect("should have appended some rows"), + ) + } } impl WindowState for RowNumberState { @@ -54,25 +67,18 @@ impl WindowState for RowNumberState { } } - fn curr_output(&self) -> Result { - if self.curr_window().is_ready { - Ok(Some(self.curr_row_number.into())) + fn slide(&mut self) -> Result<(Datum, StateEvictHint)> { + let output = if self.curr_window().is_ready { + Some(self.curr_row_number.into()) } else { - Ok(None) - } + None + }; + let evict_hint = self.slide_inner(); + Ok((output, evict_hint)) } - fn slide_forward(&mut self) -> StateEvictHint { - self.curr_row_number += 1; - self.buffer - .pop_front() - .expect("should not slide forward when the current window is not ready"); - // can't evict any state key in EOWC mode, because we can't recover from previous output now - StateEvictHint::CannotEvict( - self.first_key - .clone() - .expect("should have appended some rows"), - ) + fn slide_no_output(&mut self) -> Result { + Ok(self.slide_inner()) } } @@ -92,6 +98,24 @@ mod tests { } } + #[test] + #[should_panic(expected = "should not slide forward when the current window is not ready")] + fn test_row_number_state_bad_use() { + let call = WindowFuncCall { + kind: 
WindowFuncKind::RowNumber, + args: AggArgs::None, + return_type: DataType::Int64, + frame: Frame::rows( + FrameBound::UnboundedPreceding, + FrameBound::UnboundedFollowing, + ), + }; + let mut state = RowNumberState::new(&call); + assert!(state.curr_window().key.is_none()); + assert!(!state.curr_window().is_ready); + _ = state.slide() + } + #[test] fn test_row_number_state() { let call = WindowFuncCall { @@ -106,24 +130,23 @@ mod tests { let mut state = RowNumberState::new(&call); assert!(state.curr_window().key.is_none()); assert!(!state.curr_window().is_ready); - assert!(state.curr_output().unwrap().is_none()); state.append(create_state_key(100), SmallVec::new()); assert_eq!(state.curr_window().key.unwrap(), &create_state_key(100)); assert!(state.curr_window().is_ready); - assert_eq!(state.curr_output().unwrap().unwrap(), 1i64.into()); - state.append(create_state_key(103), SmallVec::new()); - state.append(create_state_key(102), SmallVec::new()); - assert_eq!(state.curr_window().key.unwrap(), &create_state_key(100)); - let evict_hint = state.slide_forward(); + let (output, evict_hint) = state.slide().unwrap(); + assert_eq!(output.unwrap(), 1i64.into()); match evict_hint { StateEvictHint::CannotEvict(state_key) => { assert_eq!(state_key, create_state_key(100)); } _ => unreachable!(), } + assert!(!state.curr_window().is_ready); + state.append(create_state_key(103), SmallVec::new()); + state.append(create_state_key(102), SmallVec::new()); assert_eq!(state.curr_window().key.unwrap(), &create_state_key(103)); - assert_eq!(state.curr_output().unwrap().unwrap(), 2i64.into()); - let evict_hint = state.slide_forward(); + let (output, evict_hint) = state.slide().unwrap(); + assert_eq!(output.unwrap(), 2i64.into()); match evict_hint { StateEvictHint::CannotEvict(state_key) => { assert_eq!(state_key, create_state_key(100)); @@ -131,6 +154,7 @@ mod tests { _ => unreachable!(), } assert_eq!(state.curr_window().key.unwrap(), &create_state_key(102)); - 
assert_eq!(state.curr_output().unwrap().unwrap(), 3i64.into()); + let (output, _) = state.slide().unwrap(); + assert_eq!(output.unwrap(), 3i64.into()); } } diff --git a/src/expr/core/src/window_function/states.rs b/src/expr/core/src/window_function/states.rs index d039eb323e101..3a506d165c356 100644 --- a/src/expr/core/src/window_function/states.rs +++ b/src/expr/core/src/window_function/states.rs @@ -48,28 +48,43 @@ impl WindowStates { self.0.iter().all(|state| state.curr_window().is_ready) } - /// Get the current output of all windows. - pub fn curr_output(&self) -> Result> { + /// Slide all windows forward and collect the output and evict hints. + pub fn slide(&mut self) -> Result<(Vec, StateEvictHint)> { debug_assert!(self.are_aligned()); - self.0.iter().map(|state| state.curr_output()).try_collect() + let mut output = Vec::with_capacity(self.0.len()); + let mut evict_hint: Option = None; + for state in &mut self.0 { + let (x_output, x_evict) = state.slide()?; + output.push(x_output); + evict_hint = match evict_hint { + Some(evict_hint) => Some(evict_hint.merge(x_evict)), + None => Some(x_evict), + }; + } + Ok(( + output, + evict_hint.expect("# of evict hints = # of window states"), + )) } - /// Slide all windows forward and collect the evict hints. - pub fn slide_forward(&mut self) -> StateEvictHint { + /// Slide all windows forward and collect the output, ignoring the evict hints. + pub fn slide_no_evict_hint(&mut self) -> Result> { debug_assert!(self.are_aligned()); - self.0 - .iter_mut() - .map(|state| state.slide_forward()) - .reduce(StateEvictHint::merge) - .expect("# of evict hints = # of window states") + let mut output = Vec::with_capacity(self.0.len()); + for state in &mut self.0 { + let (x_output, _) = state.slide()?; + output.push(x_output); + } + Ok(output) } - /// Slide all windows forward, ignoring the evict hints. - pub fn just_slide_forward(&mut self) { + /// Slide all windows forward, ignoring the output and evict hints. 
+ pub fn just_slide(&mut self) -> Result<()> { debug_assert!(self.are_aligned()); - self.0 - .iter_mut() - .for_each(|state| _ = state.slide_forward()); + for state in &mut self.0 { + state.slide_no_output()?; + } + Ok(()) } } diff --git a/src/stream/src/executor/over_window/eowc.rs b/src/stream/src/executor/over_window/eowc.rs index b5da45edd47e5..fa20e3b49d970 100644 --- a/src/stream/src/executor/over_window/eowc.rs +++ b/src/stream/src/executor/over_window/eowc.rs @@ -241,7 +241,7 @@ impl EowcOverWindowExecutor { // Ignore ready windows (all ready windows were outputted before). while partition.states.are_ready() { - partition.states.just_slide_forward(); + partition.states.just_slide()?; partition.curr_row_buffer.pop_front(); } @@ -276,7 +276,8 @@ impl EowcOverWindowExecutor { &encoded_partition_key, ) .await?; - let mut partition = vars.partitions.get_mut(&encoded_partition_key).unwrap(); + let partition: &mut Partition = + &mut vars.partitions.get_mut(&encoded_partition_key).unwrap(); // Materialize input to state table. this.state_table.insert(input_row); @@ -314,7 +315,7 @@ impl EowcOverWindowExecutor { // The partition is ready to output, so we can produce a row. // Get all outputs. - let ret_values = partition.states.curr_output()?; + let (ret_values, evict_hint) = partition.states.slide()?; let curr_row = partition .curr_row_buffer .pop_front() @@ -330,7 +331,6 @@ impl EowcOverWindowExecutor { } // Evict unneeded rows from state table. - let evict_hint = partition.states.slide_forward(); if let StateEvictHint::CanEvict(keys_to_evict) = evict_hint { for key in keys_to_evict { let order_key = memcmp_encoding::decode_row( diff --git a/src/stream/src/executor/over_window/general.rs b/src/stream/src/executor/over_window/general.rs index c9717f9defe61..38c959039bf56 100644 --- a/src/stream/src/executor/over_window/general.rs +++ b/src/stream/src/executor/over_window/general.rs @@ -512,7 +512,7 @@ impl OverWindowExecutor { // Slide to the first affected key. 
We can safely compare to `Some(first_curr_key)` here // because it must exist in the states, by the definition of affected range. while states.curr_key() != Some(first_curr_key.as_normal_expect()) { - states.just_slide_forward(); + states.just_slide()?; } let mut curr_key_cursor = part_with_delta.find(first_curr_key).unwrap(); assert_eq!( @@ -525,7 +525,7 @@ impl OverWindowExecutor { let (key, row) = curr_key_cursor .key_value() .expect("cursor must be valid until `last_curr_key`"); - let output = states.curr_output()?; + let output = states.slide_no_evict_hint()?; let new_row = OwnedRow::new( row.as_inner() .iter() @@ -554,7 +554,6 @@ impl OverWindowExecutor { } } - states.just_slide_forward(); curr_key_cursor.move_next(); key != last_curr_key diff --git a/src/stream/tests/integration_tests/eowc_over_window.rs b/src/stream/tests/integration_tests/eowc_over_window.rs index 9407b6013dc03..7334654d8dd50 100644 --- a/src/stream/tests/integration_tests/eowc_over_window.rs +++ b/src/stream/tests/integration_tests/eowc_over_window.rs @@ -186,13 +186,17 @@ async fn test_over_window_aggregate() { check_with_script( || create_executor(calls.clone(), store.clone()), r###" -- !barrier 1 -- !chunk |2 - I T I i - + 1 p1 100 10 - + 1 p1 101 16 - + 4 p1 102 20 -"###, + - !barrier 1 + - !chunk |2 + I T I i + + 1 p1 100 10 + + 1 p1 101 16 + + 4 p1 102 20 + - !chunk |2 + I T I i + + 2 p1 103 30 + + 6 p1 104 11 + "###, expect![[r#" - input: !barrier 1 output: @@ -209,6 +213,17 @@ async fn test_over_window_aggregate() { | + | 1 | p1 | 100 | 10 | 26 | | + | 1 | p1 | 101 | 16 | 46 | +---+---+----+-----+----+----+ + - input: !chunk |- + +---+---+----+-----+----+ + | + | 2 | p1 | 103 | 30 | + | + | 6 | p1 | 104 | 11 | + +---+---+----+-----+----+ + output: + - !chunk |- + +---+---+----+-----+----+----+ + | + | 4 | p1 | 102 | 20 | 66 | + | + | 2 | p1 | 103 | 30 | 61 | + +---+---+----+-----+----+----+ "#]], SnapshotOptions::default(), ) From 3626154b9c77c1cbbe4ac478ec8e399d6c31c184 Mon Sep 
17 00:00:00 2001 From: xxchan Date: Tue, 31 Oct 2023 14:32:58 +0800 Subject: [PATCH 5/7] chore: ignore let-chains fmt in git blame (#13165) --- .git-blame-ignore-revs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index c3a80429ee84d..6efd862273624 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -35,4 +35,7 @@ f8266748dcb70541da944664552c1944ff8362e4 f2a3fd021059e680b35b24c63cff5f8dbe9f9d5f # chore(rustfmt): format let-chains and let-else #9409 -d70dba827c303373f3220c9733f7c7443e5c2d37 \ No newline at end of file +d70dba827c303373f3220c9733f7c7443e5c2d37 + +# chore: cargo +nightly fmt (#13162) (format let-chains) +c583e2c6c054764249acf484438c7bf7197765f4 From 27ea58e55f2de3646b4c475be81c0abdf00d3817 Mon Sep 17 00:00:00 2001 From: Yingjun Wu Date: Mon, 30 Oct 2023 23:54:46 -0700 Subject: [PATCH 6/7] doc: Update README.md (#13167) Co-authored-by: hengm3467 <100685635+hengm3467@users.noreply.github.com> --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c2f26ebc1a29b..326145c8f67aa 100644 --- a/README.md +++ b/README.md @@ -47,16 +47,16 @@ Docker - Build status + X - codecov + YouTube From dc31d8b43959bda8ebaa3c6859772a291d0f4492 Mon Sep 17 00:00:00 2001 From: Shanicky Chen Date: Tue, 31 Oct 2023 15:32:02 +0800 Subject: [PATCH 7/7] feat: Mutual conversion between `PbFragment` and `fragment::Model` (#13154) Signed-off-by: Shanicky Chen --- .../migration/src/m20230908_072257_init.rs | 10 +- src/meta/model_v2/src/actor.rs | 19 +- src/meta/model_v2/src/fragment.rs | 50 +- src/meta/model_v2/src/lib.rs | 35 +- src/meta/src/controller/catalog.rs | 8 +- src/meta/src/controller/fragment.rs | 727 ++++++++++++++++++ src/meta/src/controller/mod.rs | 1 + 7 files changed, 817 insertions(+), 33 deletions(-) create mode 100644 src/meta/src/controller/fragment.rs diff --git a/src/meta/model_v2/migration/src/m20230908_072257_init.rs 
b/src/meta/model_v2/migration/src/m20230908_072257_init.rs index 59a30149a1158..e2aaa0da59d51 100644 --- a/src/meta/model_v2/migration/src/m20230908_072257_init.rs +++ b/src/meta/model_v2/migration/src/m20230908_072257_init.rs @@ -334,9 +334,6 @@ impl MigrationTrait for Migration { .col(ColumnDef::new(Fragment::VnodeMapping).json()) .col(ColumnDef::new(Fragment::StateTableIds).json()) .col(ColumnDef::new(Fragment::UpstreamFragmentId).json()) - .col(ColumnDef::new(Fragment::DispatcherType).string()) - .col(ColumnDef::new(Fragment::DistKeyIndices).json()) - .col(ColumnDef::new(Fragment::OutputIndices).json()) .foreign_key( &mut ForeignKey::create() .name("FK_fragment_table_id") @@ -359,11 +356,11 @@ impl MigrationTrait for Migration { .auto_increment(), ) .col(ColumnDef::new(Actor::FragmentId).integer().not_null()) - .col(ColumnDef::new(Actor::Status).string()) + .col(ColumnDef::new(Actor::Status).string().not_null()) .col(ColumnDef::new(Actor::Splits).json()) .col(ColumnDef::new(Actor::ParallelUnitId).integer().not_null()) .col(ColumnDef::new(Actor::UpstreamActorIds).json()) - .col(ColumnDef::new(Actor::Dispatchers).json()) + .col(ColumnDef::new(Actor::Dispatchers).json().not_null()) .col(ColumnDef::new(Actor::VnodeBitmap).string()) .foreign_key( &mut ForeignKey::create() @@ -842,9 +839,6 @@ enum Fragment { VnodeMapping, StateTableIds, UpstreamFragmentId, - DispatcherType, - DistKeyIndices, - OutputIndices, } #[derive(DeriveIden)] diff --git a/src/meta/model_v2/src/actor.rs b/src/meta/model_v2/src/actor.rs index 79a70e3f65e95..dd3ed244209ea 100644 --- a/src/meta/model_v2/src/actor.rs +++ b/src/meta/model_v2/src/actor.rs @@ -14,20 +14,23 @@ use sea_orm::entity::prelude::*; -use crate::I32Array; +use crate::{ + ActorId, ActorStatus, ActorUpstreamActors, ConnectorSplits, Dispatchers, FragmentId, + VnodeBitmap, +}; #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] #[sea_orm(table_name = "actor")] pub struct Model { #[sea_orm(primary_key)] - pub actor_id: i32, - 
pub fragment_id: i32,
-    pub status: Option<String>,
-    pub splits: Option<Json>,
+    pub actor_id: ActorId,
+    pub fragment_id: FragmentId,
+    pub status: ActorStatus,
+    pub splits: Option<ConnectorSplits>,
     pub parallel_unit_id: i32,
-    pub upstream_actor_ids: Option<I32Array>,
-    pub dispatchers: Option<Json>,
-    pub vnode_bitmap: Option<String>,
+    pub upstream_actor_ids: ActorUpstreamActors,
+    pub dispatchers: Dispatchers,
+    pub vnode_bitmap: Option<VnodeBitmap>,
 }
 
 #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
diff --git a/src/meta/model_v2/src/fragment.rs b/src/meta/model_v2/src/fragment.rs
index c590a58da771e..9d6f618ae7217 100644
--- a/src/meta/model_v2/src/fragment.rs
+++ b/src/meta/model_v2/src/fragment.rs
@@ -12,25 +12,51 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use risingwave_pb::meta::table_fragments::fragment::PbFragmentDistributionType;
 use sea_orm::entity::prelude::*;
 
-use crate::I32Array;
+use crate::{FragmentId, FragmentVnodeMapping, StreamNode, TableId, U32Array};
 
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "fragment")]
 pub struct Model {
     #[sea_orm(primary_key)]
-    pub fragment_id: i32,
-    pub table_id: i32,
-    pub fragment_type_mask: i32,
-    pub distribution_type: String,
-    pub stream_node: Json,
-    pub vnode_mapping: Option<Json>,
-    pub state_table_ids: Option<I32Array>,
-    pub upstream_fragment_id: Option<I32Array>,
-    pub dispatcher_type: Option<String>,
-    pub dist_key_indices: Option<I32Array>,
-    pub output_indices: Option<I32Array>,
+    pub fragment_id: FragmentId,
+    pub table_id: TableId,
+    pub fragment_type_mask: u32,
+    pub distribution_type: DistributionType,
+    pub stream_node: StreamNode,
+    pub vnode_mapping: Option<FragmentVnodeMapping>,
+    pub state_table_ids: U32Array,
+    pub upstream_fragment_id: U32Array,
+}
+
+#[derive(Clone, Debug, PartialEq, Eq, EnumIter, DeriveActiveEnum)]
+#[sea_orm(rs_type = "String", db_type = "String(None)")]
+pub enum DistributionType {
+    #[sea_orm(string_value = "SINGLE")]
+    Single,
+    #[sea_orm(string_value = "HASH")]
+    Hash,
+}
+
+impl From<DistributionType> for
PbFragmentDistributionType {
+    fn from(val: DistributionType) -> Self {
+        match val {
+            DistributionType::Single => PbFragmentDistributionType::Single,
+            DistributionType::Hash => PbFragmentDistributionType::Hash,
+        }
+    }
+}
+
+impl From<PbFragmentDistributionType> for DistributionType {
+    fn from(val: PbFragmentDistributionType) -> Self {
+        match val {
+            PbFragmentDistributionType::Unspecified => unreachable!(),
+            PbFragmentDistributionType::Single => DistributionType::Single,
+            PbFragmentDistributionType::Hash => DistributionType::Hash,
+        }
+    }
+}
 
 #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
diff --git a/src/meta/model_v2/src/lib.rs b/src/meta/model_v2/src/lib.rs
index 5fe23bcaa280c..5b593300d51c1 100644
--- a/src/meta/model_v2/src/lib.rs
+++ b/src/meta/model_v2/src/lib.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 
 use risingwave_pb::catalog::{PbCreateType, PbStreamJobStatus};
 use sea_orm::{DeriveActiveEnum, EnumIter, FromJsonQueryResult};
@@ -62,6 +62,10 @@ pub type FunctionId = ObjectId;
 pub type ConnectionId = ObjectId;
 pub type UserId = u32;
 
+pub type FragmentId = u32;
+
+pub type ActorId = u32;
+
 #[derive(Clone, Debug, PartialEq, Eq, EnumIter, DeriveActiveEnum)]
 #[sea_orm(rs_type = "String", db_type = "String(None)")]
 pub enum JobStatus {
@@ -104,10 +108,24 @@ macro_rules!
derive_from_json_struct {
         #[derive(Clone, Debug, PartialEq, FromJsonQueryResult, Serialize, Deserialize, Default)]
         pub struct $struct_name(pub $field_type);
         impl Eq for $struct_name {}
+
+        impl $struct_name {
+            pub fn into_inner(self) -> $field_type {
+                self.0
+            }
+
+            pub fn inner_ref(&self) -> &$field_type {
+                &self.0
+            }
+        }
     };
 }
 
 derive_from_json_struct!(I32Array, Vec<i32>);
+derive_from_json_struct!(U32Array, Vec<u32>);
+
+derive_from_json_struct!(ActorUpstreamActors, BTreeMap<FragmentId, Vec<ActorId>>);
+
 derive_from_json_struct!(DataType, risingwave_pb::data::DataType);
 derive_from_json_struct!(DataTypeArray, Vec<risingwave_pb::data::DataType>);
 derive_from_json_struct!(FieldArray, Vec<risingwave_pb::plan_common::PbField>);
@@ -132,3 +150,18 @@ derive_from_json_struct!(
     PrivateLinkService,
     risingwave_pb::catalog::connection::PbPrivateLinkService
 );
+
+derive_from_json_struct!(StreamNode, risingwave_pb::stream_plan::PbStreamNode);
+derive_from_json_struct!(Dispatchers, Vec<risingwave_pb::stream_plan::PbDispatcher>);
+
+derive_from_json_struct!(ConnectorSplits, risingwave_pb::source::ConnectorSplits);
+derive_from_json_struct!(
+    ActorStatus,
+    risingwave_pb::meta::table_fragments::PbActorStatus
+);
+derive_from_json_struct!(VnodeBitmap, risingwave_pb::common::Buffer);
+
+derive_from_json_struct!(
+    FragmentVnodeMapping,
+    risingwave_pb::common::ParallelUnitMapping
+);
diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs
index 9cf2ac987cbc9..0abfd5f4b354f 100644
--- a/src/meta/src/controller/catalog.rs
+++ b/src/meta/src/controller/catalog.rs
@@ -54,8 +54,8 @@ use crate::{MetaError, MetaResult};
 
 /// `CatalogController` is the controller for catalog related operations, including database, schema, table, view, etc.
pub struct CatalogController {
-    env: MetaSrvEnv,
-    inner: RwLock<CatalogControllerInner>,
+    pub(crate) env: MetaSrvEnv,
+    pub(crate) inner: RwLock<CatalogControllerInner>,
 }
 
 #[derive(Clone, Default)]
@@ -79,8 +79,8 @@ impl CatalogController {
     }
 }
 
-struct CatalogControllerInner {
-    db: DatabaseConnection,
+pub(crate) struct CatalogControllerInner {
+    pub(crate) db: DatabaseConnection,
 }
 
 impl CatalogController {
diff --git a/src/meta/src/controller/fragment.rs b/src/meta/src/controller/fragment.rs
new file mode 100644
index 0000000000000..a3f8d3f7dc6c2
--- /dev/null
+++ b/src/meta/src/controller/fragment.rs
@@ -0,0 +1,727 @@
+// Copyright 2023 RisingWave Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::mem::swap; + +use anyhow::Context; +use risingwave_common::bail; +use risingwave_common::util::stream_graph_visitor::visit_stream_node; +use risingwave_meta_model_v2::{ + actor, fragment, ActorStatus, ActorUpstreamActors, ConnectorSplits, Dispatchers, + FragmentVnodeMapping, StreamNode, TableId, U32Array, VnodeBitmap, +}; +use risingwave_pb::meta::table_fragments::fragment::PbFragmentDistributionType; +use risingwave_pb::meta::table_fragments::{PbActorStatus, PbFragment, PbState}; +use risingwave_pb::meta::PbTableFragments; +use risingwave_pb::source::PbConnectorSplits; +use risingwave_pb::stream_plan::stream_node::NodeBody; +use risingwave_pb::stream_plan::{PbStreamEnvironment, StreamActor}; + +use crate::controller::catalog::CatalogController; +use crate::MetaResult; + +impl CatalogController { + pub fn extract_fragment_and_actors_from_table_fragments( + PbTableFragments { + table_id, + fragments, + actor_status, + actor_splits, + .. 
+ }: PbTableFragments, + ) -> MetaResult)>> { + let mut result = vec![]; + + let fragments: BTreeMap<_, _> = fragments.into_iter().collect(); + + for (_, fragment) in fragments { + let (fragment, actors) = Self::extract_fragment_and_actors( + table_id, + fragment, + &actor_status, + &actor_splits, + )?; + + result.push((fragment, actors)); + } + + Ok(result) + } + + pub fn extract_fragment_and_actors( + table_id: TableId, + pb_fragment: PbFragment, + pb_actor_status: &HashMap, + pb_actor_splits: &HashMap, + ) -> MetaResult<(fragment::Model, Vec)> { + let PbFragment { + fragment_id: pb_fragment_id, + fragment_type_mask: pb_fragment_type_mask, + distribution_type: pb_distribution_type, + actors: pb_actors, + vnode_mapping: pb_vnode_mapping, + state_table_ids: pb_state_table_ids, + upstream_fragment_ids: pb_upstream_fragment_ids, + } = pb_fragment; + + let state_table_ids = U32Array(pb_state_table_ids); + + assert!(!pb_actors.is_empty()); + + let stream_node = { + let actor_template = pb_actors.first().cloned().unwrap(); + let mut stream_node = actor_template.nodes.unwrap(); + visit_stream_node(&mut stream_node, |body| { + if let NodeBody::Merge(m) = body { + m.upstream_actor_id = vec![]; + } + }); + + stream_node + }; + + let mut actors = vec![]; + + for mut actor in pb_actors { + let mut upstream_actors = BTreeMap::new(); + + let node = actor.nodes.as_mut().context("nodes is empty")?; + + visit_stream_node(node, |body| { + if let NodeBody::Merge(m) = body { + let mut upstream_actor_ids = vec![]; + swap(&mut m.upstream_actor_id, &mut upstream_actor_ids); + assert!( + upstream_actors + .insert(m.upstream_fragment_id, upstream_actor_ids) + .is_none(), + "There should only be one link between two fragments" + ); + } + }); + + let StreamActor { + actor_id, + fragment_id, + nodes: _, + dispatcher: pb_dispatcher, + upstream_actor_id: pb_upstream_actor_id, + vnode_bitmap: pb_vnode_bitmap, + mview_definition: _, + } = actor; + + let splits = 
pb_actor_splits.get(&actor_id).cloned().map(ConnectorSplits); + let status = pb_actor_status.get(&actor_id).cloned().map(ActorStatus); + + let status = status.ok_or_else(|| { + anyhow::anyhow!( + "actor {} in fragment {} has no actor_status", + actor_id, + fragment_id + ) + })?; + + let parallel_unit_id = status + .inner_ref() + .parallel_unit + .as_ref() + .map(|parallel_unit| parallel_unit.id) + .expect("no parallel unit id found in actor_status") + as _; + + assert_eq!( + pb_upstream_actor_id + .iter() + .cloned() + .collect::>(), + upstream_actors + .values() + .flatten() + .cloned() + .collect::>() + ); + + actors.push(actor::Model { + actor_id, + fragment_id, + status, + splits, + parallel_unit_id, + upstream_actor_ids: ActorUpstreamActors(upstream_actors), + dispatchers: Dispatchers(pb_dispatcher), + vnode_bitmap: pb_vnode_bitmap.map(VnodeBitmap), + }); + } + + let upstream_fragment_id = U32Array(pb_upstream_fragment_ids); + + let vnode_mapping = pb_vnode_mapping.map(FragmentVnodeMapping); + + let stream_node = StreamNode(stream_node); + + let distribution_type = PbFragmentDistributionType::try_from(pb_distribution_type) + .unwrap() + .into(); + + let fragment = fragment::Model { + fragment_id: pb_fragment_id, + table_id, + fragment_type_mask: pb_fragment_type_mask, + distribution_type, + stream_node, + vnode_mapping, + state_table_ids, + upstream_fragment_id, + }; + + Ok((fragment, actors)) + } + + pub fn compose_table_fragments( + table_id: u32, + state: PbState, + env: Option, + fragments: Vec<(fragment::Model, Vec)>, + ) -> MetaResult { + let mut pb_fragments = HashMap::new(); + let mut pb_actor_splits = HashMap::new(); + let mut pb_actor_status = HashMap::new(); + + for (fragment, actors) in fragments { + let (fragment, fragment_actor_status, fragment_actor_splits) = + Self::compose_fragment(fragment, actors)?; + + pb_fragments.insert(fragment.fragment_id, fragment); + + pb_actor_splits.extend(fragment_actor_splits.into_iter()); + 
pb_actor_status.extend(fragment_actor_status.into_iter()); + } + + let table_fragments = PbTableFragments { + table_id, + state: state as _, + fragments: pb_fragments, + actor_status: pb_actor_status, + actor_splits: pb_actor_splits, + env, + }; + + Ok(table_fragments) + } + + #[allow(clippy::type_complexity)] + pub(crate) fn compose_fragment( + fragment: fragment::Model, + actors: Vec, + ) -> MetaResult<( + PbFragment, + HashMap, + HashMap, + )> { + let fragment::Model { + fragment_id, + table_id: _, + fragment_type_mask, + distribution_type, + stream_node, + vnode_mapping, + state_table_ids, + upstream_fragment_id, + } = fragment; + + let stream_node_template = stream_node.into_inner(); + + let mut pb_actors = vec![]; + + let mut pb_actor_status = HashMap::new(); + let mut pb_actor_splits = HashMap::new(); + + for actor in actors { + if actor.fragment_id != fragment_id { + bail!( + "fragment id {} from actor {} is different from fragment {}", + actor.fragment_id, + actor.actor_id, + fragment_id + ) + } + + let actor::Model { + actor_id, + fragment_id, + status, + splits, + upstream_actor_ids, + dispatchers, + vnode_bitmap, + .. 
+ } = actor; + + let upstream_fragment_actors = upstream_actor_ids.into_inner(); + + let pb_nodes = { + let mut nodes = stream_node_template.clone(); + + visit_stream_node(&mut nodes, |body| { + if let NodeBody::Merge(m) = body + && let Some(upstream_actor_ids) = upstream_fragment_actors.get(&m.upstream_fragment_id) + { + m.upstream_actor_id = upstream_actor_ids.to_vec(); + } + }); + + Some(nodes) + }; + + let pb_vnode_bitmap = vnode_bitmap.map(|vnode_bitmap| vnode_bitmap.into_inner()); + + let pb_upstream_actor_id = upstream_fragment_actors + .values() + .flatten() + .cloned() + .collect(); + + let pb_dispatcher = dispatchers.into_inner(); + + pb_actor_status.insert(actor_id, status.into_inner()); + + if let Some(splits) = splits { + pb_actor_splits.insert(actor_id, splits.into_inner()); + } + + pb_actors.push(StreamActor { + actor_id, + fragment_id, + nodes: pb_nodes, + dispatcher: pb_dispatcher, + upstream_actor_id: pb_upstream_actor_id, + vnode_bitmap: pb_vnode_bitmap, + mview_definition: "".to_string(), + }) + } + + let pb_upstream_fragment_ids = upstream_fragment_id.into_inner().into_iter().collect(); + let pb_vnode_mapping = vnode_mapping.map(|mapping| mapping.into_inner()); + let pb_state_table_ids = state_table_ids.into_inner().into_iter().collect(); + let pb_distribution_type = PbFragmentDistributionType::from(distribution_type) as _; + let pb_fragment = PbFragment { + fragment_id, + fragment_type_mask: fragment_type_mask as _, + distribution_type: pb_distribution_type, + actors: pb_actors, + vnode_mapping: pb_vnode_mapping, + state_table_ids: pb_state_table_ids, + upstream_fragment_ids: pb_upstream_fragment_ids, + }; + + Ok((pb_fragment, pb_actor_status, pb_actor_splits)) + } +} + +#[cfg(test)] +mod tests { + use std::collections::{BTreeMap, HashMap}; + use std::default::Default; + + use itertools::Itertools; + use risingwave_common::hash::{ParallelUnitId, ParallelUnitMapping}; + use risingwave_common::util::iter_util::ZipEqDebug; + use 
risingwave_common::util::stream_graph_visitor::visit_stream_node; + use risingwave_meta_model_v2::fragment::DistributionType; + use risingwave_meta_model_v2::{ + actor, fragment, ActorStatus, ActorUpstreamActors, ConnectorSplits, Dispatchers, + FragmentVnodeMapping, StreamNode, U32Array, VnodeBitmap, + }; + use risingwave_pb::common::ParallelUnit; + use risingwave_pb::meta::table_fragments::fragment::PbFragmentDistributionType; + use risingwave_pb::meta::table_fragments::{PbActorStatus, PbFragment}; + use risingwave_pb::source::{PbConnectorSplit, PbConnectorSplits}; + use risingwave_pb::stream_plan::stream_node::{NodeBody, PbNodeBody}; + use risingwave_pb::stream_plan::{ + Dispatcher, MergeNode, PbDispatcher, PbDispatcherType, PbFragmentTypeFlag, PbStreamActor, + PbStreamNode, PbUnionNode, StreamActor, + }; + + use crate::controller::catalog::CatalogController; + use crate::manager::TableId; + use crate::model::{ActorId, FragmentId}; + use crate::MetaResult; + + const TEST_FRAGMENT_ID: FragmentId = 1; + + const TEST_UPSTREAM_FRAGMENT_ID: FragmentId = 2; + + const TEST_TABLE_ID: TableId = 1; + + const TEST_STATE_TABLE_ID: TableId = 1000; + + fn generate_parallel_units(count: u32) -> Vec { + (0..count) + .map(|parallel_unit_id| ParallelUnit { + id: parallel_unit_id, + ..Default::default() + }) + .collect_vec() + } + + fn generate_dispatchers_for_actor(actor_id: u32) -> Vec { + vec![PbDispatcher { + r#type: PbDispatcherType::Hash as _, + dispatcher_id: actor_id as u64, + downstream_actor_id: vec![actor_id], + ..Default::default() + }] + } + + fn generate_upstream_actor_ids_for_actor(actor_id: u32) -> BTreeMap> { + let mut upstream_actor_ids = BTreeMap::new(); + upstream_actor_ids.insert(TEST_UPSTREAM_FRAGMENT_ID, vec![(actor_id + 100) as ActorId]); + upstream_actor_ids.insert( + TEST_UPSTREAM_FRAGMENT_ID + 1, + vec![(actor_id + 200) as ActorId], + ); + upstream_actor_ids + } + + fn generate_merger_stream_node( + actor_upstream_actor_ids: &BTreeMap>, + ) -> 
PbStreamNode { + let mut input = vec![]; + for (upstream_fragment_id, upstream_actor_ids) in actor_upstream_actor_ids { + input.push(PbStreamNode { + node_body: Some(PbNodeBody::Merge(MergeNode { + upstream_actor_id: upstream_actor_ids.clone(), + upstream_fragment_id: *upstream_fragment_id, + ..Default::default() + })), + ..Default::default() + }); + } + + PbStreamNode { + input, + node_body: Some(PbNodeBody::Union(PbUnionNode {})), + ..Default::default() + } + } + + #[tokio::test] + async fn test_extract_fragment() -> MetaResult<()> { + let actor_count = 3u32; + let parallel_units = generate_parallel_units(actor_count); + let parallel_unit_mapping = ParallelUnitMapping::build(¶llel_units); + let actor_vnode_bitmaps = parallel_unit_mapping.to_bitmaps(); + + let upstream_actor_ids: HashMap>> = (0 + ..actor_count) + .map(|actor_id| (actor_id, generate_upstream_actor_ids_for_actor(actor_id))) + .collect(); + + let pb_actors = (0..actor_count) + .map(|actor_id| { + let actor_upstream_actor_ids = upstream_actor_ids.get(&actor_id).cloned().unwrap(); + let stream_node = generate_merger_stream_node(&actor_upstream_actor_ids); + + PbStreamActor { + actor_id: actor_id as _, + fragment_id: TEST_FRAGMENT_ID, + nodes: Some(stream_node), + dispatcher: generate_dispatchers_for_actor(actor_id), + upstream_actor_id: actor_upstream_actor_ids + .values() + .flatten() + .cloned() + .collect(), + vnode_bitmap: actor_vnode_bitmaps + .get(&actor_id) + .cloned() + .map(|bitmap| bitmap.to_protobuf()), + mview_definition: "".to_string(), + } + }) + .collect_vec(); + + let pb_fragment = PbFragment { + fragment_id: TEST_FRAGMENT_ID, + fragment_type_mask: PbFragmentTypeFlag::Source as _, + distribution_type: PbFragmentDistributionType::Hash as _, + actors: pb_actors.clone(), + vnode_mapping: Some(parallel_unit_mapping.to_protobuf()), + state_table_ids: vec![TEST_STATE_TABLE_ID], + upstream_fragment_ids: upstream_actor_ids + .values() + .flat_map(|m| m.keys()) + .cloned() + .collect(), + }; + + 
let pb_actor_status = (0..actor_count) + .map(|actor_id| { + ( + actor_id, + PbActorStatus { + parallel_unit: Some(parallel_units[actor_id as usize].clone()), + ..Default::default() + }, + ) + }) + .collect(); + + let pb_actor_splits = Default::default(); + + let (fragment, actors) = CatalogController::extract_fragment_and_actors( + TEST_TABLE_ID, + pb_fragment.clone(), + &pb_actor_status, + &pb_actor_splits, + )?; + + check_fragment_template(fragment.clone(), pb_actors.clone(), &upstream_actor_ids); + check_fragment(fragment, pb_fragment); + check_actors(actors, pb_actors, pb_actor_status, pb_actor_splits); + + Ok(()) + } + + fn check_fragment_template( + fragment: fragment::Model, + actors: Vec, + upstream_actor_ids: &HashMap>>, + ) { + let stream_node_template = fragment.stream_node.clone(); + + for PbStreamActor { + nodes, actor_id, .. + } in actors + { + let mut template_node = stream_node_template.clone().into_inner(); + let nodes = nodes.unwrap(); + let actor_upstream_actor_ids = upstream_actor_ids.get(&actor_id).cloned().unwrap(); + visit_stream_node(&mut template_node, |body| { + if let NodeBody::Merge(m) = body { + m.upstream_actor_id = actor_upstream_actor_ids + .get(&m.upstream_fragment_id) + .cloned() + .unwrap(); + } + }); + + assert_eq!(nodes, template_node); + } + } + + #[tokio::test] + async fn test_compose_fragment() -> MetaResult<()> { + let actor_count = 3u32; + let parallel_units = generate_parallel_units(actor_count); + let parallel_unit_mapping = ParallelUnitMapping::build(¶llel_units); + let mut actor_vnode_bitmaps = parallel_unit_mapping.to_bitmaps(); + + let upstream_actor_ids: HashMap>> = (0 + ..actor_count) + .map(|actor_id| (actor_id, generate_upstream_actor_ids_for_actor(actor_id))) + .collect(); + + let actors = (0..actor_count) + .map(|actor_id| { + let parallel_unit_id = actor_id as ParallelUnitId; + + let vnode_bitmap = actor_vnode_bitmaps + .remove(¶llel_unit_id) + .map(|m| VnodeBitmap(m.to_protobuf())); + + let actor_status = 
ActorStatus(PbActorStatus { + parallel_unit: Some(parallel_units[actor_id as usize].clone()), + ..Default::default() + }); + + let actor_splits = Some(ConnectorSplits(PbConnectorSplits { + splits: vec![PbConnectorSplit { + split_type: "dummy".to_string(), + ..Default::default() + }], + })); + + let actor_upstream_actor_ids = upstream_actor_ids.get(&actor_id).cloned().unwrap(); + let dispatchers = generate_dispatchers_for_actor(actor_id); + + actor::Model { + actor_id: actor_id as ActorId, + fragment_id: TEST_FRAGMENT_ID, + status: actor_status, + splits: actor_splits, + parallel_unit_id: parallel_unit_id as i32, + upstream_actor_ids: ActorUpstreamActors(actor_upstream_actor_ids), + dispatchers: Dispatchers(dispatchers), + vnode_bitmap, + } + }) + .collect_vec(); + + let stream_node = { + let template_actor = actors.first().cloned().unwrap(); + + let template_upstream_actor_ids = template_actor + .upstream_actor_ids + .into_inner() + .into_keys() + .map(|k| (k, vec![])) + .collect(); + + generate_merger_stream_node(&template_upstream_actor_ids) + }; + + let fragment = fragment::Model { + fragment_id: TEST_FRAGMENT_ID, + table_id: TEST_TABLE_ID, + fragment_type_mask: 0, + distribution_type: DistributionType::Hash, + stream_node: StreamNode(stream_node), + vnode_mapping: Some(FragmentVnodeMapping(parallel_unit_mapping.to_protobuf())), + state_table_ids: U32Array(vec![TEST_STATE_TABLE_ID]), + upstream_fragment_id: U32Array::default(), + }; + + let (pb_fragment, pb_actor_status, pb_actor_splits) = + CatalogController::compose_fragment(fragment.clone(), actors.clone()).unwrap(); + + assert_eq!(pb_actor_status.len(), actor_count as usize); + assert_eq!(pb_actor_splits.len(), actor_count as usize); + + for (actor_id, actor_status) in &pb_actor_status { + let parallel_unit_id = parallel_units[*actor_id as usize].id; + assert_eq!( + parallel_unit_id, + actor_status.parallel_unit.clone().unwrap().id + ); + } + + let pb_actors = pb_fragment.actors.clone(); + + 
check_fragment_template(fragment.clone(), pb_actors.clone(), &upstream_actor_ids); + check_fragment(fragment, pb_fragment); + check_actors(actors, pb_actors, pb_actor_status, pb_actor_splits); + + Ok(()) + } + + fn check_actors( + actors: Vec, + pb_actors: Vec, + pb_actor_status: HashMap, + pb_actor_splits: HashMap, + ) { + for ( + actor::Model { + actor_id, + fragment_id, + status, + splits, + parallel_unit_id, + upstream_actor_ids, + dispatchers, + vnode_bitmap, + }, + StreamActor { + actor_id: pb_actor_id, + fragment_id: pb_fragment_id, + nodes: pb_nodes, + dispatcher: pb_dispatcher, + upstream_actor_id: pb_upstream_actor_id, + vnode_bitmap: pb_vnode_bitmap, + mview_definition, + }, + ) in actors.into_iter().zip_eq_debug(pb_actors.into_iter()) + { + assert_eq!(actor_id, pb_actor_id); + assert_eq!(fragment_id, pb_fragment_id); + assert_eq!(parallel_unit_id, pb_actor_id as i32); + let upstream_actor_ids = upstream_actor_ids.into_inner(); + + assert_eq!( + upstream_actor_ids.values().flatten().cloned().collect_vec(), + pb_upstream_actor_id + ); + assert_eq!(dispatchers, Dispatchers(pb_dispatcher)); + assert_eq!( + vnode_bitmap, + pb_vnode_bitmap + .as_ref() + .map(|bitmap| VnodeBitmap(bitmap.clone())) + ); + + assert_eq!(mview_definition, ""); + + let mut pb_nodes = pb_nodes.unwrap(); + + visit_stream_node(&mut pb_nodes, |body| { + if let PbNodeBody::Merge(m) = body { + let upstream_actor_ids = upstream_actor_ids + .get(&m.upstream_fragment_id) + .cloned() + .unwrap(); + assert_eq!(upstream_actor_ids, m.upstream_actor_id); + } + }); + + assert_eq!( + status, + pb_actor_status + .get(&pb_actor_id) + .cloned() + .map(ActorStatus) + .unwrap() + ); + + assert_eq!( + splits, + pb_actor_splits + .get(&pb_actor_id) + .cloned() + .map(ConnectorSplits) + ); + } + } + + fn check_fragment(fragment: fragment::Model, pb_fragment: PbFragment) { + let PbFragment { + fragment_id, + fragment_type_mask, + distribution_type: pb_distribution_type, + actors: _, + vnode_mapping: 
pb_vnode_mapping, + state_table_ids: pb_state_table_ids, + upstream_fragment_ids: pb_upstream_fragment_ids, + } = pb_fragment; + + assert_eq!(fragment_id, TEST_FRAGMENT_ID); + assert_eq!(fragment_type_mask, fragment.fragment_type_mask); + assert_eq!( + pb_distribution_type, + PbFragmentDistributionType::from(fragment.distribution_type) as i32 + ); + assert_eq!( + pb_vnode_mapping.map(FragmentVnodeMapping), + fragment.vnode_mapping + ); + + assert_eq!( + U32Array(pb_upstream_fragment_ids), + fragment.upstream_fragment_id + ); + + assert_eq!(U32Array(pb_state_table_ids), fragment.state_table_ids); + } +} diff --git a/src/meta/src/controller/mod.rs b/src/meta/src/controller/mod.rs index c69c615165d11..7fe9de46e5742 100644 --- a/src/meta/src/controller/mod.rs +++ b/src/meta/src/controller/mod.rs @@ -31,6 +31,7 @@ use crate::MetaError; #[allow(dead_code)] pub mod catalog; pub mod cluster; +pub mod fragment; pub mod rename; pub mod system_param; pub mod utils;